mirror of
https://github.com/jeffusion/gitea-ai-assistant.git
synced 2026-06-12 23:16:49 +00:00
Compare commits
28 Commits
main
...
opencode/s
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2fac1f6942 | ||
|
|
45fcf2eaa1 | ||
|
|
d48eee3474 | ||
|
|
c0de9238b5 | ||
|
|
aa8d4ab072 | ||
|
|
1831704644 | ||
|
|
f0e45a5ae5 | ||
|
|
0ad83a4082 | ||
|
|
eeb209dbaf | ||
|
|
e1d8c1b7d2 | ||
|
|
6d62b9f87c | ||
|
|
bcc9e7b8eb | ||
|
|
12e1f4717b | ||
|
|
6ca9edecfd | ||
|
|
c4cbced8af | ||
|
|
e0ab3019db | ||
|
|
cd2bdf4131 | ||
|
|
b304814e42 | ||
|
|
1ff629cffb | ||
|
|
8ccc7452e5 | ||
|
|
b2b914f919 | ||
|
|
7b9b9e69a7 | ||
|
|
46c5e09a62 | ||
|
|
1a43b1f206 | ||
|
|
1b26fac951 | ||
|
|
38e4c58d71 | ||
|
|
5b29e2d4af | ||
|
|
ac40957ede |
55
.github/workflows/ci.yml
vendored
55
.github/workflows/ci.yml
vendored
@@ -52,3 +52,58 @@ jobs:
|
||||
path: |
|
||||
frontend/playwright-report/
|
||||
frontend/test-results/
|
||||
|
||||
e2e:
|
||||
runs-on: ubuntu-22.04
|
||||
needs: test
|
||||
|
||||
services:
|
||||
gitea:
|
||||
image: gitea/gitea:1.22
|
||||
ports: ['3333:3000']
|
||||
env:
|
||||
GITEA__database__DB_TYPE: sqlite3
|
||||
GITEA__server__ROOT_URL: http://localhost:3333
|
||||
GITEA__security__INSTALL_LOCK: true
|
||||
GITEA__webhook__ALLOWED_HOST_LIST: '*'
|
||||
GITEA__webhook__SKIP_TLS_VERIFY: true
|
||||
options: >-
|
||||
--health-cmd "curl -f http://localhost:3000/api/v1/version"
|
||||
--health-interval 5s
|
||||
--health-timeout 3s
|
||||
--health-retries 20
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Bun
|
||||
uses: oven-sh/setup-bun@v2
|
||||
with:
|
||||
bun-version: 1.3.10
|
||||
|
||||
- name: Install dependencies
|
||||
run: bun install --frozen-lockfile
|
||||
|
||||
- name: Install git
|
||||
run: sudo apt-get update && sudo apt-get install -y git
|
||||
|
||||
- name: Create Gitea admin user
|
||||
run: |
|
||||
for i in $(seq 1 10); do
|
||||
if docker exec $(docker ps -q --filter "ancestor=gitea/gitea:1.22") \
|
||||
gitea admin user create --username e2e-admin --password 'e2ePassword123!' --email 'e2e@test.local' --admin 2>/dev/null; then
|
||||
echo "User created"
|
||||
break
|
||||
fi
|
||||
echo "Retrying... ($i)"
|
||||
sleep 3
|
||||
done || true
|
||||
docker exec -u git $(docker ps -q --filter "ancestor=gitea/gitea:1.22") \
|
||||
gitea admin user create --username e2e-admin --password 'e2ePassword123!' --email 'e2e@test.local' --admin 2>/dev/null || true
|
||||
|
||||
- name: Run E2E tests
|
||||
run: bun run test:e2e
|
||||
env:
|
||||
E2E_GITEA_URL: http://localhost:3333
|
||||
E2E_MOCK_LLM: 1
|
||||
|
||||
@@ -1,14 +1,8 @@
|
||||
version: '3.8'
|
||||
|
||||
# E2E 测试环境:Gitea + gitea-assistant
|
||||
# 用法:
|
||||
# docker compose -f docker-compose.e2e.yml up -d
|
||||
# # 等待服务启动后运行 seed 脚本:
|
||||
# ./e2e/seed.sh
|
||||
# # 运行 E2E 测试:
|
||||
# ./e2e/test.sh
|
||||
# # 清理:
|
||||
# docker compose -f docker-compose.e2e.yml down -v
|
||||
# docker compose -f docker-compose.e2e.yml up -d
|
||||
# ./e2e/seed.sh
|
||||
# docker compose -f docker-compose.e2e.yml down -v
|
||||
|
||||
services:
|
||||
gitea:
|
||||
@@ -47,8 +41,15 @@ services:
|
||||
- GITEA_API_URL=http://gitea:3000/api/v1
|
||||
- GITEA_ACCESS_TOKEN=${E2E_GITEA_TOKEN:-placeholder}
|
||||
- PORT=5174
|
||||
- ENCRYPTION_KEY=5752fac0e57d00e9b7954863faef878693420e6b06bc20d710897587e802668a
|
||||
- REVIEW_ENGINE=kernel
|
||||
- REVIEW_WORKDIR=/tmp/e2e-review
|
||||
- DATABASE_PATH=/data/assistant.db
|
||||
- E2E_MOCK_LLM=1
|
||||
ports:
|
||||
- "3334:5174"
|
||||
volumes:
|
||||
- assistant-data:/data
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:5174/api/health"]
|
||||
interval: 5s
|
||||
@@ -58,3 +59,4 @@ services:
|
||||
|
||||
volumes:
|
||||
gitea-data:
|
||||
assistant-data:
|
||||
|
||||
@@ -13,6 +13,7 @@ This project keeps the root `README.md` concise and moves implementation/deploym
|
||||
## Architecture & design
|
||||
|
||||
- [Pluggable LLM providers](./design/pluggable-llm-providers.md)
|
||||
- [Kernel built-in Agent architecture](./design/kernel-built-in-agents.md)
|
||||
- [Notification service refactoring](./design/notification-service-refactoring.md)
|
||||
- [UI theme language](./design/ui-theme-language.md)
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
## 架构与设计
|
||||
|
||||
- [可插拔 LLM 提供商设计](./design/pluggable-llm-providers.md)
|
||||
- [Kernel 内置 Agent 架构设计](./design/kernel-built-in-agents.md)
|
||||
- [通知服务重构设计](./design/notification-service-refactoring.md)
|
||||
- [UI 主题语言设计](./design/ui-theme-language.md)
|
||||
|
||||
|
||||
890
docs/design/kernel-built-in-agents.md
Normal file
890
docs/design/kernel-built-in-agents.md
Normal file
@@ -0,0 +1,890 @@
|
||||
# 技术设计文档:Kernel 内置 Agent 架构
|
||||
|
||||
> **状态**: Draft
|
||||
> **作者**: AI Architect
|
||||
> **日期**: 2026-04-28
|
||||
> **相关模块**: `src/agent-kernel/`、`src/review/kernel/`
|
||||
> **适用范围**: Review Kernel 的内置 subagent 体系、运行时委派、管理后台可观测能力与生产测试门禁
|
||||
|
||||
---
|
||||
|
||||
## 目录
|
||||
|
||||
- [0. 文档信息](#0-文档信息)
|
||||
- [1. 背景与目标](#1-背景与目标)
|
||||
- [2. 设计原则与关键取舍](#2-设计原则与关键取舍)
|
||||
- [3. 概要设计](#3-概要设计)
|
||||
- [4. 内置 Agent 详细设计](#4-内置-agent-详细设计)
|
||||
- [4.8 Agent工作机制详解](#48-agent工作机制详解)
|
||||
- [5. 运行时与状态设计](#5-运行时与状态设计)
|
||||
- [6. API 与管理后台可观测性](#6-api-与管理后台可观测性)
|
||||
- [7. 非功能性设计](#7-非功能性设计)
|
||||
- [8. 测试与上线验证](#8-测试与上线验证)
|
||||
- [9. 风险、待确认与后续演进](#9-风险待确认与后续演进)
|
||||
|
||||
---
|
||||
|
||||
## 0. 文档信息
|
||||
|
||||
| 字段 | 内容 |
|
||||
|---|---|
|
||||
| 版本 | v0.1 |
|
||||
| 状态 | 草案 |
|
||||
| 目标读者 | 研发 / 架构 / QA / 运维 / 管理后台开发 |
|
||||
| 系统类型 | AI 应用工程 / 后端 Agent Runtime / 审查系统适配层 |
|
||||
| 主要代码路径 | `src/agent-kernel/`、`src/review/kernel/` |
|
||||
| 相关配置 | `REVIEW_ENGINE=kernel` |
|
||||
|
||||
### Assumptions
|
||||
|
||||
- 当前项目已选择 **kernel-first** 作为代码审查主路径;旧固定 agent 编排不作为未来运行时主路径。
|
||||
- 内置 Agent 当前以 **built-in subagent definition** 的方式注册,后续可演进到 plugin/custom subagent 加载。
|
||||
- 一条 PR 对应一个 kernel session,commit 更新、人工反馈和后续恢复都写入同一 session。
|
||||
|
||||
### To Be Confirmed
|
||||
|
||||
- 是否需要把 built-in subagent 的定义从 TypeScript 代码进一步外置为 YAML/JSON/插件目录。
|
||||
- 管理后台是否需要支持逐 subagent 的启用/禁用、版本选择与灰度策略。
|
||||
|
||||
---
|
||||
|
||||
## 1. 背景与目标
|
||||
|
||||
### 1.1 背景
|
||||
|
||||
早期审查系统采用固定流程编排:triage 后按审查域派生多个 specialist,再由额外阶段汇总。该方案的问题是:
|
||||
|
||||
- 流程扩展需要修改 orchestrator/runtime 代码;
|
||||
- 角色能力与执行链路耦合,难以按能力标签选择代理;
|
||||
- 缺少独立 subagent identity、delegation boundary 和 invocation trace;
|
||||
- 管理后台难以展示“有哪些 Agent、何时被调用、产生了什么结果”;
|
||||
- 恢复、压缩、权限、hook 等横切能力难以统一接入。
|
||||
|
||||
新的 Kernel 内置 Agent 架构将 review 角色转换为注册式 built-in subagents,由 `AgentKernelRunner` 根据 planner 输出与 session state 推进任务,并通过 `KernelAgentInvoker` 统一委派执行。
|
||||
|
||||
### 1.2 核心目标
|
||||
|
||||
| 目标 | 说明 |
|
||||
|---|---|
|
||||
| 注册式扩展 | 内置 Agent 以 `KernelSubagentDefinition` 注册,runtime 不硬编码角色实例 |
|
||||
| 能力选择 | planner 通过 tags/capabilities 选择 subagent,而不是写死 agent id |
|
||||
| 可恢复执行 | session checkpoint 持久化 state + pendingTasks,支持 feedback 后继续执行 |
|
||||
| 委派边界 | 每次 subagent 调用都有 agentId、delegation packet、invocation record、structured result |
|
||||
| 上下文压缩 | 大上下文触发 compression,summary 写入 checkpoint 并回注后续 subagent |
|
||||
| 工具治理 | 工具调用走统一 orchestration、permission gating 与 hooks |
|
||||
| 可观测性 | 管理 API 暴露 task/subagent/hook catalog、session timeline、subagent invocations |
|
||||
|
||||
### 1.3 范围与非范围
|
||||
|
||||
**范围内**:
|
||||
|
||||
- Review Kernel 内置 subagents 的定义、职责、标签、运行链路;
|
||||
- Kernel agent registry / invoker / runner 与 session checkpoint 的协作;
|
||||
- 内置 Agent 与 tools、hooks、permission、compression 的集成方式;
|
||||
- 管理后台需要消费的 catalog 与 session 投影视图;
|
||||
- 生产前自动化测试门禁。
|
||||
|
||||
**范围外**:
|
||||
|
||||
- 前端 UI 视觉设计细节;
|
||||
- 旧 `agent` 固定编排引擎兼容;
|
||||
- Codex CLI 引擎内部实现;
|
||||
- 通用插件市场、远程 agent 执行后端和多租户权限模型。
|
||||
|
||||
---
|
||||
|
||||
## 2. 设计原则与关键取舍
|
||||
|
||||
### 2.1 核心设计原则
|
||||
|
||||
| 原则 | 落地方式 |
|
||||
|---|---|
|
||||
| 高内聚低耦合 | `src/agent-kernel/` 只提供通用 session/runner/registry/invoker/hooks;review 逻辑放在 `src/review/kernel/` |
|
||||
| 开闭原则 | 新增流程能力优先增加 subagent、skill、hook 或 tool,而不是修改主循环 |
|
||||
| Session 为状态源 | PR/commit session 记录 event、checkpoint、subagent invocation,是恢复与投影的事实来源 |
|
||||
| 可观测优先 | 每次 subagent 调用持久化 invocation;每个 task 写入 started/completed/failed event |
|
||||
| 安全默认 | 工具执行统一经过 permission gating;高风险 scope 默认 ask/deny |
|
||||
| 可测试 | 断言面落在 checkpoint、events、invocations、tool result、admin projection,而不是完整 LLM 文本 |
|
||||
|
||||
### 2.2 关键取舍
|
||||
|
||||
| 取舍点 | 选择 | 原因 |
|
||||
|---|---|---|
|
||||
| 内置 Agent 表达方式 | TypeScript built-in definitions | 当前阶段需要强类型、低迁移成本;后续可迁移到 plugin loader |
|
||||
| Agent 调用入口 | `KernelAgentInvoker` 统一调用 | 统一 agentId、hook、invocation persistence、structured result |
|
||||
| 流程推进方式 | planner + session state | 避免静态任务数组;支持继续执行与人审恢复 |
|
||||
| Findings 处理 | 本地归一化、去重、排序与发布 | full review 只产出 findings;后续由 skill/本地逻辑保证确定性 |
|
||||
| 压缩策略 | planner 模型窗口 80% 触发 | 使用 tokenlens context window,预留 20% 冗余 |
|
||||
| 管理接口 | task/subagent/hook catalog + session detail | 让后台可解释当前能力目录与执行轨迹 |
|
||||
|
||||
---
|
||||
|
||||
## 3. 概要设计
|
||||
|
||||
### 3.1 总体架构
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
Webhook[Gitea Webhook / Feedback] --> Engine[KernelReviewEngine]
|
||||
Engine --> Session[(Kernel Session Repository)]
|
||||
Engine --> Runtime[ReviewKernelRuntime]
|
||||
|
||||
Runtime --> Runner[AgentKernelRunner]
|
||||
Runtime --> SkillRegistry[KernelTaskRegistry / Skills]
|
||||
Runtime --> AgentRegistry[KernelAgentRegistry / Built-in Subagents]
|
||||
Runtime --> HookRegistry[KernelHookRegistry]
|
||||
Runtime --> ToolRegistry[ToolRegistry]
|
||||
|
||||
Runner --> Planner[State-driven Planner]
|
||||
Planner --> SkillTask[Skill Task]
|
||||
Planner --> SubagentTask[Subagent Task]
|
||||
|
||||
SkillTask --> SkillRegistry
|
||||
SubagentTask --> Invoker[KernelAgentInvoker]
|
||||
Invoker --> AgentContext[AsyncLocalStorage Agent Context]
|
||||
Invoker --> Invocation[(Subagent Invocation Record)]
|
||||
Invoker --> Builtins[Review Built-in Subagents]
|
||||
|
||||
Builtins --> Triage[review:triage]
|
||||
Builtins --> FullReview[review:full_review]
|
||||
|
||||
FullReview --> ToolOrchestration[Tool Orchestration]
|
||||
ToolOrchestration --> Permission[Permission Gating]
|
||||
ToolOrchestration --> Hooks[Pre/Post Tool Hooks]
|
||||
|
||||
Runtime --> AdminAPI[Admin API Catalog / Session Projection]
|
||||
```
|
||||
|
||||
### 3.2 模块职责
|
||||
|
||||
| 模块 | 文件 | 职责 |
|
||||
|---|---|---|
|
||||
| Kernel types | `src/agent-kernel/types.ts` | 定义 task、subagent、delegation packet、checkpoint、invocation result |
|
||||
| Agent registry | `src/agent-kernel/agents/kernel-agent-registry.ts` | 注册、查询、按 tag 过滤 subagent |
|
||||
| Agent invoker | `src/agent-kernel/agents/kernel-agent-invoker.ts` | 创建 agentId、触发 hook、持久化 invocation、执行 subagent |
|
||||
| Agent context | `src/agent-kernel/agents/kernel-agent-context.ts` | 使用 AsyncLocalStorage 隔离子代理执行上下文 |
|
||||
| Runner | `src/agent-kernel/runtime/agent-kernel-runner.ts` | 按 planner 结果推进 skill/subagent task,写 checkpoint 与 task event |
|
||||
| Session repo | `src/agent-kernel/session/session-repository.ts` | 持久化 session、events、checkpoint、subagent invocations |
|
||||
| Review runtime | `src/review/kernel/review-kernel-runtime.ts` | 注册 skills/hooks/built-in subagents,提供 execute/continueExecution |
|
||||
| Built-in subagents | `src/review/kernel/review-built-in-subagents.ts` | 将 triage 与 full_review 转换为注册式 subagent definitions |
|
||||
| Subagent ids | `src/review/kernel/review-subagent-ids.ts` | 统一内置 subagent id 命名 |
|
||||
| Admin projection | `src/review/kernel/session-read-model.ts` | 将 session event/checkpoint/invocation 投影为后台视图 |
|
||||
|
||||
### 3.3 核心执行链路
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant E as KernelReviewEngine
|
||||
participant R as ReviewKernelRuntime
|
||||
participant S as SessionRepository
|
||||
participant K as AgentKernelRunner
|
||||
participant I as KernelAgentInvoker
|
||||
participant A as Built-in Subagent
|
||||
|
||||
E->>S: ensureSession(scopeKey)
|
||||
E->>R: execute(run, sessionId)
|
||||
R->>S: appendEvent(run_started)
|
||||
R->>K: run(initialState, initialTasks=[])
|
||||
loop until stopReason
|
||||
K->>K: planner.plan(state)
|
||||
alt skill task
|
||||
K->>R: execute skill handler
|
||||
else subagent task
|
||||
K->>I: invoke(task, context)
|
||||
I->>S: createSubagentInvocation(running)
|
||||
I->>A: execute(task, agentContext)
|
||||
A-->>I: KernelHandlerResult
|
||||
I->>S: completeSubagentInvocation(completed)
|
||||
end
|
||||
K->>S: appendEvent(task_completed)
|
||||
K->>S: saveCheckpoint(state, pendingTasks, stopReason)
|
||||
end
|
||||
R->>S: appendEvent(run_completed)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. 内置 Agent 详细设计
|
||||
|
||||
### 4.1 内置 Agent 目录
|
||||
|
||||
| Subagent ID | Source | Model Role | Tags | 职责 | 触发条件 |
|
||||
|---|---|---|---|---|---|
|
||||
| `review:triage` | `built-in` | `planner` | `review`, `planner`, `triage` | 根据 diff、文件、风险生成自主审查提示、模式和预算 | build context 完成且尚无 triage 结果 |
|
||||
| `review:full_review` | `built-in` | `specialist` | `review`, `specialist`, `full-review`, `autonomous-review` | 执行一次完整自主代码审查,模型自行选择工具和调查路径 | triage 完成且尚未完成 full review |
|
||||
|
||||
### 4.2 Subagent Definition 契约
|
||||
|
||||
每个内置 Agent 必须实现 `KernelSubagentDefinition<TState>`:
|
||||
|
||||
```typescript
|
||||
interface KernelSubagentDefinition<TState> {
|
||||
kind: 'subagent';
|
||||
name: string;
|
||||
source: 'built-in' | 'custom' | 'plugin';
|
||||
description: string;
|
||||
whenToUse: string;
|
||||
tags?: string[];
|
||||
modelRole?: string;
|
||||
resumable?: boolean;
|
||||
execute(task, context): Promise<KernelHandlerResult<TState> | undefined>;
|
||||
}
|
||||
```
|
||||
|
||||
关键约束:
|
||||
|
||||
- `name` 必须稳定,作为 session event、invocation、admin catalog 的统一标识;
|
||||
- `tags` 在类型定义上是可选字段,但内置 Agent 必须填写能力标签,因为 planner 只按 tag/capability 选择代理;
|
||||
- `whenToUse` 既用于管理后台解释,也用于 delegation packet 的 goal;
|
||||
- `execute` 不直接控制主循环,只返回 state/enqueue/prepend/stopReason;
|
||||
- 内置 Agent 不应越权直接修改 pendingTasks,除非通过标准 `KernelHandlerResult`。
|
||||
|
||||
### 4.3 Planner 选择规则
|
||||
|
||||
`ReviewKernelRuntime.planTasks()` 根据 checkpoint state 推导下一步:
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
A[开始 plan] --> B{有 pendingTasks?}
|
||||
B -- 是 --> Z[不新增任务]
|
||||
B -- 否 --> C{缺 workspace?}
|
||||
C -- 是 --> PW[prepare_workspace skill]
|
||||
C -- 否 --> D{缺 context?}
|
||||
D -- 是 --> BC[build_context skill]
|
||||
D -- 否 --> E{需要压缩?}
|
||||
E -- 是 --> CC[compress_context skill]
|
||||
E -- 否 --> F{缺 triage?}
|
||||
F -- 是 --> T[按 tag=triage 选择 review:triage]
|
||||
F -- 否 --> G{full review 未完成?}
|
||||
G -- 是 --> S[执行 review:full_review]
|
||||
G -- 否 --> P{未 publish?}
|
||||
P -- 是 --> PR[publish_review skill]
|
||||
P -- 否 --> R{未保存 reviewed ref?}
|
||||
R -- 是 --> SR[save_reviewed_ref skill]
|
||||
R -- 否 --> DONE[completed]
|
||||
```
|
||||
|
||||
### 4.4 Triage Agent
|
||||
|
||||
`review:triage` 包装 `TriageAgent`,输出自主审查提示:
|
||||
|
||||
- 使用 `planner` 模型角色;
|
||||
- 接收 `projectPrompt` 和 `compressedContext.summary`;
|
||||
- 生成 `mode`、`reviewSize`、`riskTags`、`suspectedEntrypoints` 与预算提示;
|
||||
- 提示只影响 full review 的调查起点,不拆分审查任务。
|
||||
|
||||
### 4.5 Autonomous Full Review Agent
|
||||
|
||||
`review:full_review` 包装 `AutonomousReviewAgent`:
|
||||
|
||||
- 共享 `ToolRegistry` 与 `KernelHookRegistry`;
|
||||
- 根据 `ReviewTask` 控制 mode、reviewSize、riskTags、suspectedEntrypoints、maxTurns、maxToolCalls、maxElapsedMs、tokenBudget;
|
||||
- 支持压缩 summary 回注到 prompt;
|
||||
- 不预拆 correctness/security/quality 子任务,模型在一次自主循环内跨文件调查;
|
||||
- 工具调用统一经过 tool orchestration、permission gating、Pre/Post tool hooks。
|
||||
|
||||
### 4.6 Aggregate Findings Skill
|
||||
|
||||
`aggregate_findings` 是 full review 后的确定性本地步骤:
|
||||
|
||||
- 接收 `review:full_review` 产出的 findings;
|
||||
- 归一化 category/severity/confidence,补齐 fingerprint;
|
||||
- 按 fingerprint 去重,并按 severity/path/line/title 稳定排序;
|
||||
- 写回 checkpoint,供后续发布步骤使用。
|
||||
|
||||
### 4.7 Publish and Save Skills
|
||||
|
||||
`publish_review` 与 `save_reviewed_ref` 负责外部副作用:
|
||||
|
||||
- `publish_review` 生成确定性 summary,并发布 PR summary 与 line comments;
|
||||
- `save_reviewed_ref` 在本地 mirror 保存已审查 ref,用于后续增量审查;
|
||||
- 两个步骤分离,避免评论发布和 ref 保存互相污染,失败时依赖 checkpoint 重试。
|
||||
|
||||
---
|
||||
|
||||
## 4.8 Agent工作机制详解
|
||||
|
||||
本节详细说明 Kernel Agent 的运转机制、任务调度、工具调用、决策逻辑及边界划分。
|
||||
|
||||
### 4.8.1 核心运转架构
|
||||
|
||||
Kernel 采用「**事件驱动 + 状态机**」的运行模式:
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
Webhook[Gitea Webhook / Feedback] --> Engine[KernelReviewEngine]
|
||||
Engine --> Session[Session Repository]
|
||||
Engine --> Runtime[ReviewKernelRuntime]
|
||||
Runtime --> Runner[AgentKernelRunner]
|
||||
Runner --> Planner[Turn Planner]
|
||||
Planner --> Tasks[Tasks Queue]
|
||||
Tasks --> Executor[Task Executor]
|
||||
Executor --> State[State Update]
|
||||
State --> Checkpoint[Checkpoint Save]
|
||||
Checkpoint --> Runner
|
||||
```
|
||||
|
||||
**关键组件职责**:
|
||||
|
||||
| 组件 | 文件 | 核心职责 |
|
||||
|------|------|----------|
|
||||
| **AgentKernelRunner** | `agent-kernel-runner.ts` | 主循环控制器:任务调度、状态流转、checkpoint 管理 |
|
||||
| **ReviewKernelRuntime** | `review-kernel-runtime.ts` | Review 业务运行时:封装 skills、subagents、hooks、tools |
|
||||
| **KernelTurnPlanner** | `review-kernel-runtime.ts:305-361` | 基于当前 state 决定下一步执行什么任务 |
|
||||
|
||||
### 4.8.2 核心运转流程
|
||||
|
||||
**1. 启动阶段**:
|
||||
```typescript
|
||||
// PR webhook 触发
|
||||
kernelReviewEngine.enqueuePullRequest(payload)
|
||||
→ ensureSession(scopeKey) // 创建或复用 session
|
||||
→ runtime.execute(run, sessionId) // 启动运行时
|
||||
→ AgentKernelRunner.run({ // 启动主循环
|
||||
sessionId,
|
||||
initialState: {...},
|
||||
initialTasks: []
|
||||
})
|
||||
```
|
||||
|
||||
**2. 主循环机制** (`AgentKernelRunner.run`):
|
||||
|
||||
```typescript
|
||||
async run({ sessionId, initialState, initialTasks, continueExisting }) {
|
||||
// 从 checkpoint 恢复状态(支持继续执行)
|
||||
const persisted = loadCheckpoint(sessionId);
|
||||
let state = persisted?.state ?? initialState;
|
||||
const pendingTasks = [...(persisted?.pendingTasks ?? initialTasks)];
|
||||
|
||||
// 主循环:直到有 stopReason
|
||||
while (!stopReason) {
|
||||
// 如果没有待执行任务,让 planner 规划新任务
|
||||
if (pendingTasks.length === 0) {
|
||||
const planned = planner.plan({ session, state, pendingTasks });
|
||||
pendingTasks.push(...planned);
|
||||
}
|
||||
|
||||
// 取出下一个任务
|
||||
const task = pendingTasks.shift();
|
||||
|
||||
// 执行任务
|
||||
const result = await executeTask(task, context);
|
||||
|
||||
// 处理执行结果
|
||||
if (result?.state) state = result.state; // 更新状态
|
||||
if (result?.prepend) pendingTasks.unshift(...result.prepend); // 前置任务
|
||||
if (result?.enqueue) pendingTasks.push(...result.enqueue); // 后置任务
|
||||
if (result?.stopReason) stopReason = result.stopReason; // 停止原因
|
||||
|
||||
// 保存 checkpoint(支持失败恢复)
|
||||
saveCheckpoint(sessionId, { state, pendingTasks, stopReason });
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**3. 恢复机制** (`continueExisting`):
|
||||
- 从 SQLite 加载持久化的 checkpoint
|
||||
- 恢复 `state` 和 `pendingTasks`
|
||||
- **显式忽略**旧 checkpoint 的 `stopReason`,允许从 feedback 后继续
|
||||
- 不 replay events,直接继续执行
|
||||
|
||||
### 4.8.3 任务调度与决策
|
||||
|
||||
**Planner 是决策中枢**,根据当前 state 动态决定下一步:
|
||||
|
||||
```typescript
|
||||
private planTasks(context: KernelPlanningContext): KernelTask[] {
|
||||
// 阶段1: 前置条件检查(顺序执行)
|
||||
if (!context.state.workspacePath) {
|
||||
return [{ kind: 'skill', name: 'prepare_workspace' }];
|
||||
}
|
||||
if (!context.state.context) {
|
||||
return [{ kind: 'skill', name: 'build_context' }];
|
||||
}
|
||||
|
||||
// 阶段2: 上下文压缩决策
|
||||
if (shouldCompress(context)) {
|
||||
return [{ kind: 'skill', name: 'compress_context' }];
|
||||
}
|
||||
|
||||
// 阶段3: Triage 决策(生成自主审查提示)
|
||||
if (!context.state.triage) {
|
||||
return [{ kind: 'subagent', name: 'review:triage' }];
|
||||
}
|
||||
|
||||
// 阶段4: 单次完整自主审查
|
||||
if (!context.state.reviewCompleted) {
|
||||
return [{ kind: 'subagent', name: 'review:full_review' }];
|
||||
}
|
||||
|
||||
// 阶段5: 发布与收尾
|
||||
if (!context.state.published) {
|
||||
return [{ kind: 'skill', name: 'publish_review' }];
|
||||
}
|
||||
|
||||
return []; // 完成(本示例省略了 4.3 流程图中的 save_reviewed_ref 分支)
|
||||
}
|
||||
```
|
||||
|
||||
**决策依据**:
|
||||
- **当前 State**: `triage`, `reviewCompleted`, `findings`, `published`, `reviewedRefSaved` 等字段
|
||||
- **Tags/Capabilities**: 按标签选择 subagent(`filterByTag('triage')`),非硬编码
|
||||
- **Config 开关**: 审查引擎、工作区、命令白名单等运行配置
|
||||
|
||||
### 4.8.4 Skills 与 Subagents 调用机制
|
||||
|
||||
**Skills - 原子任务**:
|
||||
|
||||
```typescript
|
||||
// 注册 Skills
|
||||
this.skillRegistry.register(createPrepareWorkspaceSkill());
|
||||
this.skillRegistry.register(createBuildContextSkill());
|
||||
|
||||
// Skill 定义
|
||||
{
|
||||
kind: 'skill',
|
||||
name: 'build_context',
|
||||
execute: async (task, context) => {
|
||||
// 执行业务逻辑
|
||||
const reviewContext = await diffExtractor.buildContext(...);
|
||||
|
||||
return {
|
||||
state: { ...context.state, context: reviewContext }, // 更新状态
|
||||
// 可选控制流
|
||||
prepend: [], // 插入到待执行队列头部(当前任务结束后最先执行)
|
||||
enqueue: [], // 追加到待执行队列尾部(在已排队任务之后执行)
|
||||
stopReason: undefined // 或 'completed', 'failed', 'awaiting_human_feedback'
|
||||
};
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Subagents - 委派执行**:
|
||||
|
||||
```typescript
|
||||
// 调用路径
|
||||
AgentKernelRunner → KernelAgentInvoker.invoke(task, context)
|
||||
→ 创建 invocation record
|
||||
→ 执行 subagent.execute(task, agentContext)
|
||||
→ 完成 invocation,返回结果
|
||||
```
|
||||
|
||||
```typescript
|
||||
// Subagent 执行上下文
|
||||
const agentContext: KernelAgentExecutionContext = {
|
||||
...context,
|
||||
agent, // subagent 定义
|
||||
delegation: { // 委派包
|
||||
goal: agent.whenToUse,
|
||||
parentTaskName: task.name,
|
||||
input: task.input,
|
||||
contextSummary: state.compressedContext?.summary // 压缩摘要回注
|
||||
}
|
||||
};
|
||||
|
||||
// 执行(带 AsyncLocalStorage 隔离)
|
||||
const result = await runWithKernelAgentContext(
|
||||
{ agentId, parentSessionId, agentType: 'subagent', ... },
|
||||
() => agent.execute(task, agentContext)
|
||||
);
|
||||
```
|
||||
|
||||
### 4.8.5 Tools 调用机制
|
||||
|
||||
**调用路径**(在 `review:full_review` 内部):
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant FullReview as AutonomousReviewAgent
|
||||
participant Loop as Autonomous Loop
|
||||
participant Orchestration as ToolOrchestration
|
||||
participant Permission as Permission Gating
|
||||
participant Hook as PreToolUse Hook
|
||||
participant Tool as Tool.execute()
|
||||
participant PostHook as PostToolUse Hook
|
||||
|
||||
FullReview->>Loop: 决定调用 tool
|
||||
Loop->>Orchestration: partitionToolCalls(tools)
|
||||
Orchestration->>Permission: evaluateToolPermission(tool)
|
||||
Permission-->>Orchestration: allow/ask/deny
|
||||
Orchestration->>Hook: runKernelHooks(PreToolUse)
|
||||
Hook-->>Orchestration: additionalContext/updatedInput
|
||||
Orchestration->>Tool: tool.execute(args)
|
||||
Tool-->>Orchestration: result
|
||||
Orchestration->>PostHook: runKernelHooks(PostToolUse)
|
||||
PostHook-->>Orchestration: -
|
||||
Orchestration-->>Loop: toolResult
|
||||
Loop-->>FullReview: 更新 diagnostics/findings
|
||||
```
|
||||
|
||||
**并发控制**:
|
||||
- **并发安全工具** (`isConcurrencySafe: true`): 并行执行
|
||||
- **非并发安全工具**: 串行执行
|
||||
- **权限拦截**: `PermissionRequest` Hook 可批准/阻断
|
||||
|
||||
**权限边界**:
|
||||
|
||||
| Scope | 默认行为 | 说明 |
|
||||
|-------|----------|------|
|
||||
| `read` | `allow` | 安全操作(读文件、搜索代码) |
|
||||
| `write` | `ask` | 需审批(写文件) |
|
||||
| `command` | `ask` | 需审批(执行命令) |
|
||||
| `git_write` | `ask` | 需审批(Git 操作) |
|
||||
| `network` | `deny` | 禁止网络访问 |
|
||||
| `cross_session` | `deny` | 禁止跨 session 操作 |
|
||||
|
||||
### 4.8.6 代码审查结合流程
|
||||
|
||||
**完整数据流**:
|
||||
|
||||
```
|
||||
Webhook → PR/Commit
|
||||
↓
|
||||
prepare_workspace → 克隆仓库、准备 mirror/workspace
|
||||
↓
|
||||
build_context → 提取 diff、文件内容、构建 ReviewContext
|
||||
↓
|
||||
compress_context (可选) → 大上下文自动压缩,生成 summary
|
||||
↓
|
||||
review:triage → 生成自主审查提示、模式和预算
|
||||
↓
|
||||
review:full_review → 单个自主代理跨文件调查,生成 findings
|
||||
↓
|
||||
publish_review → 发布 summary + line comments
|
||||
↓
|
||||
save_reviewed_ref → 保存审查快照(支持增量审查)
|
||||
```
|
||||
|
||||
**状态流转**:
|
||||
|
||||
```mermaid
|
||||
stateDiagram-v2
|
||||
[*] --> prepare_workspace: 启动
|
||||
prepare_workspace --> build_context: 成功
|
||||
build_context --> compress_context: 上下文过大
|
||||
build_context --> triage: 正常
|
||||
compress_context --> triage: 完成
|
||||
triage --> full_review: 提示生成完成
|
||||
full_review --> publish_review: findings 聚合完成
|
||||
publish_review --> save_reviewed_ref: 发布完成
|
||||
save_reviewed_ref --> [*]: completed
|
||||
```
|
||||
|
||||
### 4.8.7 边界划分
|
||||
|
||||
**Skills vs Subagents 边界**:
|
||||
|
||||
| 维度 | Skills | Subagents |
|
||||
|------|--------|-----------|
|
||||
| **粒度** | 原子操作(准备环境、构建上下文、发布) | 复杂推理(规划、完整审查) |
|
||||
| **模型** | 通常不涉及 LLM | 必须调用 LLM(planner/specialist) |
|
||||
| **并发** | 顺序执行 | Subagent 之间顺序执行;调查通过单个 full review 代理内部的自主工具调用完成,其中并发安全工具可并行 |
|
||||
| **状态** | 修改 state 字段 | 可修改 state,主要产出 hints/findings/diagnostics |
|
||||
| **失败** | 阻断整个流程 | 可单独重试或降级 |
|
||||
| **示例** | prepare_workspace, publish_review | review:triage, review:full_review |
|
||||
|
||||
**Runtime vs Runner 边界**:
|
||||
|
||||
| 组件 | 职责 | 不做什么 |
|
||||
|------|------|----------|
|
||||
| **AgentKernelRunner** | 通用调度、checkpoint、task 循环 | 不感知 Review 业务逻辑 |
|
||||
| **ReviewKernelRuntime** | Review 业务封装、skills、subagents、hooks | 不直接调度任务(委托给 runner) |
|
||||
|
||||
**Subagents 间边界**:
|
||||
|
||||
| Subagent | 输入 | 输出 | 边界限制 |
|
||||
|----------|------|------|----------|
|
||||
| **triage** | ReviewContext | review hints + budget | 只生成提示,不审查 |
|
||||
| **full_review** | ReviewTask + context | findings[] + diagnostics | 一次完整自主审查,不预拆域或文件 |
|
||||
|
||||
**Hook 介入边界**:
|
||||
|
||||
```typescript
|
||||
// 在关键生命周期点介入
|
||||
SessionStart // session 启动时
|
||||
SubagentStart // subagent 启动时
|
||||
PreToolUse // 工具调用前(可修改输入、阻断)
|
||||
PermissionRequest // 权限请求时(决定 allow/ask/deny)
|
||||
PostToolUse // 工具调用成功后
|
||||
PostToolUseFailure // 工具调用失败后
|
||||
```
|
||||
|
||||
**Session 隔离边界**:
|
||||
|
||||
- 每个 PR/Commit 对应独立 session
|
||||
- session 间 state 不共享
|
||||
- tool 默认禁止 cross_session 操作
|
||||
- subagent invocation 绑定 parentSessionId
|
||||
|
||||
---
|
||||
|
||||
## 5. 运行时与状态设计
|
||||
|
||||
### 5.1 Session 与 Checkpoint
|
||||
|
||||
每条 PR/commit 审查对应一个 kernel session:
|
||||
|
||||
| 数据 | 用途 |
|
||||
|---|---|
|
||||
| `KernelSessionRecord` | 记录 scopeType、scopeKey、metadata、lastRunId |
|
||||
| `KernelSessionEventRecord` | append-only 事件流,记录 run/task/hook/feedback 生命周期 |
|
||||
| `KernelCheckpoint<TState>` | 持久化 state、pendingTasks、stopReason |
|
||||
| `KernelSubagentInvocationRecord` | 记录每次 subagent 委派调用 |
|
||||
|
||||
恢复语义:
|
||||
|
||||
- `continueExisting=true` 时从 persisted checkpoint 恢复 `state + pendingTasks`;
|
||||
- 显式忽略旧 checkpoint 的 stopReason,允许 feedback 后继续推进;
|
||||
- 当前不 replay session events 重建 state,event 主要用于投影与审计。
|
||||
|
||||
### 5.2 ReviewKernelState
|
||||
|
||||
核心状态包括:
|
||||
|
||||
| 字段 | 说明 |
|
||||
|---|---|
|
||||
| `targetSha` | 当前审查目标 commit |
|
||||
| `mirrorPath/workspacePath` | 本地仓库与工作区路径 |
|
||||
| `context` | `ReviewContext`,包含 diff、changedFiles、fileContents 等 |
|
||||
| `projectPrompt` | 仓库级审查 prompt |
|
||||
| `compressedContext` | 自动压缩摘要及 token 元数据 |
|
||||
| `triage/reviewTask/reviewCompleted` | 自主审查提示、预算与完成状态 |
|
||||
| `findings` | subagents 收集到的问题 |
|
||||
| `reviewDiagnostics` | full review 工具调用、停止原因、解析计数等诊断信息 |
|
||||
| `published/reviewedRefSaved` | 发布与审查快照保存状态位 |
|
||||
|
||||
### 5.3 Subagent Invocation
|
||||
|
||||
每次 subagent 调用会持久化:
|
||||
|
||||
| 字段 | 说明 |
|
||||
|---|---|
|
||||
| `parent_session_id` | 父 session |
|
||||
| `parent_run_id` | 当前 review run |
|
||||
| `parent_task_name` | 触发该调用的 task name |
|
||||
| `subagent_name` | subagent id,例如 `review:triage` |
|
||||
| `agent_id` | 本次调用唯一 agent identity |
|
||||
| `status` | running / completed / failed |
|
||||
| `input_json` | delegation packet |
|
||||
| `result_json` | structured invocation result |
|
||||
|
||||
失败处理:
|
||||
|
||||
- invoker 将 invocation 标记为 `failed`;
|
||||
- runner 写入 `task_failed` event;
|
||||
- checkpoint 保存当前 state 与 `[failedTask, ...pendingTasks]`,stopReason=`failed`;
|
||||
- 调用方可根据 checkpoint 与错误信息决定重试/人工介入。
|
||||
|
||||
### 5.4 上下文压缩与回注
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant P as Planner
|
||||
participant C as ContextCompressionService
|
||||
participant S as Session Checkpoint
|
||||
participant A as Subagent
|
||||
|
||||
P->>C: shouldCompress(context, compressedContext)
|
||||
C-->>P: true when tokenEstimate >= contextWindow * 0.8
|
||||
P->>C: compress(context, projectPrompt)
|
||||
C-->>S: compressedContext(summary, token stats, model, timestamp)
|
||||
P->>A: invoke subagent with contextSummary
|
||||
A-->>A: prompt includes compressed summary
|
||||
```
|
||||
|
||||
压缩触发阈值:
|
||||
|
||||
- 使用 `tokenCounter.getContextWindow(plannerModel)` 获取模型上下文窗口;
|
||||
- 取 80% 作为触发阈值,预留 20% 冗余;
|
||||
- 若无法获取模型配置,兜底使用默认窗口。
|
||||
|
||||
### 5.5 Hooks 与 Permission
|
||||
|
||||
内置 hooks:
|
||||
|
||||
| Hook | Event | 作用 |
|
||||
|---|---|---|
|
||||
| `kernel:session-start-audit` | `SessionStart` | 写入 `hook_session_start` event |
|
||||
| `kernel:subagent-start-audit` | `SubagentStart` | 写入 `hook_subagent_start` event |
|
||||
| `kernel:pre-tool-audit` | `PreToolUse` | 为工具调用追加审计上下文 |
|
||||
| `kernel:permission-request-audit` | `PermissionRequest` | 记录权限请求上下文 |
|
||||
|
||||
工具权限默认策略:
|
||||
|
||||
| Scope | 默认行为 |
|
||||
|---|---|
|
||||
| `read` | allow |
|
||||
| `write` | ask |
|
||||
| `command` | ask |
|
||||
| `git_write` | ask |
|
||||
| `network` | deny |
|
||||
| `cross_session` | deny |
|
||||
|
||||
---
|
||||
|
||||
## 6. API 与管理后台可观测性
|
||||
|
||||
### 6.1 Admin API
|
||||
|
||||
| API | 说明 |
|
||||
|---|---|
|
||||
| `GET /admin/api/review/sessions` | 返回 session 列表与 summary |
|
||||
| `GET /admin/api/review/sessions/:sessionId` | 返回 session、summary、checkpoint、plan、timeline、events、subagentInvocations、runDetails |
|
||||
| `GET /admin/api/review/kernel/tasks` | 返回 skill + subagent task catalog |
|
||||
| `GET /admin/api/review/kernel/subagents` | 返回 subagent catalog |
|
||||
| `GET /admin/api/review/kernel/hooks` | 返回 hook catalog |
|
||||
|
||||
### 6.2 Subagent Catalog 响应字段
|
||||
|
||||
```json
|
||||
{
|
||||
"kind": "subagent",
|
||||
"name": "review:full_review",
|
||||
"source": "built-in",
|
||||
"description": "执行一次完整自主代码审查",
|
||||
"whenToUse": "当 triage 生成审查提示后执行完整审查",
|
||||
"modelRole": "specialist",
|
||||
"tags": ["review", "specialist", "full-review", "autonomous-review"],
|
||||
"resumable": true
|
||||
}
|
||||
```
|
||||
|
||||
### 6.3 管理后台展示建议
|
||||
|
||||
管理后台应采用双层控制面:
|
||||
|
||||
- 上层:Kernel Subagents 目录,展示 built-in/custom/plugin subagents;
|
||||
- 下层:模型角色路由,配置 `planner / specialist` 到 provider/model。
|
||||
|
||||
展示字段建议:
|
||||
|
||||
| 区域 | 字段 |
|
||||
|---|---|
|
||||
| Subagent 目录 | name、source、description、whenToUse、modelRole、tags、resumable |
|
||||
| Session 详情 | summary、plan、timeline、findings、comments、subagentInvocations |
|
||||
| Invocation 详情 | agentId、status、startedAt、finishedAt、summary、artifacts |
|
||||
|
||||
---
|
||||
|
||||
## 7. 非功能性设计
|
||||
|
||||
### 7.1 安全设计
|
||||
|
||||
- 工具调用统一走 permission gating,避免 subagent 绕过权限策略;
|
||||
- 高风险工具默认 ask/deny,不允许直接执行网络、跨 session 或写操作;
|
||||
- hooks 可作为后续审批、审计、通知与策略扩展点;
|
||||
- LLM prompt 不作为安全边界,所有外部副作用必须由 tool/skill/adapters 承载。
|
||||
|
||||
### 7.2 高可用与恢复
|
||||
|
||||
- 每个 task 完成后保存 checkpoint,降低失败后的重复工作;
|
||||
- subagent invocation 失败会记录 failed 状态,便于定位失败代理;
|
||||
- feedback 后通过 `continueExisting` 从 checkpoint 继续;
|
||||
- publish 与 save reviewed ref 分离,避免评论发布与 ref 保存互相污染;
|
||||
- cleanup workspace 放在 runtime finally 中执行,降低资源泄漏风险。
|
||||
|
||||
### 7.3 可观测性
|
||||
|
||||
- session event 记录 run/task/hook/feedback 生命周期;
|
||||
- subagent invocation 记录 parent-child 委派关系;
|
||||
- admin projection 汇总 plan/timeline/currentStep/findingCount/pendingTaskCount;
|
||||
- compression 记录 sourceTokenEstimate、summaryTokenEstimate、triggerThreshold、model。
|
||||
|
||||
### 7.4 性能与容量
|
||||
|
||||
- 大 diff 先经 diff extractor/token budget 裁剪,再由 compression service 做会话级摘要;
|
||||
- `review:full_review` 在单个自主循环内使用工具逐步调查,避免运行时预拆 domain 或文件;
|
||||
- tool orchestration 可并发执行 read-only 工具,非并发安全工具串行;
|
||||
- session/event/checkpoint 使用 SQLite,适合当前单体部署;未来高并发可迁移到外部数据库。
|
||||
|
||||
### 7.5 可维护性与扩展性
|
||||
|
||||
- 新增内置 Agent 应只新增 `KernelSubagentDefinition` 并打 tags;
|
||||
- 新增流程副作用应优先实现 skill/adapters;
|
||||
- 新增横切逻辑应优先实现 hook;
|
||||
- 新增工具必须声明 permissionScope 和 isConcurrencySafe。
|
||||
|
||||
---
|
||||
|
||||
## 8. 测试与上线验证
|
||||
|
||||
### 8.1 自动化测试分层
|
||||
|
||||
| 层级 | 测试文件 | 覆盖点 |
|
||||
|---|---|---|
|
||||
| Unit | `src/review/kernel/__tests__/session-read-model.test.ts` | session summary/plan/timeline 投影 |
|
||||
| Unit | `src/review/tools/__tests__/tool-permissions.test.ts` | permission scope 默认策略 |
|
||||
| Contract | `src/agent-kernel/hooks/__tests__/kernel-hook-runner.test.ts` | hook 聚合、approve/block、updatedInput |
|
||||
| Integration | `src/controllers/__tests__/admin-review-sessions.test.ts` | admin session 与 catalog API |
|
||||
| Integration | `src/controllers/__tests__/feedback-kernel-session.test.ts` | feedback approve/reject/rollback/continue |
|
||||
| Runtime | `src/review/kernel/__tests__/runtime-happy-path.test.ts` | 完整 runtime happy path |
|
||||
| Runtime | `src/review/kernel/__tests__/runtime-feedback-resume.test.ts` | awaiting feedback 后恢复 |
|
||||
| Runtime | `src/review/kernel/__tests__/runtime-replay-invariants.test.ts` | checkpoint/resume/replay 不变量 |
|
||||
| Runtime | `src/review/kernel/__tests__/runtime-concurrency-idempotency.test.ts` | 并发上限与幂等 |
|
||||
| Canary | `src/review/kernel/__tests__/compression-resumability.test.ts` | 压缩恢复与生产关键 canary |
|
||||
|
||||
### 8.2 上线前门禁
|
||||
|
||||
必须通过:
|
||||
|
||||
```bash
|
||||
bun run lint
|
||||
bun run build
|
||||
bun test src/review/kernel/__tests__ src/review/tools/__tests__ src/controllers/__tests__ src/agent-kernel/hooks/__tests__
|
||||
bun test
|
||||
```
|
||||
|
||||
关键验收信号:
|
||||
|
||||
- runtime happy path 完成,stopReason=`completed`;
|
||||
- feedback resume 从 `awaiting_human_feedback` 恢复到 completed;
|
||||
- compression resume 保留 targetSha、pending boundary、invocation boundary、summary;
|
||||
- permission deny 不会绕过工具治理;
|
||||
- duplicate enqueue/continue/feedback 不产生重复有效工作;
|
||||
- admin session detail 能看到 plan/timeline/subagentInvocations。
|
||||
|
||||
### 8.3 灰度与回滚
|
||||
|
||||
- 配置默认:`REVIEW_ENGINE=kernel`;
|
||||
- 若需要回滚,可临时切到 `codex` 引擎,但旧固定 agent 编排不再作为主路径;
|
||||
- 灰度期间重点观察 session stopReason 分布、task_failed 事件、subagent failed invocations、feedback resume 成功率。
|
||||
|
||||
---
|
||||
|
||||
## 9. 风险、待确认与后续演进
|
||||
|
||||
### 9.1 风险与应对
|
||||
|
||||
| 风险 | 影响 | 应对 |
|
||||
|---|---|---|
|
||||
| Built-in definitions 仍在代码中 | 扩展仍需发版 | 下一阶段引入 plugin/custom subagent loader |
|
||||
| SQLite 单文件并发能力有限 | 高并发 session 下写入竞争 | 当前单体可接受;未来迁移外部 DB 或队列化写入 |
|
||||
| Compression summary 可能遗漏细节 | 后续 subagent 判断偏差 | 保留 recent context + summary;测试锁定关键事实不丢 |
|
||||
| Hook 阻断策略过强或过弱 | 工具误阻断或越权 | permission matrix 测试 + 审计 event + 管理后台策略展示 |
|
||||
|
||||
### 9.2 后续演进计划
|
||||
|
||||
1. **Plugin-based Subagent Loading**:支持从目录或配置加载 custom/plugin subagents。
|
||||
2. **Child Session Tree**:为长任务或后台 subagent 引入 child session/resume tree。
|
||||
3. **Attachment Reinjection**:压缩后恢复文件附件、计划附件和技能附件。
|
||||
4. **更细粒度权限模型**:支持仓库级、工具级、用户级策略配置。
|
||||
5. **Subagent 版本治理**:为 built-in/custom/plugin subagents 增加 version、enabled、rollout 字段。
|
||||
|
||||
### 9.3 评审清单
|
||||
|
||||
- [ ] 内置 Agent 是否都通过 registry/invoker 调用,而不是 runtime 硬编码实例?
|
||||
- [ ] planner 是否按 tag/capability 选择 subagent?
|
||||
- [ ] 每次 subagent 调用是否有 invocation record?
|
||||
- [ ] feedback 后 continue 是否从 checkpoint 恢复?
|
||||
- [ ] 压缩 summary 是否持久化并回注 triage/full_review?
|
||||
- [ ] 工具执行是否经过 permission/hook/orchestration?
|
||||
- [ ] 管理后台是否能展示 catalog、timeline、invocations?
|
||||
- [ ] 生产测试门禁是否覆盖 happy path、失败恢复、幂等和 canary?
|
||||
|
||||
---
|
||||
|
||||
## 版本记录
|
||||
|
||||
| 版本 | 日期 | 说明 |
|
||||
|---|---|---|
|
||||
| v0.1 | 2026-04-28 | 初版:记录 Kernel 内置 Agent 架构、运行链路、可观测性与测试门禁 |
|
||||
@@ -11,6 +11,8 @@ RUN bun install --no-frozen-lockfile
|
||||
COPY src ./src
|
||||
COPY tsconfig.json .
|
||||
|
||||
COPY frontend/dist ./public
|
||||
|
||||
EXPOSE 5174
|
||||
|
||||
CMD ["bun", "run", "start"]
|
||||
|
||||
169
e2e/__tests__/e2e-review.test.ts
Normal file
169
e2e/__tests__/e2e-review.test.ts
Normal file
@@ -0,0 +1,169 @@
|
||||
import { afterAll, beforeAll, describe, expect, test } from 'bun:test';
|
||||
import {
|
||||
E2ETestHarness,
|
||||
type Finding,
|
||||
type Scenario,
|
||||
type SessionDetail,
|
||||
} from './e2e-test-harness';
|
||||
|
||||
/**
 * Checks a list of findings against the expectations declared by a scenario.
 *
 * Verifies, in order: the total count is at least `minFindings`, the total count
 * does not exceed `maxFindings` (when declared), at least `minHighSeverity`
 * findings have severity `'high'`, and all non-empty fingerprints are unique.
 *
 * @param findings - Findings extracted from a review session.
 * @param scenario - Fixture expectations to compare against.
 */
function assertFindingsMatchScenario(findings: Finding[], scenario: Scenario): void {
  const total = findings.length;
  expect(total).toBeGreaterThanOrEqual(scenario.minFindings);
  if (scenario.maxFindings !== undefined) {
    expect(total).toBeLessThanOrEqual(scenario.maxFindings);
  }

  // Single pass: tally high-severity findings and collect truthy fingerprints.
  let highCount = 0;
  const seenFingerprints: string[] = [];
  for (const item of findings) {
    if (item.severity === 'high') {
      highCount += 1;
    }
    if (item.fingerprint) {
      seenFingerprints.push(item.fingerprint);
    }
  }

  expect(highCount).toBeGreaterThanOrEqual(scenario.minHighSeverity);
  // A Set collapses duplicates, so equal sizes mean every fingerprint is distinct.
  expect(new Set(seenFingerprints).size).toBe(seenFingerprints.length);
}
|
||||
|
||||
/**
 * Asserts that every step of the review pipeline is reported as `'completed'`
 * in the session plan.
 *
 * @param detail - Session detail whose `plan` entries are inspected by `key`.
 */
function expectPipelineStepsCompleted(detail: SessionDetail): void {
  const statusOf = new Map(detail.plan.map(({ key, status }) => [key, status]));

  // Step keys are checked in pipeline order so the first failure points at the earliest step.
  const requiredSteps = [
    'prepare_workspace',
    'build_context',
    'review:triage',
    'review:full_review',
    'aggregate_findings',
    'publish_review',
    'save_reviewed_ref',
  ];
  for (const stepKey of requiredSteps) {
    expect(statusOf.get(stepKey)).toBe('completed');
  }
}
|
||||
|
||||
/**
 * Asserts the shape of a session that ran the autonomous full-review path.
 *
 * Checks that there is exactly one completed `review:full_review` invocation,
 * that the checkpoint flags (`reviewCompleted`, `published`, `reviewedRefSaved`)
 * are set, that the recorded tool-call diagnostics match the expected sequence,
 * that the first finding carries the expected detail/evidence text, and that
 * published comments include at least one without a path and one on
 * `src/user-handler.ts`.
 *
 * @param detail - Session detail returned by the admin API.
 */
function expectAutonomousFullReviewPipeline(detail: SessionDetail): void {
  const fullReviewRuns = detail.subagentInvocations.filter(
    ({ subagentName }) => subagentName === 'review:full_review'
  );
  expect(fullReviewRuns).toHaveLength(1);
  expect(fullReviewRuns[0].status).toBe('completed');

  // `state` is undefined when the checkpoint (or its state) is missing; the
  // optional chains below then yield undefined and the assertions fail.
  const state = detail.checkpoint?.state;
  expect(state?.reviewCompleted).toBe(true);
  expect(state?.published).toBe(true);
  expect(state?.reviewedRefSaved).toBe(true);

  const diagnostics = state?.reviewDiagnostics;
  expect(diagnostics?.toolCallNames).toEqual(['search_code', 'read_file', 'read_file']);
  expect(diagnostics?.stopReason).toBe('modelFinalized');

  const findings = state?.findings ?? [];
  expect(findings.length).toBeGreaterThan(0);
  const firstFinding = findings[0];
  expect(firstFinding.detail).toContain('auth/user model');
  expect(firstFinding.evidence).toContain('src/auth.ts');

  const published = detail.runDetails?.comments?.filter(({ status }) => status === 'published');
  expect(published?.length).toBeGreaterThan(0);
  // One comment with no path, plus one attached to a file.
  expect(published?.some((entry) => !entry.path)).toBe(true);
  expect(published?.some((entry) => entry.path === 'src/user-handler.ts')).toBe(true);
}
|
||||
|
||||
describe('E2E Review Flow', () => {
|
||||
const harness = new E2ETestHarness();
|
||||
|
||||
beforeAll(async () => {
|
||||
await harness.start();
|
||||
await harness.seedGitea();
|
||||
}, 90_000);
|
||||
|
||||
afterAll(async () => {
|
||||
await harness.stop();
|
||||
});
|
||||
|
||||
test('核心链路验证: webhook → clone → triage → full_review → aggregate → publish → save ref → Gitea has comments', async () => {
|
||||
const { owner, repo, prNumber } = await harness.seedPR('simple-bug-pr');
|
||||
|
||||
const webhookResponse = await harness.triggerWebhook(owner, repo, prNumber);
|
||||
expect(webhookResponse.status).toBe('accepted');
|
||||
|
||||
const result = await harness.waitForReview(owner, repo, prNumber, 120);
|
||||
expect(result.completed).toBe(true);
|
||||
expect(result.sessionState).toBe('completed');
|
||||
expectPipelineStepsCompleted(result.detail);
|
||||
expect(result.detail.checkpoint?.state?.published).toBe(true);
|
||||
expectAutonomousFullReviewPipeline(result.detail);
|
||||
|
||||
const comments = await harness.getGiteaComments(owner, repo, prNumber);
|
||||
expect(comments.length).toBeGreaterThan(0);
|
||||
}, 150_000);
|
||||
|
||||
test('状态正确性: session status transitions and checkpoint consistency', async () => {
|
||||
const { owner, repo, prNumber } = await harness.seedPR('security-pr');
|
||||
|
||||
await harness.triggerWebhook(owner, repo, prNumber);
|
||||
const snapshot = await harness.waitForSessionSnapshot(owner, repo, prNumber, 30);
|
||||
expect(['queued', 'planning', 'executing', 'completed']).toContain(
|
||||
snapshot.detail.summary.status
|
||||
);
|
||||
|
||||
const result = await harness.waitForReview(owner, repo, prNumber, 120);
|
||||
expect(['queued', 'planning', 'executing', 'completed']).toContain(result.observedStates[0]);
|
||||
expect(result.sessionState).toBe('completed');
|
||||
expect(result.detail.checkpoint?.stopReason).toBe('completed');
|
||||
expect(result.detail.checkpoint?.pendingTasks ?? []).toHaveLength(0);
|
||||
expect(result.detail.summary.findingCount).toBe(harness.extractFindings(result.detail).length);
|
||||
}, 150_000);
|
||||
|
||||
test('Findings 质量: fixtures trigger expected triage modes, autonomous full review, and finding counts', async () => {
|
||||
const fixtureNames = ['simple-bug-pr', 'minimal-change-pr'];
|
||||
|
||||
for (const fixtureName of fixtureNames) {
|
||||
const { owner, repo, prNumber, scenario } = await harness.seedPR(fixtureName);
|
||||
await harness.triggerWebhook(owner, repo, prNumber);
|
||||
const result = await harness.waitForReview(owner, repo, prNumber, 120);
|
||||
expect(result.sessionState).toBe('completed');
|
||||
|
||||
const triageMode = harness.extractTriageMode(result.detail);
|
||||
if (triageMode !== undefined) {
|
||||
expect(triageMode).toBe(scenario.expectedTriageMode);
|
||||
}
|
||||
|
||||
expectPipelineStepsCompleted(result.detail);
|
||||
expect(result.detail.subagentInvocations).toEqual(
|
||||
expect.arrayContaining([
|
||||
expect.objectContaining({ subagentName: 'review:full_review', status: 'completed' }),
|
||||
])
|
||||
);
|
||||
|
||||
assertFindingsMatchScenario(harness.extractFindings(result.detail), scenario);
|
||||
}
|
||||
}, 360_000);
|
||||
|
||||
test('幂等性: duplicate webhook does not create duplicate comments', async () => {
|
||||
const { owner, repo, prNumber } = await harness.seedPR('duplicate-webhook-pr');
|
||||
|
||||
await harness.triggerWebhook(owner, repo, prNumber);
|
||||
const firstResult = await harness.waitForReview(owner, repo, prNumber, 120);
|
||||
expect(firstResult.sessionState).toBe('completed');
|
||||
const firstComments = await harness.getGiteaComments(owner, repo, prNumber);
|
||||
expect(firstComments.length).toBeGreaterThan(0);
|
||||
|
||||
const duplicateWebhookResponse = await harness.triggerWebhook(owner, repo, prNumber);
|
||||
expect(['accepted', 'deduplicated']).toContain(duplicateWebhookResponse.status);
|
||||
const secondResult = await harness.waitForReview(owner, repo, prNumber, 60);
|
||||
expect(secondResult.sessionId).toBe(firstResult.sessionId);
|
||||
const secondComments = await harness.getGiteaComments(owner, repo, prNumber);
|
||||
|
||||
expect(secondComments.length).toBe(firstComments.length);
|
||||
expect(new Set(secondComments.map((comment) => comment.body)).size).toBe(
|
||||
new Set(firstComments.map((comment) => comment.body)).size
|
||||
);
|
||||
}, 180_000);
|
||||
|
||||
test('错误恢复: clone failure marks session failed, not stuck', async () => {
|
||||
const { owner, repo, prNumber } = await harness.seedPR('clean-refactor-pr');
|
||||
|
||||
await harness.triggerWebhook(owner, repo, prNumber, {
|
||||
repositoryPatch: {
|
||||
clone_url: `http://invalid-host-99999.local/${owner}/${repo}-missing.git`,
|
||||
},
|
||||
});
|
||||
|
||||
const result = await harness.waitForReview(owner, repo, prNumber, 120);
|
||||
expect(['completed', 'failed']).toContain(result.sessionState);
|
||||
}, 150_000);
|
||||
});
|
||||
748
e2e/__tests__/e2e-test-harness.ts
Normal file
748
e2e/__tests__/e2e-test-harness.ts
Normal file
@@ -0,0 +1,748 @@
|
||||
import { createHmac } from 'node:crypto';
|
||||
import { existsSync, mkdirSync, mkdtempSync, rmSync } from 'node:fs';
|
||||
import { readFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import path from 'node:path';
|
||||
|
||||
const ENCRYPTION_KEY = '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef';
|
||||
const WEBHOOK_SECRET = 'e2e-test-webhook-secret';
|
||||
const TERMINAL_STATES = new Set(['completed', 'failed', 'ignored', 'cancelled', 'error']);
|
||||
|
||||
type JsonPrimitive = string | number | boolean | null;
|
||||
type JsonValue = JsonPrimitive | JsonValue[] | { [key: string]: JsonValue };
|
||||
|
||||
interface GiteaUser {
|
||||
login: string;
|
||||
full_name?: string;
|
||||
}
|
||||
|
||||
interface GiteaRepo {
|
||||
id: number;
|
||||
name: string;
|
||||
full_name: string;
|
||||
clone_url: string;
|
||||
html_url: string;
|
||||
ssh_url?: string;
|
||||
owner: GiteaUser;
|
||||
}
|
||||
|
||||
interface GiteaPullRequest {
|
||||
id: number;
|
||||
number: number;
|
||||
title: string;
|
||||
html_url: string;
|
||||
head: {
|
||||
ref: string;
|
||||
sha: string;
|
||||
repo?: GiteaRepo;
|
||||
};
|
||||
base: {
|
||||
ref: string;
|
||||
sha: string;
|
||||
repo?: GiteaRepo;
|
||||
};
|
||||
requested_reviewers?: GiteaUser[];
|
||||
user?: GiteaUser;
|
||||
}
|
||||
|
||||
interface Scenario {
|
||||
name: string;
|
||||
description: string;
|
||||
expectedTriageMode: string;
|
||||
expectedDomains: string[];
|
||||
minFindings: number;
|
||||
maxFindings?: number;
|
||||
minHighSeverity: number;
|
||||
testIdempotency?: boolean;
|
||||
}
|
||||
|
||||
interface AdminLoginResponse {
|
||||
token: string;
|
||||
}
|
||||
|
||||
interface SessionSummary {
|
||||
sessionId: string;
|
||||
owner?: string;
|
||||
repo?: string;
|
||||
prNumber?: number;
|
||||
status: string;
|
||||
findingCount: number;
|
||||
}
|
||||
|
||||
interface SessionListEntry {
|
||||
session: {
|
||||
id: string;
|
||||
metadata?: Record<string, JsonValue>;
|
||||
};
|
||||
summary: SessionSummary;
|
||||
}
|
||||
|
||||
interface SessionListResponse {
|
||||
data: SessionListEntry[];
|
||||
}
|
||||
|
||||
interface Finding {
|
||||
severity?: string;
|
||||
confidence?: number;
|
||||
path?: string;
|
||||
line?: number;
|
||||
title?: string;
|
||||
detail?: string;
|
||||
evidence?: string;
|
||||
category?: string;
|
||||
domain?: string;
|
||||
fingerprint?: string;
|
||||
}
|
||||
|
||||
interface SessionDetail {
|
||||
session: {
|
||||
id: string;
|
||||
metadata?: Record<string, JsonValue>;
|
||||
};
|
||||
summary: SessionSummary;
|
||||
checkpoint: {
|
||||
stopReason?: string;
|
||||
pendingTasks?: Array<{ name: string }>;
|
||||
state?: {
|
||||
targetSha?: string;
|
||||
triage?: {
|
||||
mode?: string;
|
||||
domains?: string[];
|
||||
};
|
||||
triageMode?: string;
|
||||
findings?: Finding[];
|
||||
published?: boolean;
|
||||
reviewedRefSaved?: boolean;
|
||||
reviewCompleted?: boolean;
|
||||
reviewedRef?: string;
|
||||
reviewDiagnostics?: {
|
||||
toolCallNames?: string[];
|
||||
toolCallCount?: number;
|
||||
parsedFindingCount?: number;
|
||||
stopReason?: string;
|
||||
};
|
||||
};
|
||||
} | null;
|
||||
plan: Array<{ key: string; status: string; label: string }>;
|
||||
events: Array<{ eventType: string; payload: Record<string, JsonValue> }>;
|
||||
runDetails: {
|
||||
findings?: Finding[];
|
||||
comments?: Array<{
|
||||
status?: string;
|
||||
path?: string;
|
||||
line?: number;
|
||||
body?: string;
|
||||
fingerprint?: string;
|
||||
}>;
|
||||
} | null;
|
||||
subagentInvocations: Array<{
|
||||
subagentName: string;
|
||||
status: string;
|
||||
result?: Record<string, JsonValue>;
|
||||
}>;
|
||||
}
|
||||
|
||||
interface GiteaTokenResponse {
|
||||
sha1?: string;
|
||||
token?: string;
|
||||
}
|
||||
|
||||
interface CommentLike {
|
||||
id: number;
|
||||
body: string;
|
||||
path?: string;
|
||||
line?: number;
|
||||
}
|
||||
|
||||
interface SeedResult {
|
||||
owner: string;
|
||||
repo: string;
|
||||
prNumber: number;
|
||||
scenario: Scenario;
|
||||
}
|
||||
|
||||
interface ReviewWaitResult {
|
||||
completed: boolean;
|
||||
sessionState: string;
|
||||
sessionId: string;
|
||||
detail: SessionDetail;
|
||||
observedStates: string[];
|
||||
}
|
||||
|
||||
interface TriggerWebhookOptions {
|
||||
repositoryPatch?: Partial<GiteaRepo>;
|
||||
action?: string;
|
||||
}
|
||||
|
||||
export class E2ETestHarness {
|
||||
readonly giteaUrl = (process.env.E2E_GITEA_URL ?? 'http://localhost:3333').replace(/\/$/, '');
|
||||
readonly adminUser = process.env.E2E_GITEA_ADMIN_USER ?? 'e2e-admin';
|
||||
readonly adminPass = process.env.E2E_GITEA_ADMIN_PASS ?? 'e2ePassword123!';
|
||||
|
||||
private assistantProcess?: Bun.Subprocess<'pipe', 'pipe', 'pipe'>;
|
||||
private assistantPort = 43100 + Math.floor(Math.random() * 1000);
|
||||
private tempDir = mkdtempSync(path.join(tmpdir(), 'e2e-assistant-'));
|
||||
private databasePath = path.join(this.tempDir, 'assistant.db');
|
||||
private reviewWorkDir = path.join(this.tempDir, 'review-workdir');
|
||||
private adminJwt?: string;
|
||||
private giteaToken?: string;
|
||||
private repoCounter = 0;
|
||||
|
||||
/** Base URL (loopback address plus `assistantPort`) of the spawned assistant process. */
get assistantUrl(): string {
  const port = this.assistantPort;
  return `http://127.0.0.1:${port}`;
}
|
||||
|
||||
/**
 * Boots the assistant process and caches an admin JWT for later admin API calls.
 */
async start(): Promise<void> {
  await this.startAssistant();
  const jwt = await this.getAdminJWT();
  this.adminJwt = jwt;
}
|
||||
|
||||
/**
 * Tears the harness down by delegating to {@link stopAssistant}.
 * The work itself is synchronous; the method is async so callers can `await` it.
 */
async stop(): Promise<void> {
  this.stopAssistant();
}
|
||||
|
||||
/**
 * Spawns the assistant (`bun run src/index.ts`) from the repository root and
 * waits until its health endpoint responds.
 *
 * Does nothing when a process is already tracked. The child inherits the
 * current environment, overridden with E2E-specific settings (mock LLM flag,
 * encryption key, temp database path, kernel engine, chosen port, log level).
 */
async startAssistant(): Promise<void> {
  if (this.assistantProcess) {
    return;
  }

  const repoRoot = path.resolve(import.meta.dir, '../..');
  const childEnv = {
    ...process.env,
    E2E_MOCK_LLM: '1',
    ENCRYPTION_KEY,
    DATABASE_PATH: this.databasePath,
    REVIEW_ENGINE: 'kernel',
    PORT: String(this.assistantPort),
    // Respect an explicit LOG_LEVEL from the caller; otherwise only surface errors.
    LOG_LEVEL: process.env.LOG_LEVEL ?? 'error',
  };

  this.assistantProcess = Bun.spawn(['bun', 'run', 'src/index.ts'], {
    cwd: repoRoot,
    stdout: 'pipe',
    stderr: 'pipe',
    env: childEnv,
  });

  const { stdout, stderr } = this.assistantProcess;
  this.drainProcessOutput(stdout, 'assistant stdout');
  this.drainProcessOutput(stderr, 'assistant stderr');

  await this.waitForAssistantHealth();
}
|
||||
|
||||
/**
 * Kills the tracked assistant process (if any) and removes the harness temp directory.
 *
 * NOTE(review): the process is signalled but its exit is not awaited before the
 * directory is removed.
 */
stopAssistant(): void {
  const child = this.assistantProcess;
  if (child) {
    child.kill();
    this.assistantProcess = undefined;
  }

  if (!existsSync(this.tempDir)) {
    return;
  }
  rmSync(this.tempDir, { recursive: true, force: true });
}
|
||||
|
||||
/**
 * Prepares Gitea for the test run: waits for it to respond, ensures the admin
 * user exists, creates an API token, then pushes the resulting configuration
 * to the assistant.
 */
async seedGitea(): Promise<void> {
  await this.waitForGitea();
  await this.ensureAdminUser();

  // The token must be stored before configureAssistant(), which reads it.
  const token = await this.createToken();
  this.giteaToken = token;

  await this.configureAssistant();
}
|
||||
|
||||
/**
 * Creates a fresh repository for a fixture scenario, pushes its base and
 * feature branches, opens a pull request, and registers a webhook.
 *
 * @param scenarioName - Name of the fixture directory to load.
 * @returns Owner, repository name, PR number, and the parsed scenario.
 */
async seedPR(scenarioName: string): Promise<SeedResult> {
  // Lazily seed Gitea when no token has been created yet.
  if (!this.giteaToken) {
    await this.seedGitea();
  }

  const scenario = await this.readScenario(scenarioName);
  const owner = this.adminUser;

  // Repository name: sanitized scenario name + timestamp + per-harness sequence number.
  const slug = scenarioName.replace(/[^a-z0-9-]/gi, '-');
  const stamp = Date.now();
  const sequence = this.repoCounter++;
  const repo = `e2e-${slug}-${stamp}-${sequence}`;

  const baseBranch = 'main';
  // Reads the counter after the increment above, so the suffix is sequence + 1.
  const featureBranch = `feature/${scenarioName}-${this.repoCounter}`;

  await this.createRepo(repo);

  const baseFiles = await this.readFixtureFiles(scenarioName, 'base');
  await this.pushBranchWithFiles(owner, repo, baseBranch, baseFiles, `test: seed ${scenario.name} base`);

  const branchFiles = await this.readFixtureFiles(scenarioName, 'branch');
  await this.pushBranchWithFiles(owner, repo, featureBranch, branchFiles, `feat: ${scenario.description}`);

  const pullRequest = await this.createPullRequest(
    owner,
    repo,
    scenario.description,
    featureBranch,
    baseBranch
  );
  await this.createWebhook(owner, repo);

  return { owner, repo, prNumber: pullRequest.number, scenario };
}
|
||||
|
||||
/**
 * Builds a `pull_request` webhook payload from live Gitea data, signs it with
 * the shared webhook secret (HMAC-SHA256, hex), and posts it to the assistant.
 *
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @param prNumber - Pull request number.
 * @param options - Optional overrides: `repositoryPatch` is merged over the
 *   fetched repository (the owner is always kept from Gitea); `action`
 *   defaults to `'opened'`.
 * @returns The assistant's JSON response.
 */
async triggerWebhook(
  owner: string,
  repo: string,
  prNumber: number,
  options: TriggerWebhookOptions = {}
): Promise<{ status: string; runId?: string }> {
  const repoPath = `/repos/${owner}/${repo}`;
  const repository = await this.giteaFetch<GiteaRepo>(repoPath);
  const pullRequest = await this.giteaFetch<GiteaPullRequest>(`${repoPath}/pulls/${prNumber}`);

  const normalizedRepository = this.normalizeRepoUrls({
    ...repository,
    ...options.repositoryPatch,
    owner: repository.owner,
  });

  // Normalizes the nested repo of a PR side (head/base) when present.
  const normalizeSide = <S extends { repo?: GiteaRepo }>(side: S) => ({
    ...side,
    repo: side.repo ? this.normalizeRepoUrls(side.repo) : undefined,
  });

  const head = normalizeSide(pullRequest.head);
  const base = normalizeSide(pullRequest.base);

  const payload = {
    action: options.action ?? 'opened',
    number: prNumber,
    pull_request: {
      ...pullRequest,
      head,
      base,
      requested_reviewers: pullRequest.requested_reviewers ?? [],
    },
    repository: normalizedRepository,
    sender: repository.owner,
  };

  // The signature is computed over the exact bytes that are sent as the body.
  const body = JSON.stringify(payload);
  const hmac = createHmac('sha256', WEBHOOK_SECRET);
  hmac.update(body);
  const signature = hmac.digest('hex');

  const headers = {
    'Content-Type': 'application/json',
    'X-Gitea-Event': 'pull_request',
    'X-Gitea-Signature': signature,
  };
  return this.fetchJson<{ status: string; runId?: string }>(
    `${this.assistantUrl}/webhook/gitea`,
    { method: 'POST', headers, body }
  );
}
|
||||
|
||||
/**
 * Polls the admin API every 2 seconds until the session for the given PR
 * reaches a terminal state, recording each distinct status seen along the way.
 *
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @param prNumber - Pull request number.
 * @param timeoutSeconds - Maximum time to wait; defaults to 120.
 * @returns The terminal state, session id, full detail, and observed state sequence.
 * @throws Error when the deadline passes without a terminal state.
 */
async waitForReview(
  owner: string,
  repo: string,
  prNumber: number,
  timeoutSeconds = 120
): Promise<ReviewWaitResult> {
  const deadline = Date.now() + timeoutSeconds * 1000;
  const observedStates: string[] = [];

  // Appends a status only when it differs from the most recently recorded one.
  const recordState = (state: string): void => {
    if (observedStates.at(-1) !== state) {
      observedStates.push(state);
    }
  };

  while (Date.now() < deadline) {
    const entry = await this.findSession(owner, repo, prNumber);

    if (entry) {
      const sessionId = entry.summary.sessionId;
      recordState(entry.summary.status);

      // The detail is fetched after the list, so it may already be a step ahead.
      const detail = await this.getSessionDetail(sessionId);
      const sessionState = detail.summary.status;
      recordState(sessionState);

      if (TERMINAL_STATES.has(sessionState)) {
        return {
          completed: sessionState === 'completed',
          sessionState,
          sessionId,
          detail,
          observedStates,
        };
      }
    }

    await this.sleep(2000);
  }

  const trail = observedStates.join(' -> ') || 'none';
  throw new Error(
    `Timed out waiting for review ${owner}/${repo}#${prNumber}; observed states: ${trail}`
  );
}
|
||||
|
||||
/**
 * Polls every 500 ms until a session for the given PR exists, then returns
 * its list entry together with its detail, whatever state it is in.
 *
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @param prNumber - Pull request number.
 * @param timeoutSeconds - Maximum time to wait; defaults to 30.
 * @throws Error when no session appears before the deadline.
 */
async waitForSessionSnapshot(
  owner: string,
  repo: string,
  prNumber: number,
  timeoutSeconds = 30
): Promise<{ entry: SessionListEntry; detail: SessionDetail }> {
  const deadline = Date.now() + timeoutSeconds * 1000;

  while (Date.now() < deadline) {
    const entry = await this.findSession(owner, repo, prNumber);
    if (!entry) {
      await this.sleep(500);
      continue;
    }
    const detail = await this.getSessionDetail(entry.summary.sessionId);
    return { entry, detail };
  }

  throw new Error(`Timed out waiting for session snapshot ${owner}/${repo}#${prNumber}`);
}
|
||||
|
||||
/**
 * Logs in to the assistant's admin API and returns the issued token.
 *
 * NOTE(review): the password is hard-coded to `'password'`; presumably this
 * matches the assistant's default admin password in the E2E environment — confirm.
 */
async getAdminJWT(): Promise<string> {
  const loginUrl = `${this.assistantUrl}/admin/api/login`;
  const { token } = await this.fetchJson<AdminLoginResponse>(loginUrl, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ password: 'password' }),
  });
  return token;
}
|
||||
|
||||
/**
 * Fetches the full detail of one review session from the admin API.
 *
 * @param sessionId - Session identifier; URL-encoded before being placed in the path.
 */
async getSessionDetail(sessionId: string): Promise<SessionDetail> {
  const encodedId = encodeURIComponent(sessionId);
  return this.adminFetch<SessionDetail>(`/admin/api/review/sessions/${encodedId}`);
}
|
||||
|
||||
/**
 * Collects every comment visible on a pull request: issue-level comments
 * followed by the comments of each review.
 *
 * Review comment lookups are best-effort: a failed lookup for one review
 * contributes an empty list instead of failing the whole call.
 *
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @param prNumber - Pull request number.
 */
async getGiteaComments(owner: string, repo: string, prNumber: number): Promise<CommentLike[]> {
  const repoPath = `/repos/${owner}/${repo}`;

  const issueComments = await this.giteaFetch<CommentLike[]>(
    `${repoPath}/issues/${prNumber}/comments`
  );
  const reviews = await this.giteaFetch<{ id: number }[]>(`${repoPath}/pulls/${prNumber}/reviews`);

  // Fetch the comments of all reviews concurrently.
  const perReview = await Promise.all(
    reviews.map(async ({ id }) => {
      try {
        return await this.giteaFetch<CommentLike[]>(
          `${repoPath}/pulls/${prNumber}/reviews/${id}/comments`
        );
      } catch {
        return [] as CommentLike[];
      }
    })
  );

  return issueComments.concat(perReview.flat());
}
|
||||
|
||||
/**
 * Returns the findings of a session, preferring the checkpoint state and
 * falling back to `runDetails`, then to an empty list.
 */
extractFindings(detail: SessionDetail): Finding[] {
  const fromCheckpoint = detail.checkpoint?.state?.findings;
  if (fromCheckpoint != null) {
    return fromCheckpoint;
  }
  return detail.runDetails?.findings ?? [];
}
|
||||
|
||||
/**
 * Returns the triage mode recorded in the checkpoint state: `triage.mode`
 * when present, otherwise the flat `triageMode` field (possibly undefined).
 */
extractTriageMode(detail: SessionDetail): string | undefined {
  const state = detail.checkpoint?.state;
  const nestedMode = state?.triage?.mode;
  return nestedMode != null ? nestedMode : state?.triageMode;
}
|
||||
|
||||
/** Returns the triage domains recorded in the checkpoint state, or an empty list when absent. */
extractDomains(detail: SessionDetail): string[] {
  return detail.checkpoint?.state?.triage?.domains ?? [];
}
|
||||
|
||||
/**
 * Pushes the E2E configuration to the assistant: Gitea API URL and tokens,
 * webhook secret, review engine, work directory, command timeout, and the
 * allowed review commands.
 */
private async configureAssistant(): Promise<void> {
  // The same Gitea token is used for both the access and the admin slot.
  const settings: Record<string, string> = {
    GITEA_API_URL: `${this.giteaUrl}/api/v1`,
    GITEA_ACCESS_TOKEN: this.requireToken(),
    GITEA_ADMIN_TOKEN: this.requireToken(),
    WEBHOOK_SECRET,
    REVIEW_ENGINE: 'kernel',
    REVIEW_WORKDIR: this.reviewWorkDir,
    REVIEW_COMMAND_TIMEOUT_MS: '30000',
    REVIEW_ALLOWED_COMMANDS: 'git,rg,cat,sed,wc',
  };
  await this.putConfig(settings);
}
|
||||
|
||||
/**
 * Sends configuration values to the assistant's admin config endpoint via PUT.
 *
 * @param values - Key/value settings serialized as the JSON request body.
 * @throws Error with the status and response text when the request is not OK.
 */
private async putConfig(values: Record<string, string>): Promise<void> {
  // Reuse the cached JWT when available; otherwise log in on demand.
  const token = this.adminJwt ?? (await this.getAdminJWT());

  const headers = {
    'Content-Type': 'application/json',
    Authorization: `Bearer ${token}`,
  };
  const response = await fetch(`${this.assistantUrl}/admin/api/config`, {
    method: 'PUT',
    headers,
    body: JSON.stringify(values),
  });

  if (response.ok) {
    return;
  }
  throw new Error(`Failed to configure assistant: ${response.status} ${await response.text()}`);
}
|
||||
|
||||
/**
 * Looks up the session that belongs to a pull request in the admin session list.
 *
 * Owner, repo, and PR number are taken from the entry's summary when present
 * and fall back to the session metadata otherwise.
 *
 * @returns The first matching list entry, or `undefined` when none matches.
 */
private async findSession(
  owner: string,
  repo: string,
  prNumber: number
): Promise<SessionListEntry | undefined> {
  const { data } = await this.adminFetch<SessionListResponse>(
    '/admin/api/review/sessions?limit=100'
  );

  const stringOrUndefined = (value: unknown): string | undefined =>
    typeof value === 'string' ? value : undefined;

  for (const entry of data) {
    const { summary } = entry;
    const metadata = entry.session.metadata ?? {};

    // Non-number metadata values are coerced; a missing value becomes NaN and never matches.
    const rawPr = metadata.prNumber;
    const metadataPr = typeof rawPr === 'number' ? rawPr : Number(rawPr);

    const candidateOwner = summary.owner ?? stringOrUndefined(metadata.owner);
    const candidateRepo = summary.repo ?? stringOrUndefined(metadata.repo);
    const candidatePr = summary.prNumber ?? metadataPr;

    if (candidateOwner === owner && candidateRepo === repo && candidatePr === prNumber) {
      return entry;
    }
  }

  return undefined;
}
|
||||
|
||||
/**
 * Performs an authenticated GET against the assistant's admin API.
 *
 * @param apiPath - Path (including leading slash) appended to the assistant base URL.
 * @returns The parsed JSON response, typed by the caller.
 */
private async adminFetch<T>(apiPath: string): Promise<T> {
  // Reuse the cached JWT when available; otherwise log in on demand.
  const token = this.adminJwt ?? (await this.getAdminJWT());
  const headers = { Authorization: `Bearer ${token}` };
  return this.fetchJson<T>(`${this.assistantUrl}${apiPath}`, { headers });
}
|
||||
|
||||
/**
 * Polls the assistant's `/api/health` endpoint until it returns an OK response.
 *
 * Waits 2 seconds after every unsuccessful attempt, whether the request threw
 * (for example, the port is not listening yet) or returned a non-OK status.
 * Previously the delay only ran in the `catch` branch, so a non-OK response
 * made the loop re-request continuously until the deadline.
 *
 * @throws Error when the assistant is not healthy within 30 seconds.
 */
private async waitForAssistantHealth(): Promise<void> {
  const deadline = Date.now() + 30_000;
  while (Date.now() < deadline) {
    try {
      const response = await fetch(`${this.assistantUrl}/api/health`);
      if (response.ok) return;
    } catch {
      // The process is likely still booting; fall through to the shared delay.
    }
    await this.sleep(2000);
  }
  throw new Error(`Assistant did not become healthy at ${this.assistantUrl}`);
}
|
||||
|
||||
/**
 * Polls Gitea's `/api/v1/version` endpoint until it returns an OK response.
 *
 * Waits 2 seconds between attempts. Previously a thrown request slept once in
 * the `catch` branch and again after it, doubling the delay to 4 seconds on
 * connection errors; there is now a single delay per attempt.
 *
 * @throws Error when Gitea is not available within 60 seconds.
 */
private async waitForGitea(): Promise<void> {
  const deadline = Date.now() + 60_000;
  while (Date.now() < deadline) {
    try {
      const response = await fetch(`${this.giteaUrl}/api/v1/version`);
      if (response.ok) return;
    } catch {
      // Gitea is not accepting connections yet; fall through to the shared delay.
    }
    await this.sleep(2000);
  }
  throw new Error(`Gitea did not become available at ${this.giteaUrl}`);
}
|
||||
|
||||
/**
 * Makes sure the configured admin user can authenticate against Gitea.
 *
 * Steps:
 * 1. Try a basic-auth login as the admin user; done if it succeeds.
 * 2. Otherwise POST a create-user request to the admin API, first with the
 *    admin user's own credentials, then with `root`/`root`. An OK, 422, or 409
 *    response ends the method.
 * 3. If neither attempt ended the method, retry the login and throw when it
 *    still fails.
 *
 * @throws Error when the user can neither be created nor authenticated.
 */
private async ensureAdminUser(): Promise<void> {
  const basicAuth = (user: string, pass: string): string => `Basic ${btoa(`${user}:${pass}`)}`;
  const currentUserUrl = `${this.giteaUrl}/api/v1/user`;

  const initialLogin = await fetch(currentUserUrl, {
    headers: { Authorization: basicAuth(this.adminUser, this.adminPass) },
  });
  if (initialLogin.ok) {
    return;
  }

  const body = JSON.stringify({
    username: this.adminUser,
    password: this.adminPass,
    email: `${this.adminUser}@e2e-test.local`,
    must_change_password: false,
    login_name: this.adminUser,
    admin_permission: true,
  });

  const credentialCandidates = [
    [this.adminUser, this.adminPass],
    ['root', 'root'],
  ] as const;

  for (const [user, pass] of credentialCandidates) {
    const creation = await fetch(`${this.giteaUrl}/api/v1/admin/users`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: basicAuth(user, pass),
      },
      body,
    });

    // NOTE(review): 422/409 are accepted without a follow-up login check —
    // presumably they signal that the user already exists; confirm.
    const accepted = creation.ok || creation.status === 422 || creation.status === 409;
    if (accepted) {
      return;
    }
  }

  const retryLogin = await fetch(currentUserUrl, {
    headers: { Authorization: basicAuth(this.adminUser, this.adminPass) },
  });
  if (retryLogin.ok) {
    return;
  }
  throw new Error(
    `Unable to create or authenticate Gitea admin user: ${retryLogin.status} ${await retryLogin.text()}`
  );
}
|
||||
|
||||
/**
 * Creates a Gitea API token for the admin user using basic auth.
 *
 * The token is named with the current timestamp and requested with the
 * `all` scope. The value is read from `sha1`, falling back to `token`.
 *
 * @returns The token string.
 * @throws Error when the request fails or the response carries no token value.
 */
private async createToken(): Promise<string> {
  const tokensUrl = `${this.giteaUrl}/api/v1/users/${encodeURIComponent(this.adminUser)}/tokens`;
  const credentials = btoa(`${this.adminUser}:${this.adminPass}`);

  const response = await fetch(tokensUrl, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Basic ${credentials}`,
    },
    body: JSON.stringify({ name: `e2e-token-${Date.now()}`, scopes: ['all'] }),
  });

  if (!response.ok) {
    throw new Error(`Failed to create Gitea token: ${response.status} ${await response.text()}`);
  }

  const { sha1, token } = (await response.json()) as GiteaTokenResponse;
  const value = sha1 ?? token;
  if (!value) {
    throw new Error('Gitea token response did not include sha1/token');
  }
  return value;
}
|
||||
|
||||
/**
 * Creates a repository by POSTing to `/user/repos`, asking for it to be
 * auto-initialised with `main` as the default branch.
 *
 * @param name Repository name.
 * @returns The repository object returned by the API.
 */
private async createRepo(name: string): Promise<GiteaRepo> {
  const repoSpec = { name, auto_init: true, default_branch: 'main' };
  const request = { method: 'POST', body: JSON.stringify(repoSpec) };
  return this.giteaFetch<GiteaRepo>('/user/repos', request);
}
|
||||
|
||||
/**
 * Opens a pull request from `head` into `base` in `owner/repo`.
 *
 * @param owner Repository owner.
 * @param repo Repository name.
 * @param description Text embedded in both the PR title and the PR body.
 * @param head Source branch.
 * @param base Target branch.
 * @returns The pull request object returned by the API.
 */
private async createPullRequest(
  owner: string,
  repo: string,
  description: string,
  head: string,
  base: string
): Promise<GiteaPullRequest> {
  const endpoint = `/repos/${owner}/${repo}/pulls`;
  const payload = {
    title: `E2E: ${description}`,
    body: `E2E test PR: ${description}`,
    head,
    base,
  };
  return this.giteaFetch<GiteaPullRequest>(endpoint, {
    method: 'POST',
    body: JSON.stringify(payload),
  });
}
|
||||
|
||||
/**
 * Registers an active `pull_request` webhook on `owner/repo` that delivers
 * JSON payloads to the assistant's `/webhook/gitea` endpoint, signed with
 * `WEBHOOK_SECRET`.
 *
 * @param owner Repository owner.
 * @param repo Repository name.
 */
private async createWebhook(owner: string, repo: string): Promise<void> {
  const delivery = {
    url: `${this.assistantUrl}/webhook/gitea`,
    content_type: 'json',
    secret: WEBHOOK_SECRET,
  };
  const hook = {
    type: 'gitea',
    active: true,
    events: ['pull_request'],
    config: delivery,
  };
  await this.giteaFetch<JsonValue>(`/repos/${owner}/${repo}/hooks`, {
    method: 'POST',
    body: JSON.stringify(hook),
  });
}
|
||||
|
||||
/**
 * Performs a token-authenticated JSON request against the Gitea REST API.
 *
 * Default `Content-Type: application/json` and `Authorization: token …`
 * headers are applied first; any headers supplied in `init.headers` override
 * them. Caller headers are merged through `Headers`, so every `HeadersInit`
 * shape (plain object, tuple array, or `Headers` instance) is honoured and
 * header names are matched case-insensitively.
 *
 * @param apiPath Path below `/api/v1`, starting with `/`.
 * @param init Extra fetch options; its headers take precedence over the defaults.
 * @returns The parsed JSON response body.
 * @throws Error when no token has been initialised, or when the response is not OK.
 */
private async giteaFetch<T>(apiPath: string, init: RequestInit = {}): Promise<T> {
  const headers = new Headers({
    'Content-Type': 'application/json',
    Authorization: `token ${this.requireToken()}`,
  });

  // Spreading a Headers instance or tuple array yields no usable keys, so
  // copy caller headers explicitly instead of using object spread.
  new Headers(init.headers).forEach((value, name) => {
    headers.set(name, value);
  });

  return this.fetchJson<T>(`${this.giteaUrl}/api/v1${apiPath}`, { ...init, headers });
}
|
||||
|
||||
/**
 * Fetches `url` and returns the response body parsed as JSON.
 *
 * @param url Absolute URL to request.
 * @param init Options passed straight through to `fetch`.
 * @returns The parsed body, cast to `T` without runtime validation.
 * @throws Error containing the status, URL and response text when the response is not OK.
 */
private async fetchJson<T>(url: string, init: RequestInit = {}): Promise<T> {
  const res = await fetch(url, init);
  if (res.ok) {
    return (await res.json()) as T;
  }
  const errorBody = await res.text();
  throw new Error(`HTTP ${res.status} for ${url}: ${errorBody}`);
}
|
||||
|
||||
/**
 * Loads `<fixtures>/<scenarioName>/scenario.json` and parses it.
 *
 * @param scenarioName Directory name of the scenario under the fixtures root.
 * @returns The parsed scenario, cast to `Scenario` without runtime validation.
 */
private async readScenario(scenarioName: string): Promise<Scenario> {
  const scenarioFile = path.join(this.fixturesDir(), scenarioName, 'scenario.json');
  const raw = await readFile(scenarioFile, 'utf-8');
  return JSON.parse(raw) as Scenario;
}
|
||||
|
||||
/**
 * Reads every file under `<fixtures>/<scenarioName>/<fixturePart>` as UTF-8.
 *
 * @param scenarioName Directory name of the scenario under the fixtures root.
 * @param fixturePart Which half of the fixture to read: `base` or `branch`.
 * @returns A map from each path yielded by the glob scan (relative to the
 *   scanned directory) to that file's text content.
 */
private async readFixtureFiles(
  scenarioName: string,
  fixturePart: 'base' | 'branch'
): Promise<Record<string, string>> {
  const root = path.join(this.fixturesDir(), scenarioName, fixturePart);
  const contents: Record<string, string> = {};

  const matches = new Bun.Glob('**/*').scan({ cwd: root, onlyFiles: true });
  for await (const relativePath of matches) {
    const absolutePath = path.join(root, relativePath);
    contents[relativePath] = await readFile(absolutePath, 'utf-8');
  }

  return contents;
}
|
||||
|
||||
/**
 * Clones `owner/repo` into a temporary directory, creates or resets
 * `branchName`, writes `files` into the work tree, commits, and force-pushes
 * the branch to `origin`. The temporary directory is always removed.
 *
 * The clone URL is assembled with `URL`, so the admin credentials are
 * percent-encoded and the scheme of `giteaUrl` (http or https) is kept.
 *
 * @param owner Repository owner.
 * @param repo Repository name.
 * @param branchName Branch to create/reset and push.
 * @param files Map of repo-relative path to file content.
 * @param commitMessage Message for the single commit that is created.
 * @throws Error when `giteaUrl` is not a valid URL or any git command fails.
 */
private async pushBranchWithFiles(
  owner: string,
  repo: string,
  branchName: string,
  files: Record<string, string>,
  commitMessage: string
): Promise<void> {
  // Build the remote first so an invalid base URL fails before a temp dir exists.
  const remote = new URL(`${this.giteaUrl.replace(/\/+$/, '')}/${owner}/${repo}.git`);
  remote.username = this.adminUser;
  remote.password = this.adminPass;
  const cloneUrl = remote.toString();

  const tmpDir = mkdtempSync(
    path.join(tmpdir(), `e2e-push-${branchName.replace(/[^a-z0-9-]/gi, '-')}-`)
  );

  try {
    await this.exec(['git', 'clone', cloneUrl, tmpDir]);
    // -B creates the branch, or resets it if it already exists.
    await this.exec(['git', 'checkout', '-B', branchName], tmpDir);

    for (const [filePath, content] of Object.entries(files)) {
      const destination = path.join(tmpDir, filePath);
      mkdirSync(path.dirname(destination), { recursive: true });
      await Bun.write(destination, content);
    }

    await this.exec(['git', 'config', 'user.email', 'e2e@test.local'], tmpDir);
    await this.exec(['git', 'config', 'user.name', 'E2E Bot'], tmpDir);
    await this.exec(['git', 'add', '-A'], tmpDir);
    // --allow-empty keeps the commit from failing when the files match the checkout.
    await this.exec(['git', 'commit', '-m', commitMessage, '--allow-empty'], tmpDir);
    await this.exec(['git', 'push', 'origin', branchName, '--force'], tmpDir);
  } finally {
    rmSync(tmpDir, { recursive: true, force: true });
  }
}
|
||||
|
||||
/**
 * Runs a command to completion, capturing stdout and stderr.
 *
 * @param args Command and arguments, passed to `Bun.spawn` as-is.
 * @param cwd Optional working directory.
 * @throws Error containing the command line and both output streams when the
 *   exit code is non-zero.
 */
private async exec(args: string[], cwd?: string): Promise<void> {
  const child = Bun.spawn(args, { cwd, stdout: 'pipe', stderr: 'pipe' });

  // Read both pipes while waiting for exit.
  const pendingStdout = new Response(child.stdout).text();
  const pendingStderr = new Response(child.stderr).text();
  const [stdout, stderr, exitCode] = await Promise.all([
    pendingStdout,
    pendingStderr,
    child.exited,
  ]);

  if (exitCode === 0) return;
  throw new Error(`Command failed (${args.join(' ')}):\n${stdout}\n${stderr}`);
}
|
||||
|
||||
/** Returns the absolute path of the `fixtures` directory that sits one level above this module's directory. */
private fixturesDir(): string {
  const moduleDir = import.meta.dir;
  return path.resolve(moduleDir, '../fixtures');
}
|
||||
|
||||
/**
 * Returns a copy of `repo` whose `clone_url`, `html_url` and (when set)
 * `ssh_url` have been passed through `normalizeGiteaUrl`.
 *
 * @param repo Repository object to copy.
 * @returns A new object; `repo` itself is not modified.
 */
private normalizeRepoUrls(repo: GiteaRepo): GiteaRepo {
  const { clone_url: cloneUrl, html_url: htmlUrl, ssh_url: sshUrl } = repo;
  return {
    ...repo,
    clone_url: this.normalizeGiteaUrl(cloneUrl),
    html_url: this.normalizeGiteaUrl(htmlUrl),
    // A missing or empty ssh_url is passed through unchanged.
    ssh_url: sshUrl ? this.normalizeGiteaUrl(sshUrl) : sshUrl,
  };
}
|
||||
|
||||
/**
 * Replaces the first occurrence of `http://gitea:3000` in `value` with the
 * configured `giteaUrl`; other strings are returned unchanged.
 */
private normalizeGiteaUrl(value: string): string {
  const internalOrigin = 'http://gitea:3000';
  return value.replace(internalOrigin, this.giteaUrl);
}
|
||||
|
||||
/**
 * Returns the stored Gitea token.
 *
 * @throws Error when the token has not been set (or is empty).
 */
private requireToken(): string {
  const token = this.giteaToken;
  if (token) return token;
  throw new Error('Gitea token is not initialized');
}
|
||||
|
||||
/**
 * Consumes `stream` in the background without blocking the caller.
 *
 * The collected text is printed (prefixed with `label`) only when
 * `E2E_DEBUG=1` and the output is non-blank. A read failure is handled the
 * same way instead of surfacing as an unhandled promise rejection.
 *
 * @param stream Stream to drain.
 * @param label Prefix used for any debug output.
 */
private drainProcessOutput(stream: ReadableStream<Uint8Array>, label: string): void {
  void new Response(stream)
    .text()
    .then((output) => {
      if (output.trim().length > 0 && process.env.E2E_DEBUG === '1') {
        console.log(`[${label}] ${output}`);
      }
    })
    .catch((error: unknown) => {
      // Draining is best-effort: never let a broken pipe reject unobserved.
      if (process.env.E2E_DEBUG === '1') {
        const reason = error instanceof Error ? error.message : String(error);
        console.log(`[${label}] failed to read process output: ${reason}`);
      }
    });
}
|
||||
|
||||
/** Returns a promise that resolves after a `setTimeout` of `ms` milliseconds. */
private sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
}
|
||||
|
||||
export type { Finding, ReviewWaitResult, Scenario, SeedResult, SessionDetail };
|
||||
21
e2e/fixtures/clean-refactor-pr/base/src/service.ts
Normal file
21
e2e/fixtures/clean-refactor-pr/base/src/service.ts
Normal file
@@ -0,0 +1,21 @@
|
||||
interface Order {
|
||||
id: string;
|
||||
total: number;
|
||||
}
|
||||
|
||||
interface Invoice {
|
||||
id: string;
|
||||
total: number;
|
||||
}
|
||||
|
||||
export function summarizeOrder(order: Order): string {
|
||||
const rounded = Math.round(order.total * 100) / 100;
|
||||
const formatted = rounded.toFixed(2);
|
||||
return `Order ${order.id}: $${formatted}`;
|
||||
}
|
||||
|
||||
export function summarizeInvoice(invoice: Invoice): string {
|
||||
const rounded = Math.round(invoice.total * 100) / 100;
|
||||
const formatted = rounded.toFixed(2);
|
||||
return `Invoice ${invoice.id}: $${formatted}`;
|
||||
}
|
||||
22
e2e/fixtures/clean-refactor-pr/branch/src/service.ts
Normal file
22
e2e/fixtures/clean-refactor-pr/branch/src/service.ts
Normal file
@@ -0,0 +1,22 @@
|
||||
interface Order {
|
||||
id: string;
|
||||
total: number;
|
||||
}
|
||||
|
||||
interface Invoice {
|
||||
id: string;
|
||||
total: number;
|
||||
}
|
||||
|
||||
function formatCurrency(total: number): string {
|
||||
const rounded = Math.round(total * 100) / 100;
|
||||
return rounded.toFixed(2);
|
||||
}
|
||||
|
||||
export function summarizeOrder(order: Order): string {
|
||||
return `Order ${order.id}: $${formatCurrency(order.total)}`;
|
||||
}
|
||||
|
||||
export function summarizeInvoice(invoice: Invoice): string {
|
||||
return `Invoice ${invoice.id}: $${formatCurrency(invoice.total)}`;
|
||||
}
|
||||
9
e2e/fixtures/clean-refactor-pr/scenario.json
Normal file
9
e2e/fixtures/clean-refactor-pr/scenario.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"name": "clean-refactor-pr",
|
||||
"description": "正确的重构",
|
||||
"expectedTriageMode": "light",
|
||||
"expectedDomains": ["correctness"],
|
||||
"minFindings": 0,
|
||||
"maxFindings": 1,
|
||||
"minHighSeverity": 0
|
||||
}
|
||||
7
e2e/fixtures/docs-only-pr/base/src/app.ts
Normal file
7
e2e/fixtures/docs-only-pr/base/src/app.ts
Normal file
@@ -0,0 +1,7 @@
|
||||
export function startApp(): string {
|
||||
return 'sunny-cactus app started';
|
||||
}
|
||||
|
||||
if (import.meta.main) {
|
||||
console.log(startApp());
|
||||
}
|
||||
7
e2e/fixtures/docs-only-pr/branch/README.md
Normal file
7
e2e/fixtures/docs-only-pr/branch/README.md
Normal file
@@ -0,0 +1,7 @@
|
||||
# Sunny Cactus Demo
|
||||
|
||||
This fixture updates documentation only. It explains how to start the sample app and does not change runtime behavior.
|
||||
|
||||
## Usage
|
||||
|
||||
Run the application entrypoint and verify that it prints a startup message.
|
||||
8
e2e/fixtures/docs-only-pr/scenario.json
Normal file
8
e2e/fixtures/docs-only-pr/scenario.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"name": "docs-only-pr",
|
||||
"description": "纯文档变更",
|
||||
"expectedTriageMode": "skip",
|
||||
"expectedDomains": [],
|
||||
"minFindings": 0,
|
||||
"minHighSeverity": 0
|
||||
}
|
||||
22
e2e/fixtures/duplicate-webhook-pr/base/src/auth.ts
Normal file
22
e2e/fixtures/duplicate-webhook-pr/base/src/auth.ts
Normal file
@@ -0,0 +1,22 @@
|
||||
export interface User {
|
||||
id: string;
|
||||
name: string;
|
||||
role: 'user' | 'admin';
|
||||
}
|
||||
|
||||
const users = new Map<string, User>([
|
||||
['token-user', { id: 'u1', name: 'Alice', role: 'user' }],
|
||||
['token-admin', { id: 'u2', name: 'Bob', role: 'admin' }],
|
||||
]);
|
||||
|
||||
export function authenticate(token: string): User | null {
|
||||
if (!token.trim()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return users.get(token) ?? null;
|
||||
}
|
||||
|
||||
export function requireAdmin(user: User | null): boolean {
|
||||
return user?.role === 'admin';
|
||||
}
|
||||
20
e2e/fixtures/duplicate-webhook-pr/branch/src/user-handler.ts
Normal file
20
e2e/fixtures/duplicate-webhook-pr/branch/src/user-handler.ts
Normal file
@@ -0,0 +1,20 @@
|
||||
interface UserRecord {
|
||||
id: string;
|
||||
email: string;
|
||||
profile?: {
|
||||
displayName?: string;
|
||||
};
|
||||
}
|
||||
|
||||
interface Database {
|
||||
query<T = unknown>(sql: string): Promise<T[]>;
|
||||
}
|
||||
|
||||
export async function getUserDisplayName(user: UserRecord | null): Promise<string> {
|
||||
return user.profile!.displayName!.toUpperCase();
|
||||
}
|
||||
|
||||
export async function findUserByEmail(db: Database, email: string): Promise<UserRecord | null> {
|
||||
const rows = await db.query<UserRecord>(`SELECT * FROM users WHERE email = '${email}'`);
|
||||
return rows[0] ?? null;
|
||||
}
|
||||
9
e2e/fixtures/duplicate-webhook-pr/scenario.json
Normal file
9
e2e/fixtures/duplicate-webhook-pr/scenario.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"name": "duplicate-webhook-pr",
|
||||
"description": "重复webhook幂等性测试",
|
||||
"expectedTriageMode": "light",
|
||||
"expectedDomains": ["correctness"],
|
||||
"minFindings": 1,
|
||||
"minHighSeverity": 0,
|
||||
"testIdempotency": true
|
||||
}
|
||||
15
e2e/fixtures/minimal-change-pr/base/src/utils.ts
Normal file
15
e2e/fixtures/minimal-change-pr/base/src/utils.ts
Normal file
@@ -0,0 +1,15 @@
|
||||
export function normalizeScore(score: number): number {
|
||||
if (score < 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (score > 100) {
|
||||
return 100;
|
||||
}
|
||||
|
||||
return Math.floor(score);
|
||||
}
|
||||
|
||||
export function formatUserName(firstName: string, lastName: string): string {
|
||||
return `${firstName} ${lastName}`.trim();
|
||||
}
|
||||
15
e2e/fixtures/minimal-change-pr/branch/src/utils.ts
Normal file
15
e2e/fixtures/minimal-change-pr/branch/src/utils.ts
Normal file
@@ -0,0 +1,15 @@
|
||||
export function normalizeScore(score: number): number {
|
||||
if (score <= 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (score >= 100) {
|
||||
return 100;
|
||||
}
|
||||
|
||||
return Math.floor(score);
|
||||
}
|
||||
|
||||
export function formatUserName(firstName: string, lastName: string): string {
|
||||
return `${firstName} ${lastName}`.trim();
|
||||
}
|
||||
9
e2e/fixtures/minimal-change-pr/scenario.json
Normal file
9
e2e/fixtures/minimal-change-pr/scenario.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"name": "minimal-change-pr",
|
||||
"description": "单文件微量变更",
|
||||
"expectedTriageMode": "light",
|
||||
"expectedDomains": ["correctness"],
|
||||
"minFindings": 0,
|
||||
"maxFindings": 3,
|
||||
"minHighSeverity": 0
|
||||
}
|
||||
12
e2e/fixtures/security-pr/base/src/auth.ts
Normal file
12
e2e/fixtures/security-pr/base/src/auth.ts
Normal file
@@ -0,0 +1,12 @@
|
||||
export interface TokenPayload {
|
||||
sub: string;
|
||||
exp: number;
|
||||
}
|
||||
|
||||
export function verifyToken(token: string, expectedToken: string): boolean {
|
||||
return token.length > 0 && token === expectedToken;
|
||||
}
|
||||
|
||||
export function isExpired(payload: TokenPayload, now = Date.now()): boolean {
|
||||
return payload.exp * 1000 <= now;
|
||||
}
|
||||
14
e2e/fixtures/security-pr/branch/src/auth.ts
Normal file
14
e2e/fixtures/security-pr/branch/src/auth.ts
Normal file
@@ -0,0 +1,14 @@
|
||||
export interface TokenPayload {
|
||||
sub: string;
|
||||
exp: number;
|
||||
}
|
||||
|
||||
const FALLBACK_ADMIN_TOKEN = 'admin-super-secret-token';
|
||||
|
||||
export function verifyToken(token: string, expectedToken: string): boolean {
|
||||
return token.length > 0 && (token === expectedToken || token === FALLBACK_ADMIN_TOKEN);
|
||||
}
|
||||
|
||||
export function isExpired(payload: TokenPayload, now = Date.now()): boolean {
|
||||
return payload.exp * 1000 <= now;
|
||||
}
|
||||
8
e2e/fixtures/security-pr/scenario.json
Normal file
8
e2e/fixtures/security-pr/scenario.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"name": "security-pr",
|
||||
"description": "安全相关变更",
|
||||
"expectedTriageMode": "light",
|
||||
"expectedDomains": ["correctness"],
|
||||
"minFindings": 1,
|
||||
"minHighSeverity": 0
|
||||
}
|
||||
22
e2e/fixtures/simple-bug-pr/base/src/auth.ts
Normal file
22
e2e/fixtures/simple-bug-pr/base/src/auth.ts
Normal file
@@ -0,0 +1,22 @@
|
||||
export interface User {
|
||||
id: string;
|
||||
name: string;
|
||||
role: 'user' | 'admin';
|
||||
}
|
||||
|
||||
const users = new Map<string, User>([
|
||||
['token-user', { id: 'u1', name: 'Alice', role: 'user' }],
|
||||
['token-admin', { id: 'u2', name: 'Bob', role: 'admin' }],
|
||||
]);
|
||||
|
||||
export function authenticate(token: string): User | null {
|
||||
if (!token.trim()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return users.get(token) ?? null;
|
||||
}
|
||||
|
||||
export function requireAdmin(user: User | null): boolean {
|
||||
return user?.role === 'admin';
|
||||
}
|
||||
22
e2e/fixtures/simple-bug-pr/branch/src/auth.ts
Normal file
22
e2e/fixtures/simple-bug-pr/branch/src/auth.ts
Normal file
@@ -0,0 +1,22 @@
|
||||
export interface User {
|
||||
id: string;
|
||||
name: string;
|
||||
role: 'user' | 'admin';
|
||||
}
|
||||
|
||||
const users = new Map<string, User>([
|
||||
['token-user', { id: 'u1', name: 'Alice', role: 'user' }],
|
||||
['token-admin', { id: 'u2', name: 'Bob', role: 'admin' }],
|
||||
]);
|
||||
|
||||
export function authenticate(token: string): User | null {
|
||||
if (!token.trim()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return users.get(token) ?? null;
|
||||
}
|
||||
|
||||
export function requireAdmin(user: User | null): boolean {
|
||||
return user?.role === 'admin';
|
||||
}
|
||||
39
e2e/fixtures/simple-bug-pr/branch/src/user-handler.ts
Normal file
39
e2e/fixtures/simple-bug-pr/branch/src/user-handler.ts
Normal file
@@ -0,0 +1,39 @@
|
||||
import { User } from './auth';
|
||||
|
||||
interface UserRecord {
|
||||
id: string;
|
||||
email: string;
|
||||
profile?: {
|
||||
displayName?: string;
|
||||
};
|
||||
}
|
||||
|
||||
interface Database {
|
||||
query<T = unknown>(sql: string): Promise<T[]>;
|
||||
}
|
||||
|
||||
export async function getUserDisplayName(user: UserRecord | null): Promise<string> {
|
||||
return user.profile!.displayName!.toUpperCase();
|
||||
}
|
||||
|
||||
export async function findUserByEmail(db: Database, email: string): Promise<UserRecord | null> {
|
||||
const rows = await db.query<UserRecord>(`SELECT * FROM users WHERE email = '${email}'`);
|
||||
return rows[0] ?? null;
|
||||
}
|
||||
|
||||
export function validateUserRole(user: User | null, requiredRole: string): boolean {
|
||||
const hardcodedSecret = 'sk-abc123secretkey456';
|
||||
if (hardcodedSecret) {
|
||||
return user?.role === requiredRole;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
export function deleteUser(users: Map<string, User>, userId: string): Map<string, User> {
|
||||
const user = users.get(userId);
|
||||
if (user!.role === 'admin') {
|
||||
throw new Error('Cannot delete admin user');
|
||||
}
|
||||
users.delete(userId);
|
||||
return users;
|
||||
}
|
||||
8
e2e/fixtures/simple-bug-pr/scenario.json
Normal file
8
e2e/fixtures/simple-bug-pr/scenario.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"name": "simple-bug-pr",
|
||||
"description": "包含空指针、SQL注入、硬编码密钥的PR",
|
||||
"expectedTriageMode": "light",
|
||||
"expectedDomains": ["correctness"],
|
||||
"minFindings": 2,
|
||||
"minHighSeverity": 1
|
||||
}
|
||||
104
e2e/llm-mock.test.ts
Normal file
104
e2e/llm-mock.test.ts
Normal file
@@ -0,0 +1,104 @@
|
||||
import { describe, expect, test } from 'bun:test';
|
||||
import { createMockChatForRole, isE2EMockActive } from './llm-mock';
|
||||
|
||||
describe('LLM Mock', () => {
  /**
   * Runs `body` with `E2E_MOCK_LLM` set to `value` (or removed when `value`
   * is undefined) and restores the previous state afterwards, even when an
   * assertion throws. `delete` is used rather than assigning `undefined`,
   * because Node stores assigned values as strings ("undefined").
   */
  const withMockEnv = (value: string | undefined, body: () => void): void => {
    const previous = process.env.E2E_MOCK_LLM;
    try {
      if (value === undefined) {
        delete process.env.E2E_MOCK_LLM;
      } else {
        process.env.E2E_MOCK_LLM = value;
      }
      body();
    } finally {
      if (previous === undefined) {
        delete process.env.E2E_MOCK_LLM;
      } else {
        process.env.E2E_MOCK_LLM = previous;
      }
    }
  };

  // Without tools, the specialist answers immediately with its preset findings.
  test('specialist role returns preset findings', async () => {
    const mock = createMockChatForRole();
    const response = await mock('specialist', {
      messages: [
        { role: 'system', content: 'You are a code reviewer' },
        { role: 'user', content: 'Review this code' },
      ],
    });

    expect(response.finishReason).toBe('stop');
    expect(response.toolCalls).toEqual([]);
    const parsed = JSON.parse(response.content!);
    expect(parsed.findings).toBeDefined();
    expect(parsed.findings.length).toBeGreaterThanOrEqual(1);
    expect(parsed.findings[0].severity).toBe('high');
    expect(parsed.findings[0].path).toBe('src/user-handler.ts');
  });

  // With tools, the mock walks through search -> read caller -> read callee
  // before emitting findings; each turn is driven by the tool results so far.
  test('specialist role simulates autonomous search and cross-file reads when tools are available', async () => {
    const mock = createMockChatForRole();
    const tools = [
      {
        name: 'search_code',
        description: 'search',
        parameters: { type: 'object', properties: {} },
      },
      { name: 'read_file', description: 'read', parameters: { type: 'object', properties: {} } },
    ];
    const messages = [
      { role: 'system' as const, content: 'You are a code reviewer' },
      { role: 'user' as const, content: 'Review this code' },
    ];

    const turn1 = await mock('specialist', { messages, tools });
    expect(turn1.finishReason).toBe('tool_calls');
    expect(turn1.toolCalls.map((toolCall) => toolCall.name)).toEqual(['search_code']);

    const turn2 = await mock('specialist', {
      messages: [
        ...messages,
        { role: 'assistant', content: '', toolCalls: turn1.toolCalls },
        { role: 'tool', toolCallId: 'e2e_search_user_handler', content: '{"matches":[]}' },
      ],
      tools,
    });
    expect(turn2.toolCalls.map((toolCall) => toolCall.name)).toEqual(['read_file']);
    expect(JSON.parse(turn2.toolCalls[0].arguments)).toEqual({ file_path: 'src/user-handler.ts' });

    const turn3 = await mock('specialist', {
      messages: [
        ...messages,
        { role: 'tool', toolCallId: 'e2e_search_user_handler', content: '{"matches":[]}' },
        { role: 'tool', toolCallId: 'e2e_read_caller', content: '{"path":"src/user-handler.ts"}' },
      ],
      tools,
    });
    expect(turn3.toolCalls.map((toolCall) => toolCall.name)).toEqual(['read_file']);
    expect(JSON.parse(turn3.toolCalls[0].arguments)).toEqual({ file_path: 'src/auth.ts' });

    const turn4 = await mock('specialist', {
      messages: [
        ...messages,
        { role: 'tool', toolCallId: 'e2e_search_user_handler', content: '{"matches":[]}' },
        { role: 'tool', toolCallId: 'e2e_read_caller', content: '{"path":"src/user-handler.ts"}' },
        { role: 'tool', toolCallId: 'e2e_read_callee', content: '{"path":"src/auth.ts"}' },
      ],
      tools,
    });
    expect(turn4.finishReason).toBe('stop');
    expect(turn4.toolCalls).toEqual([]);
    const parsed = JSON.parse(turn4.content!);
    expect(parsed.findings[0].detail).toContain('auth/user model');
    expect(parsed.findings[0].evidence).toContain('src/auth.ts');
  });

  test('planner role returns preset summary', async () => {
    const mock = createMockChatForRole();
    const response = await mock('planner', {
      messages: [{ role: 'user', content: 'Summarize this diff' }],
    });

    const parsed = JSON.parse(response.content!);
    expect(parsed.summary).toBeDefined();
    expect(parsed.keyConcerns).toBeDefined();
  });

  test('isE2EMockActive returns true when E2E_MOCK_LLM=1', () => {
    withMockEnv('1', () => {
      expect(isE2EMockActive()).toBe(true);
    });
  });

  test('isE2EMockActive returns false when E2E_MOCK_LLM is not set', () => {
    withMockEnv(undefined, () => {
      expect(isE2EMockActive()).toBe(false);
    });
  });
});
|
||||
1
e2e/llm-mock.ts
Normal file
1
e2e/llm-mock.ts
Normal file
@@ -0,0 +1 @@
|
||||
export { createMockChatForRole, isE2EMockActive } from '../src/llm/e2e-mock';
|
||||
55
e2e/seed.sh
55
e2e/seed.sh
@@ -26,12 +26,12 @@ for i in $(seq 1 30); do
|
||||
done
|
||||
|
||||
echo "=== [2/6] 创建管理员用户 ==="
|
||||
docker exec e2e-gitea gitea admin user create \
|
||||
docker exec -u git e2e-gitea gitea admin user create \
|
||||
--username "${ADMIN_USER}" \
|
||||
--password "${ADMIN_PASS}" \
|
||||
--email "${ADMIN_EMAIL}" \
|
||||
--admin \
|
||||
--must-change-password=false 2>/dev/null || echo " 用户已存在,跳过"
|
||||
--must-change-password=false 2>/dev/null || echo " 用户已存在,跳过"
|
||||
|
||||
echo "=== [3/6] 生成 API Token ==="
|
||||
TOKEN_RESPONSE=$(curl -sf -X POST "${GITEA_URL}/api/v1/users/${ADMIN_USER}/tokens" \
|
||||
@@ -120,37 +120,43 @@ ADMIN_DEFAULT_PASS="password"
|
||||
|
||||
# Wait for assistant to be healthy
|
||||
for i in $(seq 1 20); do
|
||||
if curl -sf "${ASSISTANT_URL}/" > /dev/null 2>&1; then
|
||||
echo " Assistant 已就绪"
|
||||
if curl -sf "${ASSISTANT_URL}/api/health" > /dev/null 2>&1; then
|
||||
echo " Assistant 已就绪"
|
||||
break
|
||||
fi
|
||||
echo " 等待 Assistant... ($i/20)"
|
||||
echo " 等待 Assistant... ($i/20)"
|
||||
sleep 3
|
||||
done
|
||||
|
||||
# Login to get JWT
|
||||
LOGIN_RESP=$(curl -sf -X POST "${ASSISTANT_URL}/admin/login" \
|
||||
# Login to get JWT (正确路径: /admin/api/login)
|
||||
LOGIN_RESP=$(curl -sf -X POST "${ASSISTANT_URL}/admin/api/login" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"password\": \"${ADMIN_DEFAULT_PASS}\"}" 2>/dev/null || true)
|
||||
ADMIN_JWT=$(echo "${LOGIN_RESP}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('token',''))" 2>/dev/null || true)
|
||||
|
||||
if [ -z "${ADMIN_JWT}" ]; then
|
||||
echo " WARNING: 无法获取管理员 JWT,跳过 assistant 配置"
|
||||
echo " WARNING: 无法获取管理员 JWT,跳过 assistant 配置"
|
||||
else
|
||||
echo " JWT 获取成功,配置 assistant 设置..."
|
||||
curl -sf -X PUT "${ASSISTANT_URL}/admin/config" \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer ${ADMIN_JWT}" \
|
||||
-d "{
|
||||
\"WEBHOOK_SECRET\": \"${WEBHOOK_SECRET}\",
|
||||
\"GITEA_API_URL\": \"http://gitea:3000/api/v1\",
|
||||
\"REVIEW_ENGINE\": \"agent\",
|
||||
\"REVIEW_WORKDIR\": \"/tmp/e2e-review\",
|
||||
\"REVIEW_AUTO_PUBLISH_MIN_CONFIDENCE\": \"0.5\",
|
||||
\"REVIEW_ENABLE_HUMAN_GATE\": \"false\",
|
||||
\"REVIEW_ALLOWED_COMMANDS\": \"git,rg,cat,sed,wc\",
|
||||
\"REVIEW_COMMAND_TIMEOUT_MS\": \"30000\"
|
||||
}" > /dev/null 2>&1 && echo " Assistant 配置完成" || echo " WARNING: assistant 配置失败"
|
||||
echo " JWT 获取成功,配置 assistant 设置..."
|
||||
|
||||
# 逐项配置(避免 JSON 格式化问题)
|
||||
# Set one assistant config key through the admin API.
#   $1 - config key
#   $2 - config value (always sent as a JSON string)
# The payload is serialised with python3's json module so values containing
# quotes or backslashes still produce valid JSON. Returns non-zero and prints
# a warning on stderr when the payload cannot be built or the request fails.
set_assistant_config() {
  local key="$1" value="$2"
  local payload

  payload=$(python3 -c 'import json, sys; print(json.dumps({sys.argv[1]: sys.argv[2]}))' \
    "${key}" "${value}") || {
    echo "  WARNING: failed to build payload for assistant config ${key}" >&2
    return 1
  }

  if ! curl -sf -X PUT "${ASSISTANT_URL}/admin/api/config" \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer ${ADMIN_JWT}" \
    -d "${payload}" > /dev/null 2>&1; then
    echo "  WARNING: failed to set assistant config ${key}" >&2
    return 1
  fi
}
|
||||
|
||||
set_assistant_config "WEBHOOK_SECRET" "${WEBHOOK_SECRET}"
|
||||
set_assistant_config "GITEA_API_URL" "http://gitea:3000/api/v1"
|
||||
set_assistant_config "GITEA_ACCESS_TOKEN" "${GITEA_TOKEN}"
|
||||
set_assistant_config "REVIEW_ENGINE" "kernel"
|
||||
set_assistant_config "REVIEW_ENABLE_HUMAN_GATE" "false"
|
||||
set_assistant_config "REVIEW_ALLOWED_COMMANDS" "git,rg,cat,sed,wc"
|
||||
set_assistant_config "REVIEW_COMMAND_TIMEOUT_MS" "30000"
|
||||
|
||||
echo " Assistant 配置完成(含 Gitea 连接参数)"
|
||||
fi
|
||||
|
||||
echo "=== [6/7] 配置 Webhook ==="
|
||||
@@ -207,6 +213,5 @@ echo " PR: #${PR_NUMBER}"
|
||||
echo " Token: ${GITEA_TOKEN:0:8}..."
|
||||
echo ""
|
||||
echo "下一步:"
|
||||
echo " 1. 更新 assistant 容器的 GITEA_ACCESS_TOKEN:"
|
||||
echo " E2E_GITEA_TOKEN=${GITEA_TOKEN} docker compose -f docker-compose.e2e.yml up -d assistant"
|
||||
echo " 2. 运行测试: ./e2e/test.sh"
|
||||
echo " 1. 触发 PR webhook 或推送 feature 分支新提交"
|
||||
echo " 2. 运行 E2E 测试: bun run test:e2e"
|
||||
|
||||
@@ -2,6 +2,7 @@ import { BrowserRouter, Routes, Route, Navigate } from 'react-router-dom';
|
||||
import { useAuth } from './hooks/useAuth';
|
||||
import { LoginPage } from './pages/LoginPage';
|
||||
import DashboardPage from './pages/DashboardPage';
|
||||
import ReviewSessionsPage from './pages/ReviewSessionsPage';
|
||||
import { RepositoryManager } from './components/RepositoryManager';
|
||||
import { ConfigManager } from './components/ConfigManager';
|
||||
import { NotificationConfigPage } from './components/NotificationConfigPage';
|
||||
@@ -49,12 +50,13 @@ function AppContent() {
|
||||
</AuthGuard>
|
||||
}
|
||||
>
|
||||
<Route index element={<Navigate to="/repos" replace />} />
|
||||
<Route index element={<Navigate to="/sessions" replace />} />
|
||||
<Route path="sessions" element={<ReviewSessionsPage />} />
|
||||
<Route path="repos" element={<RepositoryManager />} />
|
||||
<Route path="config" element={<ConfigManager />} />
|
||||
<Route path="notifications" element={<NotificationConfigPage />} />
|
||||
<Route path="review-config" element={<ReviewConfigPage />} />
|
||||
<Route path="*" element={<Navigate to="/repos" replace />} />
|
||||
<Route path="*" element={<Navigate to="/sessions" replace />} />
|
||||
</Route>
|
||||
</Routes>
|
||||
<Toaster theme={resolvedTheme === 'dark' ? 'dark' : 'light'} />
|
||||
|
||||
@@ -17,7 +17,7 @@ import { toast } from 'sonner';
|
||||
// Engine-specific field visibility
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
type EngineMode = 'agent' | 'codex';
|
||||
type EngineMode = 'kernel' | 'codex';
|
||||
|
||||
/** The engine selector field — always visible at the top. */
|
||||
const ENGINE_FIELD = 'REVIEW_ENGINE';
|
||||
@@ -30,8 +30,7 @@ const AGENT_SHARED_FIELDS = new Set([
|
||||
'REVIEW_MAX_FILE_CONTENT_CHARS',
|
||||
]);
|
||||
|
||||
/** Fields specific to agent mode only. */
|
||||
const AGENT_ONLY_FIELDS = new Set([
|
||||
const KERNEL_ONLY_FIELDS = new Set([
|
||||
'REVIEW_AUTO_PUBLISH_MIN_CONFIDENCE',
|
||||
'REVIEW_ENABLE_HUMAN_GATE',
|
||||
'REVIEW_ALLOWED_COMMANDS',
|
||||
@@ -62,8 +61,8 @@ function getVisibleFields(engine: EngineMode, fields: ConfigFieldDto[]): ConfigF
|
||||
return fields.filter((f) => {
|
||||
if (f.envKey === ENGINE_FIELD) return false; // rendered separately
|
||||
switch (engine) {
|
||||
case 'agent':
|
||||
return AGENT_SHARED_FIELDS.has(f.envKey) || AGENT_ONLY_FIELDS.has(f.envKey);
|
||||
case 'kernel':
|
||||
return AGENT_SHARED_FIELDS.has(f.envKey) || KERNEL_ONLY_FIELDS.has(f.envKey);
|
||||
case 'codex':
|
||||
return CODEX_FIELDS.has(f.envKey);
|
||||
default:
|
||||
@@ -77,7 +76,7 @@ function getVisibleFields(engine: EngineMode, fields: ConfigFieldDto[]): ConfigF
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const ENGINE_OPTIONS: { value: EngineMode; label: string; description: string }[] = [
|
||||
{ value: 'agent', label: 'Agent', description: '多代理编排深度审查' },
|
||||
{ value: 'kernel', label: 'Kernel', description: 'PR Session + Agentic Loop 审查' },
|
||||
{ value: 'codex', label: 'Codex', description: 'Codex CLI 审查' },
|
||||
];
|
||||
|
||||
@@ -98,20 +97,19 @@ export function ReviewConfigPage() {
|
||||
// Derived: current engine mode
|
||||
const engine: EngineMode = useMemo(() => {
|
||||
const val = localConfig[ENGINE_FIELD];
|
||||
if (val === 'agent' || val === 'codex') return val;
|
||||
return 'agent';
|
||||
if (val === 'kernel' || val === 'codex') return val;
|
||||
return 'kernel';
|
||||
}, [localConfig]);
|
||||
|
||||
// Derived: review group and memory group from fetched data
|
||||
// Derived: review group from fetched data
|
||||
const reviewGroup = useMemo(() => data?.groups.find((g) => g.key === 'review'), [data]);
|
||||
const memoryGroup = useMemo(() => data?.groups.find((g) => g.key === 'memory'), [data]);
|
||||
|
||||
// Initialize local config from ALL groups (so save works for review + memory fields)
|
||||
// Initialize local config from review group
|
||||
useEffect(() => {
|
||||
if (data) {
|
||||
const initialState: Record<string, any> = {};
|
||||
data.groups
|
||||
.filter((g) => g.key === 'review' || g.key === 'memory')
|
||||
.filter((g) => g.key === 'review')
|
||||
.forEach((group) => {
|
||||
group.fields.forEach((field) => {
|
||||
if (field.sensitive && field.hasValue) {
|
||||
@@ -175,11 +173,9 @@ export function ReviewConfigPage() {
|
||||
};
|
||||
|
||||
const handleResetAll = () => {
|
||||
const groups = [reviewGroup, memoryGroup].filter(Boolean) as ConfigGroupDto[];
|
||||
const allOverrideKeys = groups
|
||||
.flatMap((g) => g.fields)
|
||||
.filter((f) => f.source === 'db')
|
||||
.map((f) => f.envKey);
|
||||
const allOverrideKeys = (reviewGroup?.fields ?? [])
|
||||
.filter((f) => f.source === 'db')
|
||||
.map((f) => f.envKey);
|
||||
if (allOverrideKeys.length === 0) return;
|
||||
if (confirm('确定要重置所有审查配置到默认值吗?这将立即生效。')) {
|
||||
resetMutation.mutate(allOverrideKeys);
|
||||
@@ -193,9 +189,8 @@ export function ReviewConfigPage() {
|
||||
);
|
||||
|
||||
const hasOverrides = useMemo(() => {
|
||||
const groups = [reviewGroup, memoryGroup].filter(Boolean) as ConfigGroupDto[];
|
||||
return groups.some((g) => g.fields.some((f) => f.source === 'db'));
|
||||
}, [reviewGroup, memoryGroup]);
|
||||
return (reviewGroup?.fields ?? []).some((f) => f.source === 'db');
|
||||
}, [reviewGroup]);
|
||||
|
||||
// -- Render states --
|
||||
|
||||
@@ -225,11 +220,11 @@ export function ReviewConfigPage() {
|
||||
const syntheticReviewGroup: ConfigGroupDto | null = reviewGroup
|
||||
? {
|
||||
...reviewGroup,
|
||||
label: engine === 'codex' ? 'Codex 审查设置' : 'Agent 审查设置',
|
||||
label: engine === 'codex' ? 'Codex 审查设置' : 'Kernel 审查设置',
|
||||
description:
|
||||
engine === 'codex'
|
||||
? 'Codex CLI 审查引擎配置'
|
||||
: '多代理编排审查引擎配置',
|
||||
: '基于 PR Session 的 agentic loop 审查引擎配置',
|
||||
fields: visibleReviewFields,
|
||||
}
|
||||
: null;
|
||||
@@ -358,18 +353,7 @@ export function ReviewConfigPage() {
|
||||
/>
|
||||
)}
|
||||
|
||||
{/* Memory group — agent mode only */}
|
||||
{engine === 'agent' && memoryGroup && (
|
||||
<ConfigGroupCard
|
||||
group={memoryGroup}
|
||||
localConfig={localConfig}
|
||||
onFieldChange={handleFieldChange}
|
||||
onReset={handleResetGroup}
|
||||
isResetting={resetMutation.isPending}
|
||||
/>
|
||||
)}
|
||||
|
||||
{engine !== 'codex' && (
|
||||
{engine === 'kernel' && (
|
||||
<>
|
||||
<ProviderList />
|
||||
<RoleAssignment />
|
||||
|
||||
@@ -2,27 +2,59 @@ import { useState, useEffect } from 'react';
|
||||
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
|
||||
import { Card, CardHeader, CardTitle, CardDescription, CardContent } from '@/components/ui/card';
|
||||
import { Button } from '@/components/ui/button';
|
||||
import { Alert, AlertDescription, AlertTitle } from '@/components/ui/alert';
|
||||
import { Badge } from '@/components/ui/badge';
|
||||
import { Label } from '@/components/ui/label';
|
||||
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@/components/ui/select';
|
||||
import { Separator } from '@/components/ui/separator';
|
||||
import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from '@/components/ui/table';
|
||||
import { toast } from 'sonner';
|
||||
import { Save, ShieldCheck } from 'lucide-react';
|
||||
import { fetchProviders, fetchRoles, setRole } from '@/services/llmProviderService';
|
||||
import { Bot, Route, Save, ShieldCheck, Sparkles, Workflow } from 'lucide-react';
|
||||
import {
|
||||
fetchKernelSubagents,
|
||||
fetchProviders,
|
||||
fetchRoles,
|
||||
setRole,
|
||||
type KernelSubagentDto,
|
||||
} from '@/services/llmProviderService';
|
||||
import { ModelCombobox } from './ModelCombobox';
|
||||
|
||||
const ROLE_LABELS: Record<string, { label: string; desc: string }> = {
|
||||
planner: { label: '规划器 Planner', desc: '多阶段审查的第一步,负责分析上下文并分配任务' },
|
||||
specialist: { label: '专家 Specialist', desc: '执行深度代码审查的主力模型,专注于发现具体问题' },
|
||||
judge: { label: '评审 Judge', desc: '对专家的建议进行审核、合并和过滤,确保评论质量' },
|
||||
embedding: { label: '嵌入 Embedding', desc: '用于向量化代码和注释,支持语义搜索 (Qdrant)' },
|
||||
planner: { label: 'Planner', desc: '用于 triage / planning / context compression,负责审查分流与上下文压缩' },
|
||||
specialist: { label: 'Specialist', desc: '用于 correctness / security / quality 等深度审查' },
|
||||
};
|
||||
|
||||
const ROLES = ['planner', 'specialist', 'judge', 'embedding'];
|
||||
const ROLES = ['planner', 'specialist'];
|
||||
|
||||
/**
 * Locally edited provider/model selection for one model role.
 */
interface RoleState {
  /** Id of the selected provider, or `null` when none is selected. */
  providerId: string | null;
  /** Model name for the role; an empty string means no model is set. */
  model: string;
}
|
||||
|
||||
/**
 * Picks the badge class string for a subagent's model role.
 *
 * @param modelRole - Role name; `'planner'` and `'specialist'` have dedicated colours.
 * @returns The class string for the badge. Any other value, including
 *   `undefined`, gets the neutral muted style.
 */
function getModelRoleBadgeClass(modelRole?: string): string {
  switch (modelRole) {
    case 'planner':
      return 'border-info/30 bg-info/10 text-info';
    case 'specialist':
      return 'border-primary/30 bg-primary/10 text-primary';
    default:
      return 'border-border bg-muted/40 text-muted-foreground';
  }
}
|
||||
|
||||
/**
 * Picks the badge class string for a subagent's registration source.
 *
 * @param source - The subagent's `source` value; `'built-in'`, `'plugin'` and
 *   `'custom'` each have a dedicated colour.
 * @returns The class string for the badge. Any unrecognised value falls back
 *   to the neutral muted style.
 */
function getSourceBadgeClass(source: KernelSubagentDto['source']): string {
  switch (source) {
    case 'built-in':
      return 'border-primary/20 bg-primary/10 text-primary';
    case 'plugin':
      return 'border-warning/20 bg-warning/10 text-warning';
    case 'custom':
      return 'border-success/20 bg-success/10 text-success';
    default:
      return 'border-border bg-muted/40 text-muted-foreground';
  }
}
|
||||
|
||||
export function RoleAssignment() {
|
||||
const queryClient = useQueryClient();
|
||||
const [roleStates, setRoleStates] = useState<Record<string, RoleState>>({});
|
||||
@@ -37,6 +69,11 @@ export function RoleAssignment() {
|
||||
queryFn: fetchRoles,
|
||||
});
|
||||
|
||||
const { data: subagents = [], isLoading: isSubagentsLoading } = useQuery({
|
||||
queryKey: ['kernel-subagents'],
|
||||
queryFn: fetchKernelSubagents,
|
||||
});
|
||||
|
||||
useEffect(() => {
|
||||
if (roles.length > 0) {
|
||||
const initial: Record<string, RoleState> = {};
|
||||
@@ -46,7 +83,6 @@ export function RoleAssignment() {
|
||||
model: role.model || '',
|
||||
};
|
||||
});
|
||||
// Fill missing roles
|
||||
ROLES.forEach(r => {
|
||||
if (!initial[r]) {
|
||||
initial[r] = { providerId: null, model: '' };
|
||||
@@ -118,96 +154,239 @@ export function RoleAssignment() {
|
||||
<div className="w-10 h-10 rounded-xl bg-warning/10 flex items-center justify-center border border-warning/20 group-hover:bg-warning/20 transition-all duration-300">
|
||||
<ShieldCheck className="h-5 w-5 text-warning" />
|
||||
</div>
|
||||
<div className="space-y-1">
|
||||
<CardTitle className="text-xl font-bold text-foreground tracking-tight">
|
||||
角色分配
|
||||
</CardTitle>
|
||||
<CardDescription className="text-muted-foreground">
|
||||
为 AI 审查系统的不同角色指定提供商和模型
|
||||
</CardDescription>
|
||||
</div>
|
||||
<div className="space-y-1">
|
||||
<CardTitle className="text-xl font-bold text-foreground tracking-tight">
|
||||
Subagents 与模型路由
|
||||
</CardTitle>
|
||||
<CardDescription className="text-muted-foreground">
|
||||
上层展示 subagent 目录,下层配置 Planner / Specialist 模型路由
|
||||
</CardDescription>
|
||||
</div>
|
||||
</div>
|
||||
</CardHeader>
|
||||
|
||||
<CardContent className="theme-card-content">
|
||||
{isLoading ? (
|
||||
<div className="h-32 flex items-center justify-center text-muted-foreground gap-2">
|
||||
<div className="w-4 h-4 rounded-full border-2 border-primary border-t-transparent animate-spin" />
|
||||
加载角色配置...
|
||||
<CardContent className="theme-card-content space-y-8">
|
||||
{/* ── Subagents 目录 ──────────────────────────────────────────── */}
|
||||
<section className="space-y-4">
|
||||
<div className="flex items-center gap-3">
|
||||
<div className="flex h-9 w-9 items-center justify-center rounded-xl border border-primary/20 bg-primary/10 text-primary">
|
||||
<Sparkles className="h-4 w-4" />
|
||||
</div>
|
||||
<h3 className="text-base font-semibold text-foreground">Subagents 目录</h3>
|
||||
</div>
|
||||
) : (
|
||||
<div className="divide-y divide-border/50">
|
||||
{ROLES.map(role => {
|
||||
const state = roleStates[role] || { providerId: null, model: '' };
|
||||
const isDirty = roles.find(r => r.role === role)?.providerId !== state.providerId ||
|
||||
(roles.find(r => r.role === role)?.model || '') !== state.model;
|
||||
|
||||
return (
|
||||
<div key={role} className="flex flex-col md:flex-row items-start md:items-center gap-4 py-5 px-1 hover:bg-accent/40 transition-colors rounded-lg">
|
||||
<div className="w-full md:w-1/3 space-y-1.5">
|
||||
<Label className="text-base font-semibold text-foreground">
|
||||
{ROLE_LABELS[role]?.label || role}
|
||||
</Label>
|
||||
<p className="text-sm text-muted-foreground leading-relaxed">
|
||||
{ROLE_LABELS[role]?.desc}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div className="w-full md:w-2/3 flex flex-col sm:flex-row items-start sm:items-center gap-3">
|
||||
<div className="flex-1 w-full space-y-1">
|
||||
<Label className="text-xs text-muted-foreground">提供商</Label>
|
||||
<Select
|
||||
value={state.providerId || ''}
|
||||
onValueChange={(v) => handleProviderChange(role, v)}
|
||||
>
|
||||
<SelectTrigger className="bg-muted/50 border-border text-foreground">
|
||||
<SelectValue placeholder="选择提供商" />
|
||||
</SelectTrigger>
|
||||
<SelectContent className="bg-popover border-border text-foreground">
|
||||
{enabledProviders.map(p => (
|
||||
<SelectItem key={p.id} value={p.id} description={p.type} className="focus:bg-accent focus:text-primary">
|
||||
{p.name}
|
||||
</SelectItem>
|
||||
))}
|
||||
{enabledProviders.length === 0 && (
|
||||
<div className="px-2 py-3 text-xs text-danger text-center border-t border-border/60">
|
||||
无可用提供商。请先添加并启用。
|
||||
|
||||
<Alert className="border-primary/20 bg-primary/5">
|
||||
<Bot className="h-4 w-4 text-primary" />
|
||||
<AlertTitle>流程编排由 kernel 自动驱动</AlertTitle>
|
||||
<AlertDescription>
|
||||
kernel 根据 session state 与 planner 选择注册式 subagent 执行。下方展示的是当前已注册的 subagent 及其能力标签。
|
||||
</AlertDescription>
|
||||
</Alert>
|
||||
|
||||
{isSubagentsLoading ? (
|
||||
<div className="h-32 flex items-center justify-center text-muted-foreground gap-2">
|
||||
<div className="w-4 h-4 rounded-full border-2 border-primary border-t-transparent animate-spin" />
|
||||
加载 subagent 目录...
|
||||
</div>
|
||||
) : (
|
||||
<div className="space-y-4">
|
||||
<div className="grid gap-4 md:grid-cols-3">
|
||||
<Card className="border-border/70 bg-card/70">
|
||||
<CardContent className="p-5">
|
||||
<div className="text-xs uppercase tracking-[0.24em] text-muted-foreground">Subagents</div>
|
||||
<div className="mt-2 text-3xl font-semibold tracking-tight text-foreground">{subagents.length}</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
<Card className="border-border/70 bg-card/70">
|
||||
<CardContent className="p-5">
|
||||
<div className="text-xs uppercase tracking-[0.24em] text-muted-foreground">Built-in</div>
|
||||
<div className="mt-2 text-3xl font-semibold tracking-tight text-foreground">
|
||||
{subagents.filter((item) => item.source === 'built-in').length}
|
||||
</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
<Card className="border-border/70 bg-card/70">
|
||||
<CardContent className="p-5">
|
||||
<div className="text-xs uppercase tracking-[0.24em] text-muted-foreground">模型角色</div>
|
||||
<div className="mt-2 text-3xl font-semibold tracking-tight text-foreground">
|
||||
{new Set(subagents.map((item) => item.modelRole).filter(Boolean)).size}
|
||||
</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
</div>
|
||||
|
||||
<Card className="border-border/70 bg-card/70">
|
||||
<CardContent className="p-0">
|
||||
<Table>
|
||||
<TableHeader>
|
||||
<TableRow>
|
||||
<TableHead className="pl-5">Subagent</TableHead>
|
||||
<TableHead>能力定位</TableHead>
|
||||
<TableHead>模型角色</TableHead>
|
||||
<TableHead>标签</TableHead>
|
||||
<TableHead className="pr-5 text-right">状态</TableHead>
|
||||
</TableRow>
|
||||
</TableHeader>
|
||||
<TableBody>
|
||||
{subagents.map((subagent) => (
|
||||
<TableRow key={subagent.name}>
|
||||
<TableCell className="pl-5 align-top">
|
||||
<div className="space-y-2">
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="font-semibold text-foreground">{subagent.name}</span>
|
||||
<Badge className={getSourceBadgeClass(subagent.source)}>{subagent.source}</Badge>
|
||||
</div>
|
||||
<div className="text-sm text-muted-foreground">{subagent.description}</div>
|
||||
</div>
|
||||
)}
|
||||
</SelectContent>
|
||||
</Select>
|
||||
</div>
|
||||
</TableCell>
|
||||
<TableCell className="align-top text-sm text-muted-foreground whitespace-normal">
|
||||
{subagent.whenToUse}
|
||||
</TableCell>
|
||||
<TableCell className="align-top">
|
||||
<Badge className={getModelRoleBadgeClass(subagent.modelRole)}>
|
||||
<Route className="h-3 w-3" />
|
||||
{subagent.modelRole ?? '未绑定'}
|
||||
</Badge>
|
||||
</TableCell>
|
||||
<TableCell className="align-top">
|
||||
<div className="flex flex-wrap gap-1.5 max-w-[260px]">
|
||||
{subagent.tags.map((tag) => (
|
||||
<Badge key={tag} variant="outline" className="bg-muted/30">{tag}</Badge>
|
||||
))}
|
||||
</div>
|
||||
</TableCell>
|
||||
<TableCell className="pr-5 align-top text-right">
|
||||
<Badge className={subagent.resumable ? 'border-success/20 bg-success/10 text-success' : 'border-border bg-muted/40 text-muted-foreground'}>
|
||||
{subagent.resumable ? '可恢复' : '一次性'}
|
||||
</Badge>
|
||||
</TableCell>
|
||||
</TableRow>
|
||||
))}
|
||||
</TableBody>
|
||||
</Table>
|
||||
</CardContent>
|
||||
</Card>
|
||||
</div>
|
||||
)}
|
||||
</section>
|
||||
|
||||
<div className="flex-1 w-full space-y-1">
|
||||
<Label className="text-xs text-muted-foreground">使用的模型</Label>
|
||||
<ModelCombobox
|
||||
providerType={providers.find(p => p.id === state.providerId)?.type}
|
||||
value={state.model}
|
||||
onChange={(model) => handleModelChange(role, model)}
|
||||
placeholder="选择或输入模型..."
|
||||
disabled={!state.providerId}
|
||||
className="w-full"
|
||||
/>
|
||||
</div>
|
||||
<Separator />
|
||||
|
||||
<div className="pt-5 flex-shrink-0">
|
||||
<Button
|
||||
size="sm"
|
||||
onClick={() => handleSave(role)}
|
||||
disabled={!isDirty || saveMutation.isPending}
|
||||
variant={isDirty ? 'default' : 'secondary'}
|
||||
className={`transition-all ${isDirty ? 'bg-warning/15 text-warning border border-warning/30 hover:bg-warning/25' : 'bg-muted/50 text-muted-foreground border border-transparent'}`}
|
||||
>
|
||||
<Save className="w-4 h-4 mr-1.5" />
|
||||
{isDirty ? '保存更改' : '已保存'}
|
||||
</Button>
|
||||
{/* ── 模型角色路由 ─────────────────────────────────────────────── */}
|
||||
<section className="space-y-4">
|
||||
<div className="flex items-center gap-3">
|
||||
<div className="flex h-9 w-9 items-center justify-center rounded-xl border border-warning/25 bg-warning/10 text-warning">
|
||||
<Workflow className="h-4 w-4" />
|
||||
</div>
|
||||
<h3 className="text-base font-semibold text-foreground">模型角色路由</h3>
|
||||
</div>
|
||||
|
||||
<Alert className="border-warning/20 bg-warning/5">
|
||||
<ShieldCheck className="h-4 w-4 text-warning" />
|
||||
<AlertTitle>这里配置的是底层模型路由,不是流程角色编排</AlertTitle>
|
||||
<AlertDescription>
|
||||
Planner / Specialist 决定由哪个 provider/model 响应 LLM 调用。subagent 的注册、标签和执行顺序由 kernel 控制。
|
||||
</AlertDescription>
|
||||
</Alert>
|
||||
|
||||
{isLoading ? (
|
||||
<div className="h-32 flex items-center justify-center text-muted-foreground gap-2">
|
||||
<div className="w-4 h-4 rounded-full border-2 border-primary border-t-transparent animate-spin" />
|
||||
加载模型角色路由...
|
||||
</div>
|
||||
) : (
|
||||
<div className="divide-y divide-border/50">
|
||||
{ROLES.map(role => {
|
||||
const state = roleStates[role] || { providerId: null, model: '' };
|
||||
const isDirty = roles.find(r => r.role === role)?.providerId !== state.providerId ||
|
||||
(roles.find(r => r.role === role)?.model || '') !== state.model;
|
||||
const consumers = subagents.filter((item) => item.modelRole === role);
|
||||
|
||||
return (
|
||||
<div key={role} className="py-5 px-1">
|
||||
<div className="flex flex-col gap-4 rounded-lg border border-border/60 bg-card/40 p-4 hover:bg-accent/20 transition-colors">
|
||||
<div className="space-y-1.5">
|
||||
<div className="flex items-center gap-2">
|
||||
<Label className="text-base font-semibold text-foreground">
|
||||
{ROLE_LABELS[role]?.label || role}
|
||||
</Label>
|
||||
<Badge variant="outline" className="bg-muted/30">
|
||||
{consumers.length} 个 subagent
|
||||
</Badge>
|
||||
</div>
|
||||
<p className="text-sm text-muted-foreground leading-relaxed">
|
||||
{ROLE_LABELS[role]?.desc}
|
||||
</p>
|
||||
{consumers.length > 0 && (
|
||||
<div className="flex flex-wrap gap-1.5 pt-1">
|
||||
{consumers.map((item) => (
|
||||
<Badge key={item.name} className="border-primary/15 bg-primary/5 text-primary">
|
||||
{item.name}
|
||||
</Badge>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<Separator />
|
||||
|
||||
<div className="flex flex-col sm:flex-row items-start sm:items-center gap-3">
|
||||
<div className="flex-1 w-full space-y-1">
|
||||
<Label className="text-xs text-muted-foreground">提供商</Label>
|
||||
<Select
|
||||
value={state.providerId || ''}
|
||||
onValueChange={(v) => handleProviderChange(role, v)}
|
||||
>
|
||||
<SelectTrigger className="bg-muted/50 border-border text-foreground">
|
||||
<SelectValue placeholder="选择提供商" />
|
||||
</SelectTrigger>
|
||||
<SelectContent className="bg-popover border-border text-foreground">
|
||||
{enabledProviders.map(p => (
|
||||
<SelectItem key={p.id} value={p.id} description={p.type} className="focus:bg-accent focus:text-primary">
|
||||
{p.name}
|
||||
</SelectItem>
|
||||
))}
|
||||
{enabledProviders.length === 0 && (
|
||||
<div className="px-2 py-3 text-xs text-danger text-center border-t border-border/60">
|
||||
无可用提供商。请先添加并启用。
|
||||
</div>
|
||||
)}
|
||||
</SelectContent>
|
||||
</Select>
|
||||
</div>
|
||||
|
||||
<div className="flex-1 w-full space-y-1">
|
||||
<Label className="text-xs text-muted-foreground">使用的模型</Label>
|
||||
<ModelCombobox
|
||||
providerType={providers.find(p => p.id === state.providerId)?.type}
|
||||
value={state.model}
|
||||
onChange={(model) => handleModelChange(role, model)}
|
||||
placeholder="选择或输入模型..."
|
||||
disabled={!state.providerId}
|
||||
className="w-full"
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div className="pt-5 flex-shrink-0">
|
||||
<Button
|
||||
size="sm"
|
||||
onClick={() => handleSave(role)}
|
||||
disabled={!isDirty || saveMutation.isPending}
|
||||
variant={isDirty ? 'default' : 'secondary'}
|
||||
className={`transition-all ${isDirty ? 'bg-warning/15 text-warning border border-warning/30 hover:bg-warning/25' : 'bg-muted/50 text-muted-foreground border border-transparent'}`}
|
||||
>
|
||||
<Save className="w-4 h-4 mr-1.5" />
|
||||
{isDirty ? '保存更改' : '已保存'}
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
)}
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
)}
|
||||
</section>
|
||||
</CardContent>
|
||||
</Card>
|
||||
);
|
||||
|
||||
@@ -4,7 +4,12 @@ import userEvent from '@testing-library/user-event';
|
||||
import type { ReactNode } from 'react';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { RoleAssignment } from '../RoleAssignment';
|
||||
import { fetchProviders, fetchRoles, setRole } from '@/services/llmProviderService';
|
||||
import {
|
||||
fetchKernelSubagents,
|
||||
fetchProviders,
|
||||
fetchRoles,
|
||||
setRole,
|
||||
} from '@/services/llmProviderService';
|
||||
|
||||
vi.mock('sonner', () => ({
|
||||
toast: {
|
||||
@@ -18,6 +23,7 @@ vi.mock('@/services/llmProviderService', async () => {
|
||||
return {
|
||||
...actual,
|
||||
fetchProviders: vi.fn(),
|
||||
fetchKernelSubagents: vi.fn(),
|
||||
fetchRoles: vi.fn(),
|
||||
setRole: vi.fn(),
|
||||
fetchModelSuggestions: vi.fn().mockResolvedValue({
|
||||
@@ -40,7 +46,7 @@ function renderWithQuery(ui: ReactNode) {
|
||||
}
|
||||
|
||||
describe('RoleAssignment', () => {
|
||||
it('renders role cards and supports provider/model editing', async () => {
|
||||
it('renders subagent directory and model role routing', async () => {
|
||||
vi.mocked(fetchProviders).mockResolvedValueOnce([
|
||||
{
|
||||
id: 'p1',
|
||||
@@ -65,6 +71,29 @@ describe('RoleAssignment', () => {
|
||||
},
|
||||
]);
|
||||
|
||||
vi.mocked(fetchKernelSubagents).mockResolvedValueOnce([
|
||||
{
|
||||
kind: 'subagent',
|
||||
name: 'review:triage',
|
||||
source: 'built-in',
|
||||
description: '根据变更范围决定 review 域与审查模式',
|
||||
whenToUse: '当需要规划任务时',
|
||||
modelRole: 'planner',
|
||||
tags: ['review', 'planner', 'triage'],
|
||||
resumable: true,
|
||||
},
|
||||
{
|
||||
kind: 'subagent',
|
||||
name: 'review:full_review',
|
||||
source: 'built-in',
|
||||
description: '执行一次完整自主代码审查',
|
||||
whenToUse: '当 triage 生成审查提示后执行完整审查',
|
||||
modelRole: 'specialist',
|
||||
tags: ['review', 'specialist', 'full-review', 'autonomous-review'],
|
||||
resumable: true,
|
||||
},
|
||||
]);
|
||||
|
||||
vi.mocked(setRole).mockResolvedValue({
|
||||
role: 'planner',
|
||||
providerId: 'p1',
|
||||
@@ -76,11 +105,12 @@ describe('RoleAssignment', () => {
|
||||
const user = userEvent.setup();
|
||||
renderWithQuery(<RoleAssignment />);
|
||||
|
||||
expect(await screen.findByText('角色分配')).toBeInTheDocument();
|
||||
expect(await screen.findByText('规划器 Planner')).toBeInTheDocument();
|
||||
expect(await screen.findByText('Subagents 与模型路由')).toBeInTheDocument();
|
||||
expect((await screen.findAllByText('review:triage')).length).toBeGreaterThan(0);
|
||||
expect(screen.getByText('模型角色路由')).toBeInTheDocument();
|
||||
expect(screen.getByText('Planner')).toBeInTheDocument();
|
||||
expect(screen.getByText('Specialist')).toBeInTheDocument();
|
||||
|
||||
// Radix Select renders placeholder in a span with pointer-events: none.
|
||||
// Click the trigger button (parent) instead of the placeholder text.
|
||||
const providerPlaceholders = screen.getAllByText('选择提供商');
|
||||
const triggerButton = providerPlaceholders[0].closest('button')!;
|
||||
await user.click(triggerButton);
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
import { useState, useEffect } from 'react';
|
||||
import { NavLink, Outlet, useLocation } from 'react-router-dom';
|
||||
import { Button } from '@/components/ui/button';
|
||||
import { LogOut, Bot, FolderGit2, Sliders, Bell, Menu, X, PanelLeftClose, PanelLeftOpen, FileSearch, Sun, Moon, Palette } from 'lucide-react';
|
||||
import { LogOut, Bot, FolderGit2, Sliders, Bell, Menu, X, PanelLeftClose, PanelLeftOpen, FileSearch, Sun, Moon, Palette, Waypoints } from 'lucide-react';
|
||||
import { useTheme } from 'next-themes';
|
||||
import { Select, SelectContent, SelectItem, SelectTrigger } from '@/components/ui/select';
|
||||
import { isColorPalette, useColorPalette } from '@/hooks/useColorPalette';
|
||||
|
||||
const navItems = [
|
||||
{ path: '/sessions', label: '审查会话', icon: Waypoints },
|
||||
{ path: '/repos', label: '仓库管理', icon: FolderGit2 },
|
||||
{ path: '/config', label: '系统配置', icon: Sliders },
|
||||
{ path: '/notifications', label: '通知管理', icon: Bell },
|
||||
|
||||
368
frontend/src/pages/ReviewSessionsPage.tsx
Normal file
368
frontend/src/pages/ReviewSessionsPage.tsx
Normal file
@@ -0,0 +1,368 @@
|
||||
import { useEffect, useMemo, useState } from 'react';
|
||||
import { useQuery } from '@tanstack/react-query';
|
||||
import { AlertTriangle, Clock3, ListTodo, RefreshCw, Waypoints } from 'lucide-react';
|
||||
import { Badge } from '@/components/ui/badge';
|
||||
import { Button } from '@/components/ui/button';
|
||||
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card';
|
||||
import { Skeleton } from '@/components/ui/skeleton';
|
||||
import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs';
|
||||
import {
|
||||
fetchReviewSessionDetail,
|
||||
fetchReviewSessions,
|
||||
type ReviewPlanStepDto,
|
||||
type ReviewSessionSummaryRecordDto,
|
||||
type ReviewTimelineEntryDto,
|
||||
} from '@/services/reviewSessionService';
|
||||
|
||||
/** Display label shown for each review session status. */
const statusLabelMap: Record<ReviewSessionSummaryRecordDto['summary']['status'], string> = {
  queued: '排队中',
  planning: '制定计划',
  executing: '执行中',
  awaiting_human_feedback: '等待人工反馈',
  completed: '已完成',
  failed: '失败',
  ignored: '已忽略',
};
|
||||
|
||||
/** Badge class string applied for each review session status. */
const statusClassMap: Record<ReviewSessionSummaryRecordDto['summary']['status'], string> = {
  queued: 'border-border bg-muted/60 text-muted-foreground',
  planning: 'border-info/30 bg-info/10 text-info',
  executing: 'border-primary/30 bg-primary/10 text-primary',
  awaiting_human_feedback: 'border-warning/30 bg-warning/15 text-warning-foreground',
  completed: 'border-success/30 bg-success/15 text-success',
  failed: 'border-destructive/30 bg-destructive/10 text-destructive',
  ignored: 'border-border bg-muted/50 text-muted-foreground',
};
|
||||
|
||||
/**
 * Class string for each plan step status.
 * NOTE(review): presumably used for plan-step badges further down the page — confirm at the usage site.
 */
const planStatusClassMap: Record<ReviewPlanStepDto['status'], string> = {
  pending: 'border-border bg-muted/40 text-muted-foreground',
  queued: 'border-info/20 bg-info/10 text-info',
  running: 'border-primary/20 bg-primary/10 text-primary',
  completed: 'border-success/20 bg-success/10 text-success',
  failed: 'border-destructive/20 bg-destructive/10 text-destructive',
  skipped: 'border-border bg-muted/40 text-muted-foreground',
};
|
||||
|
||||
/**
 * Class string for each timeline entry tone.
 * NOTE(review): presumably styles timeline entry containers — confirm at the usage site.
 */
const timelineToneClassMap: Record<ReviewTimelineEntryDto['tone'], string> = {
  neutral: 'border-border bg-card/80',
  success: 'border-success/20 bg-success/5',
  warning: 'border-warning/20 bg-warning/5',
  danger: 'border-destructive/20 bg-destructive/5',
};
|
||||
|
||||
/**
 * Formats a timestamp string as a short `zh-CN` date-time (month, day, hour, minute).
 *
 * @param value - A date string accepted by the `Date` constructor.
 * @returns The localized text, or `'—'` when `value` is missing, empty, or
 *   cannot be parsed as a date.
 */
function formatDate(value?: string): string {
  if (!value) return '—';

  const parsed = new Date(value);
  // Without this guard an unparseable timestamp would render as the literal
  // text "Invalid Date"; show the same placeholder as a missing value instead.
  if (Number.isNaN(parsed.getTime())) return '—';

  return parsed.toLocaleString('zh-CN', {
    month: '2-digit',
    day: '2-digit',
    hour: '2-digit',
    minute: '2-digit',
  });
}
|
||||
|
||||
/**
 * Shortens a commit SHA for display.
 *
 * @param value - The full SHA string.
 * @returns The first 8 characters of `value` (the whole string when it is
 *   shorter), or `'—'` when `value` is missing or empty.
 */
function truncateSha(value?: string): string {
  if (!value) return '—';
  return value.slice(0, 8);
}
|
||||
|
||||
/**
 * Summary tile that renders an icon next to an uppercase label and a large value.
 *
 * @param label - Caption shown above the value.
 * @param value - The metric to display.
 * @param icon - Icon component rendered in the tile's leading square; typed
 *   after `Clock3`, so any icon with the same component type is accepted.
 */
function SessionMetric({
  label,
  value,
  icon: Icon,
}: {
  label: string;
  value: string | number;
  icon: typeof Clock3;
}) {
  return (
    <Card className="gap-0 border-border/70 bg-card/70 backdrop-blur-sm">
      <CardContent className="flex items-center gap-4 p-5">
        <div className="flex h-11 w-11 items-center justify-center rounded-2xl border border-primary/20 bg-primary/10 text-primary">
          <Icon className="h-5 w-5" />
        </div>
        <div>
          <div className="text-xs uppercase tracking-[0.24em] text-muted-foreground">{label}</div>
          <div className="mt-1 text-2xl font-semibold tracking-tight text-foreground">{value}</div>
        </div>
      </CardContent>
    </Card>
  );
}
|
||||
|
||||
export default function ReviewSessionsPage() {
|
||||
const [selectedSessionId, setSelectedSessionId] = useState<string | null>(null);
|
||||
|
||||
const sessionsQuery = useQuery({
|
||||
queryKey: ['review-sessions'],
|
||||
queryFn: fetchReviewSessions,
|
||||
refetchInterval: 15000,
|
||||
});
|
||||
|
||||
useEffect(() => {
|
||||
if (!selectedSessionId && sessionsQuery.data?.length) {
|
||||
setSelectedSessionId(sessionsQuery.data[0].session.id);
|
||||
}
|
||||
}, [selectedSessionId, sessionsQuery.data]);
|
||||
|
||||
const detailQuery = useQuery({
|
||||
queryKey: ['review-session-detail', selectedSessionId],
|
||||
queryFn: () => fetchReviewSessionDetail(selectedSessionId as string),
|
||||
enabled: !!selectedSessionId,
|
||||
refetchInterval: 15000,
|
||||
});
|
||||
|
||||
const metrics = useMemo(() => {
|
||||
const sessions = sessionsQuery.data ?? [];
|
||||
return {
|
||||
total: sessions.length,
|
||||
active: sessions.filter(({ summary }) => summary.status === 'planning' || summary.status === 'executing').length,
|
||||
waiting: sessions.filter(({ summary }) => summary.status === 'awaiting_human_feedback').length,
|
||||
findings: sessions.reduce((total, item) => total + item.summary.findingCount, 0),
|
||||
};
|
||||
}, [sessionsQuery.data]);
|
||||
|
||||
return (
|
||||
<div className="theme-page-frame">
|
||||
<div className="theme-page-content space-y-6">
|
||||
<div className="grid gap-4 md:grid-cols-2 xl:grid-cols-4">
|
||||
<SessionMetric label="PR 会话" value={metrics.total} icon={Waypoints} />
|
||||
<SessionMetric label="执行中" value={metrics.active} icon={RefreshCw} />
|
||||
<SessionMetric label="待人工确认" value={metrics.waiting} icon={AlertTriangle} />
|
||||
<SessionMetric label="累计 Findings" value={metrics.findings} icon={ListTodo} />
|
||||
</div>
|
||||
|
||||
<div className="grid gap-6 xl:grid-cols-[360px_minmax(0,1fr)]">
|
||||
<Card className="border-border/70 bg-card/80 backdrop-blur-sm">
|
||||
<CardHeader className="border-b border-border/60 pb-4">
|
||||
<div className="flex items-center justify-between gap-3">
|
||||
<div>
|
||||
<CardTitle className="text-xl">审查会话</CardTitle>
|
||||
<CardDescription>每个 PR head 对应一个 session,支持计划与继续执行。</CardDescription>
|
||||
</div>
|
||||
<Button
|
||||
variant="outline"
|
||||
size="sm"
|
||||
onClick={() => sessionsQuery.refetch()}
|
||||
className="border-border/70"
|
||||
>
|
||||
刷新
|
||||
</Button>
|
||||
</div>
|
||||
</CardHeader>
|
||||
<CardContent className="space-y-3 p-4">
|
||||
{sessionsQuery.isLoading && (
|
||||
<div className="space-y-3">
|
||||
{Array.from({ length: 5 }).map((_, index) => (
|
||||
<Skeleton key={index} className="h-24 rounded-2xl" />
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{!sessionsQuery.isLoading && sessionsQuery.data?.length === 0 && (
|
||||
<div className="rounded-2xl border border-dashed border-border/70 bg-muted/20 p-6 text-sm text-muted-foreground">
|
||||
还没有审查会话。收到新的 PR webhook 后,这里会出现 session 与执行计划。
|
||||
</div>
|
||||
)}
|
||||
|
||||
{sessionsQuery.data?.map(({ session, summary }) => {
|
||||
const selected = selectedSessionId === session.id;
|
||||
return (
|
||||
<button
|
||||
key={session.id}
|
||||
type="button"
|
||||
onClick={() => setSelectedSessionId(session.id)}
|
||||
className={`w-full rounded-2xl border p-4 text-left transition-all ${
|
||||
selected
|
||||
? 'border-primary/40 bg-primary/10 shadow-sm'
|
||||
: 'border-border/70 bg-card/60 hover:border-primary/20 hover:bg-accent/30'
|
||||
}`}
|
||||
>
|
||||
<div className="flex items-start justify-between gap-3">
|
||||
<div>
|
||||
<div className="font-semibold tracking-tight text-foreground">
|
||||
{summary.owner}/{summary.repo}
|
||||
{summary.prNumber ? ` #${summary.prNumber}` : ''}
|
||||
</div>
|
||||
<div className="mt-1 font-mono text-xs text-muted-foreground">{summary.scopeKey}</div>
|
||||
</div>
|
||||
<Badge className={statusClassMap[summary.status]}>{statusLabelMap[summary.status]}</Badge>
|
||||
</div>
|
||||
<div className="mt-4 grid grid-cols-2 gap-3 text-sm">
|
||||
<div>
|
||||
<div className="text-muted-foreground">当前步骤</div>
|
||||
<div className="mt-1 font-medium text-foreground">{summary.currentStep ?? '等待计划'}</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-muted-foreground">Head SHA</div>
|
||||
<div className="mt-1 font-mono text-foreground">{truncateSha(summary.headSha)}</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-muted-foreground">Findings</div>
|
||||
<div className="mt-1 font-medium text-foreground">{summary.findingCount}</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-muted-foreground">更新时间</div>
|
||||
<div className="mt-1 font-medium text-foreground">{formatDate(summary.updatedAt)}</div>
|
||||
</div>
|
||||
</div>
|
||||
</button>
|
||||
);
|
||||
})}
|
||||
</CardContent>
|
||||
</Card>
|
||||
|
||||
<Card className="border-border/70 bg-card/80 backdrop-blur-sm">
|
||||
<CardHeader className="border-b border-border/60 pb-4">
|
||||
<div className="flex items-center justify-between gap-3">
|
||||
<div>
|
||||
<CardTitle className="text-xl">会话详情</CardTitle>
|
||||
<CardDescription>审查结果、运行日志按 session 聚合。</CardDescription>
|
||||
</div>
|
||||
{detailQuery.data && (
|
||||
<Badge className={statusClassMap[detailQuery.data.summary.status]}>
|
||||
{statusLabelMap[detailQuery.data.summary.status]}
|
||||
</Badge>
|
||||
)}
|
||||
</div>
|
||||
</CardHeader>
|
||||
<CardContent className="p-4">
|
||||
{detailQuery.isLoading && <Skeleton className="h-[640px] rounded-2xl" />}
|
||||
|
||||
{!detailQuery.isLoading && !detailQuery.data && (
|
||||
<div className="rounded-2xl border border-dashed border-border/70 bg-muted/20 p-8 text-sm text-muted-foreground">
|
||||
选择一个 session 查看它的执行计划与时间线。
|
||||
</div>
|
||||
)}
|
||||
|
||||
{detailQuery.data && (
|
||||
<div className="space-y-4">
|
||||
<div className="grid gap-3 md:grid-cols-4">
|
||||
<div className="rounded-2xl border border-border/70 bg-muted/25 p-4">
|
||||
<div className="text-xs uppercase tracking-[0.2em] text-muted-foreground">Session</div>
|
||||
<div className="mt-2 font-mono text-sm text-foreground">{detailQuery.data.session.id.slice(0, 8)}</div>
|
||||
</div>
|
||||
<div className="rounded-2xl border border-border/70 bg-muted/25 p-4">
|
||||
<div className="text-xs uppercase tracking-[0.2em] text-muted-foreground">Head SHA</div>
|
||||
<div className="mt-2 font-mono text-sm text-foreground">{truncateSha(detailQuery.data.summary.headSha)}</div>
|
||||
</div>
|
||||
<div className="rounded-2xl border border-border/70 bg-muted/25 p-4">
|
||||
<div className="text-xs uppercase tracking-[0.2em] text-muted-foreground">当前步骤</div>
|
||||
<div className="mt-2 text-sm font-medium text-foreground">{detailQuery.data.summary.currentStep ?? '无'}</div>
|
||||
</div>
|
||||
<div className="rounded-2xl border border-border/70 bg-muted/25 p-4">
|
||||
<div className="text-xs uppercase tracking-[0.2em] text-muted-foreground">待执行任务</div>
|
||||
<div className="mt-2 text-sm font-medium text-foreground">{detailQuery.data.summary.pendingTaskCount}</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<Tabs defaultValue="results" className="space-y-4">
|
||||
<TabsList className="grid w-full grid-cols-2">
|
||||
<TabsTrigger value="results">审查结果</TabsTrigger>
|
||||
<TabsTrigger value="logs">运行日志</TabsTrigger>
|
||||
</TabsList>
|
||||
|
||||
<TabsContent value="results" className="space-y-4">
|
||||
<div>
|
||||
<h4 className="mb-3 text-sm font-semibold uppercase tracking-wider text-muted-foreground">Findings</h4>
|
||||
{detailQuery.data.runDetails?.findings.length ? (
|
||||
detailQuery.data.runDetails.findings.map((finding) => (
|
||||
<div key={finding.id} className="mb-3 rounded-2xl border border-border/70 bg-card/60 p-4">
|
||||
<div className="flex items-start justify-between gap-3">
|
||||
<div>
|
||||
<div className="flex items-center gap-2">
|
||||
<span className="text-base">{finding.severity === 'high' ? '🔴' : finding.severity === 'medium' ? '🟡' : '🔵'}</span>
|
||||
<span className="font-semibold text-foreground">{finding.title}</span>
|
||||
</div>
|
||||
<div className="mt-1 text-sm text-muted-foreground">{finding.path}:{finding.line}</div>
|
||||
</div>
|
||||
<div className="flex gap-2">
|
||||
<Badge variant="outline">{finding.category}</Badge>
|
||||
<Badge className={finding.published ? 'bg-success/15 text-success border-success/20' : 'bg-warning/15 text-warning-foreground border-warning/20'}>
|
||||
{finding.published ? '已发布' : '待处理'}
|
||||
</Badge>
|
||||
</div>
|
||||
</div>
|
||||
{finding.detail && <div className="mt-3 text-sm text-muted-foreground">{finding.detail}</div>}
|
||||
{finding.evidence && <div className="mt-2 rounded-lg border border-border/50 bg-muted/30 p-3 font-mono text-xs text-muted-foreground">{finding.evidence}</div>}
|
||||
{finding.suggestion && <div className="mt-2 text-sm text-foreground">💡 {finding.suggestion}</div>}
|
||||
</div>
|
||||
))
|
||||
) : (
|
||||
<div className="rounded-2xl border border-dashed border-border/70 bg-muted/20 p-6 text-sm text-muted-foreground">
|
||||
当前 session 暂无 findings。
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<h4 className="mb-3 text-sm font-semibold uppercase tracking-wider text-muted-foreground">Gitea 评论</h4>
|
||||
{detailQuery.data.runDetails?.comments.length ? (
|
||||
detailQuery.data.runDetails.comments.map((comment) => (
|
||||
<div key={comment.id} className="mb-3 rounded-2xl border border-border/70 bg-card/60 p-4">
|
||||
<div className="flex items-center justify-between gap-3">
|
||||
<Badge variant="outline">{comment.status}</Badge>
|
||||
<div className="font-mono text-xs text-muted-foreground">{formatDate(comment.createdAt)}</div>
|
||||
</div>
|
||||
{(comment.path || comment.line) && (
|
||||
<div className="mt-2 text-xs font-mono text-muted-foreground">
|
||||
{[comment.path, comment.line].filter(Boolean).join(':')}
|
||||
</div>
|
||||
)}
|
||||
<pre className="mt-3 whitespace-pre-wrap break-words text-sm text-foreground">{comment.body}</pre>
|
||||
</div>
|
||||
))
|
||||
) : (
|
||||
<div className="rounded-2xl border border-dashed border-border/70 bg-muted/20 p-6 text-sm text-muted-foreground">
|
||||
当前 session 暂无评论产物。
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</TabsContent>
|
||||
|
||||
<TabsContent value="logs" className="space-y-4">
|
||||
<div>
|
||||
<h4 className="mb-3 text-sm font-semibold uppercase tracking-wider text-muted-foreground">执行步骤</h4>
|
||||
<div className="flex flex-wrap gap-2">
|
||||
{detailQuery.data.plan.map((step) => (
|
||||
<div
|
||||
key={step.key}
|
||||
className={`flex items-center gap-2 rounded-xl border px-3 py-2 text-sm ${planStatusClassMap[step.status]}`}
|
||||
>
|
||||
<span className="font-medium">{step.label}</span>
|
||||
<Badge className={planStatusClassMap[step.status]}>{step.status}</Badge>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<h4 className="mb-3 text-sm font-semibold uppercase tracking-wider text-muted-foreground">事件流</h4>
|
||||
{detailQuery.data.timeline.length === 0 && (
|
||||
<div className="rounded-2xl border border-dashed border-border/70 bg-muted/20 p-6 text-sm text-muted-foreground">
|
||||
当前 session 还没有时间线事件。
|
||||
</div>
|
||||
)}
|
||||
{detailQuery.data.timeline.map((entry) => (
|
||||
<div
|
||||
key={entry.id}
|
||||
className={`mb-2 rounded-2xl border p-4 ${timelineToneClassMap[entry.tone]}`}
|
||||
>
|
||||
<div className="flex items-center justify-between gap-3">
|
||||
<div className="font-semibold text-foreground">{entry.title}</div>
|
||||
<div className="font-mono text-xs text-muted-foreground">{formatDate(entry.timestamp)}</div>
|
||||
</div>
|
||||
<div className="mt-2 text-sm text-muted-foreground">{entry.detail}</div>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</TabsContent>
|
||||
</Tabs>
|
||||
</div>
|
||||
)}
|
||||
</CardContent>
|
||||
</Card>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -23,6 +23,17 @@ export interface RoleAssignmentDto {
|
||||
model: string | null;
|
||||
}
|
||||
|
||||
/**
 * Wire shape of a single kernel subagent definition as consumed by the frontend.
 */
export interface KernelSubagentDto {
  /** Discriminator; always the literal `'subagent'`. */
  kind: 'subagent';
  /** Identifier of the subagent. */
  name: string;
  /** Where the definition comes from. */
  source: 'built-in' | 'custom' | 'plugin';
  description: string;
  /** Free-text guidance on when this subagent should be used. */
  whenToUse: string;
  /** Optional model role name. NOTE(review): presumably matches a role assignment key; confirm against the backend. */
  modelRole?: string;
  tags: string[];
  resumable?: boolean;
}
|
||||
|
||||
export interface TestResult {
|
||||
success: boolean;
|
||||
latencyMs?: number;
|
||||
@@ -85,6 +96,11 @@ export const setRole = async (role: string, providerId: string | null, model: st
|
||||
return response.data;
|
||||
};
|
||||
|
||||
/**
 * Loads the kernel subagent definitions from `GET /review/kernel/subagents`.
 *
 * @returns The `data` array unwrapped from the `{ data: [...] }` response envelope.
 */
export const fetchKernelSubagents = async (): Promise<KernelSubagentDto[]> => {
  const response = await api.get<{ data: KernelSubagentDto[] }>('/review/kernel/subagents');
  // The HTTP client exposes the body as `response.data`; the body itself wraps the list in `data`.
  return response.data.data;
};
|
||||
|
||||
export const testProvider = async (id: string): Promise<TestResult> => {
|
||||
const response = await api.post<TestResult>(`/llm/providers/${id}/test`);
|
||||
return response.data;
|
||||
|
||||
122
frontend/src/services/reviewSessionService.ts
Normal file
122
frontend/src/services/reviewSessionService.ts
Normal file
@@ -0,0 +1,122 @@
|
||||
import api from '@/lib/api';
|
||||
|
||||
/**
 * One row of the review-session list: the stored session record paired with
 * its derived summary.
 */
export interface ReviewSessionSummaryRecordDto {
  /** The persisted session record. */
  session: {
    id: string;
    scopeType: 'pull_request' | 'commit';
    scopeKey: string;
    metadata: Record<string, unknown>;
    createdAt: string;
    updatedAt: string;
    lastRunId?: string;
  };
  /** Display-oriented summary of the session. */
  summary: {
    sessionId: string;
    scopeKey: string;
    scopeType: 'pull_request' | 'commit';
    owner?: string;
    repo?: string;
    prNumber?: number;
    headSha?: string;
    status:
      | 'queued'
      | 'planning'
      | 'executing'
      | 'awaiting_human_feedback'
      | 'completed'
      | 'failed'
      | 'ignored';
    currentStep?: string;
    findingCount: number;
    pendingTaskCount: number;
    updatedAt: string;
  };
}
|
||||
|
||||
/**
 * A single step of a review session's execution plan.
 */
export interface ReviewPlanStepDto {
  /** Identifier of the step within the plan. */
  key: string;
  /** Short human-readable name. */
  label: string;
  description: string;
  status: 'pending' | 'queued' | 'running' | 'completed' | 'failed' | 'skipped';
  progressText?: string;
}
|
||||
|
||||
/**
 * One entry of a review session's timeline.
 */
export interface ReviewTimelineEntryDto {
  id: string;
  /** When the entry occurred, as a string. NOTE(review): presumably ISO-8601; confirm against the backend. */
  timestamp: string;
  title: string;
  detail: string;
  /** Visual tone associated with the entry. */
  tone: 'neutral' | 'success' | 'warning' | 'danger';
}
|
||||
|
||||
/**
 * Full detail payload for a single review session: the session record and
 * summary plus checkpoint, plan, timeline, raw events and run artifacts.
 */
export interface ReviewSessionDetailDto {
  session: ReviewSessionSummaryRecordDto['session'];
  summary: ReviewSessionSummaryRecordDto['summary'];
  /** Last saved checkpoint, or `null` when none is available. */
  checkpoint: {
    state: Record<string, unknown>;
    pendingTasks: Array<{
      kind: 'skill' | 'subagent';
      name: string;
      input?: Record<string, unknown>;
    }>;
    stopReason?: string;
  } | null;
  plan: ReviewPlanStepDto[];
  timeline: ReviewTimelineEntryDto[];
  /** Raw session events. */
  events: Array<{
    id: string;
    sessionId: string;
    eventType: string;
    payload: Record<string, unknown>;
    createdAt: string;
  }>;
  /** Run record with its findings and comments, or `null` when no run details are available. */
  runDetails: {
    run: {
      id: string;
      eventType: string;
      status: string;
      owner: string;
      repo: string;
      prNumber?: number;
      commitSha?: string;
      headSha?: string;
      baseSha?: string;
      createdAt: string;
      updatedAt: string;
    };
    findings: Array<{
      id: string;
      title: string;
      detail: string;
      evidence: string;
      suggestion: string;
      severity: 'high' | 'medium' | 'low';
      category: string;
      path: string;
      line: number;
      confidence: number;
      published: boolean;
      fingerprint: string;
    }>;
    comments: Array<{
      id: string;
      status: string;
      body: string;
      path?: string;
      line?: number;
      createdAt: string;
    }>;
  } | null;
}
|
||||
|
||||
/**
 * Response envelope of the review-session list endpoint.
 */
export interface ReviewSessionListResponse {
  /** The session rows. */
  data: ReviewSessionSummaryRecordDto[];
}
|
||||
|
||||
/**
 * Loads the review sessions from `GET /review/sessions`.
 *
 * @returns The `data` array unwrapped from the response envelope.
 */
export const fetchReviewSessions = async (): Promise<ReviewSessionSummaryRecordDto[]> => {
  const response = await api.get<ReviewSessionListResponse>('/review/sessions');
  return response.data.data;
};
|
||||
|
||||
/**
 * Loads the detail payload of one review session from
 * `GET /review/sessions/:sessionId`.
 *
 * @param sessionId Identifier of the session to load.
 * @returns The response body, which is the detail DTO itself (no `data` envelope).
 */
export const fetchReviewSessionDetail = async (
  sessionId: string
): Promise<ReviewSessionDetailDto> => {
  // Escape the id so characters such as `/`, `?` or `#` cannot alter the request path.
  // Ids made only of URL-safe characters (e.g. UUIDs) are sent unchanged.
  const response = await api.get<ReviewSessionDetailDto>(
    `/review/sessions/${encodeURIComponent(sessionId)}`
  );
  return response.data;
};
|
||||
@@ -107,9 +107,9 @@ const configResponse = {
|
||||
label: '审查引擎',
|
||||
description: '当前使用的审查引擎',
|
||||
type: 'enum',
|
||||
enumValues: ['agent', 'codex'],
|
||||
enumValues: ['kernel', 'codex'],
|
||||
sensitive: false,
|
||||
value: 'agent',
|
||||
value: 'kernel',
|
||||
hasValue: true,
|
||||
source: 'db',
|
||||
},
|
||||
|
||||
@@ -52,6 +52,7 @@
|
||||
"start:prod": "bun run dist/index.js",
|
||||
"lint": "biome check src/",
|
||||
"test": "bun test",
|
||||
"test:e2e": "E2E_MOCK_LLM=1 bun test ./e2e/__tests__/e2e-review.test.ts",
|
||||
"prepare": "command -v husky >/dev/null 2>&1 && husky || true"
|
||||
},
|
||||
"keywords": [
|
||||
|
||||
134
src/agent-kernel/__tests__/agent-kernel-runner.test.ts
Normal file
134
src/agent-kernel/__tests__/agent-kernel-runner.test.ts
Normal file
@@ -0,0 +1,134 @@
|
||||
import { afterEach, beforeEach, describe, expect, test } from 'bun:test';
|
||||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import path from 'node:path';
|
||||
import { closeDatabase, initDatabase } from '../../db/database';
|
||||
import { KernelAgentInvoker } from '../agents/kernel-agent-invoker';
|
||||
import { KernelAgentRegistry } from '../agents/kernel-agent-registry';
|
||||
import { KernelTaskRegistry } from '../registry/kernel-task-registry';
|
||||
import { AgentKernelRunner } from '../runtime/agent-kernel-runner';
|
||||
import { kernelSessionRepository } from '../session/session-repository';
|
||||
|
||||
/** Minimal kernel state used by the runner tests: a single counter the test tasks update. */
interface DummyState {
  counter: number;
}
|
||||
|
||||
// Tests for AgentKernelRunner. Each test runs against a throwaway database file.
describe('AgentKernelRunner', () => {
  let tempDir: string;
  let savedDbPath: string | undefined;

  beforeEach(async () => {
    // Point DATABASE_PATH at a fresh temp directory before initialising the database,
    // remembering the previous value so it can be restored afterwards.
    tempDir = await mkdtemp(path.join(tmpdir(), 'kernel-runner-db-'));
    savedDbPath = process.env.DATABASE_PATH;
    process.env.DATABASE_PATH = path.join(tempDir, 'assistant.db');
    initDatabase();
  });

  afterEach(async () => {
    closeDatabase();
    // Restore the environment exactly: remove the variable if it was not set before.
    if (savedDbPath === undefined) {
      Reflect.deleteProperty(process.env, 'DATABASE_PATH');
    } else {
      process.env.DATABASE_PATH = savedDbPath;
    }
    await rm(tempDir, { recursive: true, force: true });
  });

  test('runs queued skills and subagents and persists checkpoint', async () => {
    const session = kernelSessionRepository.ensureSession({
      scopeType: 'pull_request',
      scopeKey: 'acme/repo#7',
      metadata: { owner: 'acme', repo: 'repo', prNumber: 7 },
      runId: 'run-7',
    });

    const skillRegistry = new KernelTaskRegistry<DummyState>();
    const subagentRegistry = new KernelAgentRegistry<DummyState>();

    // The skill sets the counter to 1 and enqueues the subagent as follow-up work.
    skillRegistry.register({
      kind: 'skill',
      name: 'step_one',
      description: 'Initial skill for runner test',
      execute: async () => ({
        state: { counter: 1 },
        enqueue: [{ kind: 'subagent', name: 'step_two' }],
      }),
    });

    // The subagent increments whatever counter it receives.
    subagentRegistry.register({
      kind: 'subagent',
      name: 'step_two',
      source: 'built-in',
      whenToUse: 'Increment the test counter',
      description: 'Test subagent used by runner tests',
      execute: async (_task, context) => ({
        state: { counter: context.state.counter + 1 },
      }),
    });

    // The planner contributes nothing; all work comes from initialTasks and enqueue.
    const runner = new AgentKernelRunner(skillRegistry, new KernelAgentInvoker(subagentRegistry), {
      plan: () => [],
    });
    const checkpoint = await runner.run({
      sessionId: session.id,
      runId: 'run-7',
      initialState: { counter: 0 },
      initialTasks: [{ kind: 'skill', name: 'step_one' }],
    });

    const events = kernelSessionRepository.listEvents(session.id);

    expect(checkpoint.state.counter).toBe(2);
    expect(checkpoint.pendingTasks).toHaveLength(0);
    expect(checkpoint.stopReason).toBe('completed');
    // Event types are sorted so the assertion does not depend on the order they are listed in.
    expect(events.map((event) => event.eventType).sort()).toEqual([
      'task_completed',
      'task_completed',
      'task_started',
      'task_started',
    ]);
  });

  test('continueExisting ignores persisted stop reason and resumes planned work', async () => {
    const session = kernelSessionRepository.ensureSession({
      scopeType: 'pull_request',
      scopeKey: 'acme/repo#8',
      metadata: { owner: 'acme', repo: 'repo', prNumber: 8 },
      runId: 'run-8',
    });

    // Seed a checkpoint that stopped for human feedback with no pending tasks.
    kernelSessionRepository.saveCheckpoint(session.id, {
      state: { counter: 1 },
      pendingTasks: [],
      stopReason: 'awaiting_human_feedback',
    });

    const skillRegistry = new KernelTaskRegistry<DummyState>();
    const subagentRegistry = new KernelAgentRegistry<DummyState>();

    skillRegistry.register({
      kind: 'skill',
      name: 'resume_step',
      description: 'Resume skill for runner test',
      execute: async (_task, context) => ({
        state: { counter: context.state.counter + 1 },
      }),
    });

    // The planner schedules `resume_step` only while the counter is below 2.
    const runner = new AgentKernelRunner(skillRegistry, new KernelAgentInvoker(subagentRegistry), {
      plan: (context) =>
        context.state.counter < 2 ? [{ kind: 'skill', name: 'resume_step' }] : [],
    });

    const checkpoint = await runner.run({
      sessionId: session.id,
      runId: 'run-8',
      initialState: { counter: 0 },
      initialTasks: [],
      continueExisting: true,
    });

    // A final counter of 2 means the run continued from the persisted state (counter: 1)
    // rather than from `initialState`, and executed `resume_step` exactly once.
    expect(checkpoint.state.counter).toBe(2);
    expect(checkpoint.stopReason).toBe('completed');
  });
});
|
||||
81
src/agent-kernel/__tests__/kernel-agent-invoker.test.ts
Normal file
81
src/agent-kernel/__tests__/kernel-agent-invoker.test.ts
Normal file
@@ -0,0 +1,81 @@
|
||||
import { afterEach, beforeEach, describe, expect, test } from 'bun:test';
|
||||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import path from 'node:path';
|
||||
import { closeDatabase, initDatabase } from '../../db/database';
|
||||
import { getKernelAgentContext } from '../agents/kernel-agent-context';
|
||||
import { KernelAgentInvoker } from '../agents/kernel-agent-invoker';
|
||||
import { KernelAgentRegistry } from '../agents/kernel-agent-registry';
|
||||
import { kernelSessionRepository } from '../session/session-repository';
|
||||
|
||||
/** Minimal kernel state used by the invoker test: a single numeric value. */
interface DummyState {
  value: number;
}
|
||||
|
||||
// Tests for KernelAgentInvoker. Each test runs against a throwaway database file.
describe('KernelAgentInvoker', () => {
  let tempDir: string;
  let savedDbPath: string | undefined;

  beforeEach(async () => {
    // Point DATABASE_PATH at a fresh temp directory before initialising the database,
    // remembering the previous value so it can be restored afterwards.
    tempDir = await mkdtemp(path.join(tmpdir(), 'kernel-agent-invoker-db-'));
    savedDbPath = process.env.DATABASE_PATH;
    process.env.DATABASE_PATH = path.join(tempDir, 'assistant.db');
    initDatabase();
  });

  afterEach(async () => {
    closeDatabase();
    // Restore the environment exactly: remove the variable if it was not set before.
    if (savedDbPath === undefined) {
      Reflect.deleteProperty(process.env, 'DATABASE_PATH');
    } else {
      process.env.DATABASE_PATH = savedDbPath;
    }
    await rm(tempDir, { recursive: true, force: true });
  });

  test('invokes subagent with isolated agent context and structured result', async () => {
    const session = kernelSessionRepository.ensureSession({
      scopeType: 'pull_request',
      scopeKey: 'acme/repo#88',
      metadata: { owner: 'acme', repo: 'repo', prNumber: 88 },
      runId: 'run-88',
    });

    const registry = new KernelAgentRegistry<DummyState>();
    registry.register({
      kind: 'subagent',
      name: 'test:subagent',
      source: 'built-in',
      description: 'Test subagent',
      whenToUse: 'Used by invoker test',
      tags: ['test'],
      execute: async (_task, context) => {
        // While the subagent body runs, the ambient agent context and the
        // delegation packet must both identify this invocation.
        const agentContext = getKernelAgentContext();
        expect(agentContext?.agentType).toBe('subagent');
        expect(agentContext?.subagentName).toBe('test:subagent');
        expect(context.delegation.parentSessionId).toBe(session.id);

        return {
          state: { value: context.state.value + 1 },
          summary: 'subagent completed',
          artifacts: { nextValue: context.state.value + 1 },
        };
      },
    });

    const invoker = new KernelAgentInvoker(registry);
    const output = await invoker.invoke(
      { kind: 'subagent', name: 'test:subagent', input: { focus: 'test' } },
      {
        session,
        runId: 'run-88',
        state: { value: 1 },
      }
    );

    // The handler result is passed through, and the recorded invocation carries
    // the completion status together with the summary and artifacts.
    expect(output.result?.state).toEqual({ value: 2 });
    expect(output.invocation.status).toBe('completed');
    expect(output.invocation.result?.summary).toBe('subagent completed');
    expect(output.invocation.result?.artifacts).toEqual({ nextValue: 2 });
  });
});
|
||||
101
src/agent-kernel/__tests__/session-repository.test.ts
Normal file
101
src/agent-kernel/__tests__/session-repository.test.ts
Normal file
@@ -0,0 +1,101 @@
|
||||
import { afterEach, beforeEach, describe, expect, test } from 'bun:test';
|
||||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import path from 'node:path';
|
||||
import { closeDatabase, initDatabase } from '../../db/database';
|
||||
import { kernelSessionRepository } from '../session/session-repository';
|
||||
|
||||
// Tests for kernelSessionRepository sessions, events and checkpoints.
// Each test runs against a throwaway database file.
describe('KernelSessionRepository', () => {
  let tempDir: string;
  let savedDbPath: string | undefined;

  beforeEach(async () => {
    // Point DATABASE_PATH at a fresh temp directory before initialising the database,
    // remembering the previous value so it can be restored afterwards.
    tempDir = await mkdtemp(path.join(tmpdir(), 'kernel-session-db-'));
    savedDbPath = process.env.DATABASE_PATH;
    process.env.DATABASE_PATH = path.join(tempDir, 'assistant.db');
    initDatabase();
  });

  afterEach(async () => {
    closeDatabase();
    // Restore the environment exactly: remove the variable if it was not set before.
    if (savedDbPath === undefined) {
      Reflect.deleteProperty(process.env, 'DATABASE_PATH');
    } else {
      process.env.DATABASE_PATH = savedDbPath;
    }
    await rm(tempDir, { recursive: true, force: true });
  });

  test('ensureSession reuses the same scope key and updates metadata', () => {
    const first = kernelSessionRepository.ensureSession({
      scopeType: 'pull_request',
      scopeKey: 'acme/repo#42',
      metadata: { owner: 'acme', repo: 'repo', prNumber: 42 },
      runId: 'run-1',
    });

    // Same scope key, new run id and extended metadata.
    const second = kernelSessionRepository.ensureSession({
      scopeType: 'pull_request',
      scopeKey: 'acme/repo#42',
      metadata: { owner: 'acme', repo: 'repo', prNumber: 42, updated: true },
      runId: 'run-2',
    });

    expect(second.id).toBe(first.id);
    expect(second.lastRunId).toBe('run-2');
    expect(second.metadata).toEqual({ owner: 'acme', repo: 'repo', prNumber: 42, updated: true });
  });

  test('appendEvent and saveCheckpoint persist session runtime state', () => {
    const session = kernelSessionRepository.ensureSession({
      scopeType: 'pull_request',
      scopeKey: 'acme/repo#99',
      metadata: { owner: 'acme', repo: 'repo', prNumber: 99 },
      runId: 'run-99',
    });

    kernelSessionRepository.appendEvent(session.id, 'review_enqueued', { runId: 'run-99' });
    kernelSessionRepository.appendEvent(session.id, 'task_started', { name: 'prepare_workspace' });
    kernelSessionRepository.saveCheckpoint(session.id, {
      state: { prepared: true, findings: 3 },
      pendingTasks: [{ kind: 'skill', name: 'publish_review' }],
      stopReason: 'waiting',
    });

    const events = kernelSessionRepository.listEvents(session.id);
    const checkpoint = kernelSessionRepository.loadCheckpoint<{
      prepared: boolean;
      findings: number;
    }>(session.id);

    expect(events).toHaveLength(2);
    // Event types are sorted so the assertion does not depend on the order they are listed in.
    expect(events.map((event) => event.eventType).sort()).toEqual([
      'review_enqueued',
      'task_started',
    ]);
    expect(checkpoint).not.toBeNull();
    expect(checkpoint?.state).toEqual({ prepared: true, findings: 3 });
    expect(checkpoint?.pendingTasks).toEqual([{ kind: 'skill', name: 'publish_review' }]);
    expect(checkpoint?.stopReason).toBe('waiting');
  });

  test('can query sessions by scope key and list sessions', () => {
    const first = kernelSessionRepository.ensureSession({
      scopeType: 'pull_request',
      scopeKey: 'acme/repo#1',
      metadata: { owner: 'acme', repo: 'repo', prNumber: 1 },
      runId: 'run-1',
    });
    const second = kernelSessionRepository.ensureSession({
      scopeType: 'pull_request',
      scopeKey: 'acme/repo#2',
      metadata: { owner: 'acme', repo: 'repo', prNumber: 2 },
      runId: 'run-2',
    });

    expect(kernelSessionRepository.getSessionByScopeKey('acme/repo#1')?.id).toBe(first.id);
    // Only membership is asserted; the listing order is not part of this check.
    expect(kernelSessionRepository.listSessions(10).map((session) => session.id)).toEqual(
      expect.arrayContaining([first.id, second.id])
    );
  });
});
|
||||
@@ -0,0 +1,68 @@
|
||||
import { afterEach, beforeEach, describe, expect, test } from 'bun:test';
|
||||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import path from 'node:path';
|
||||
import { closeDatabase, initDatabase } from '../../db/database';
|
||||
import { kernelSessionRepository } from '../session/session-repository';
|
||||
|
||||
// Tests for the subagent-invocation records of kernelSessionRepository.
// Each test runs against a throwaway database file.
describe('KernelSessionRepository subagent invocations', () => {
  let tempDir: string;
  let savedDbPath: string | undefined;

  beforeEach(async () => {
    // Point DATABASE_PATH at a fresh temp directory before initialising the database,
    // remembering the previous value so it can be restored afterwards.
    tempDir = await mkdtemp(path.join(tmpdir(), 'kernel-subagent-db-'));
    savedDbPath = process.env.DATABASE_PATH;
    process.env.DATABASE_PATH = path.join(tempDir, 'assistant.db');
    initDatabase();
  });

  afterEach(async () => {
    closeDatabase();
    // Restore the environment exactly: remove the variable if it was not set before.
    if (savedDbPath === undefined) {
      Reflect.deleteProperty(process.env, 'DATABASE_PATH');
    } else {
      process.env.DATABASE_PATH = savedDbPath;
    }
    await rm(tempDir, { recursive: true, force: true });
  });

  test('persists and lists subagent invocations', () => {
    const session = kernelSessionRepository.ensureSession({
      scopeType: 'pull_request',
      scopeKey: 'acme/repo#101',
      metadata: { owner: 'acme', repo: 'repo', prNumber: 101 },
      runId: 'run-101',
    });

    // Create the invocation record together with its delegation packet.
    const invocation = kernelSessionRepository.createSubagentInvocation({
      parentSessionId: session.id,
      parentRunId: 'run-101',
      parentTaskName: 'custom:security-audit',
      subagentName: 'custom:security-audit',
      agentId: 'agent-123',
      packet: {
        goal: 'Review security issues',
        parentTaskName: 'custom:security-audit',
        input: { domain: 'security' },
        parentSessionId: session.id,
        parentRunId: 'run-101',
        contextSummary: 'summary',
      },
    });

    // Mark it completed and attach the structured result.
    kernelSessionRepository.completeSubagentInvocation(invocation.id, 'completed', {
      agentId: 'agent-123',
      agentType: 'custom:security-audit',
      summary: 'security review done',
      totalDurationMs: 10,
      totalToolUseCount: 0,
      totalTokens: 0,
      artifacts: { findings: 2 },
    });

    const invocations = kernelSessionRepository.listSubagentInvocations(session.id);
    expect(invocations).toHaveLength(1);
    expect(invocations[0]?.subagentName).toBe('custom:security-audit');
    expect(invocations[0]?.result?.summary).toBe('security review done');
  });
});
|
||||
15
src/agent-kernel/agents/kernel-agent-context.ts
Normal file
15
src/agent-kernel/agents/kernel-agent-context.ts
Normal file
@@ -0,0 +1,15 @@
|
||||
import { AsyncLocalStorage } from 'node:async_hooks';
|
||||
import type { KernelSubagentContextRecord } from '../types';
|
||||
|
||||
/** Async-local store holding the subagent context record for the current async call chain. */
const kernelAgentContextStorage = new AsyncLocalStorage<KernelSubagentContextRecord>();

/**
 * Reads the subagent context bound to the current async execution.
 *
 * @returns The record passed to the enclosing `runWithKernelAgentContext` call,
 *   or `undefined` when no such call is active.
 */
export function getKernelAgentContext(): KernelSubagentContextRecord | undefined {
  return kernelAgentContextStorage.getStore();
}

/**
 * Runs `fn` with `context` installed as the current subagent context.
 *
 * @param context Record that `getKernelAgentContext()` returns while `fn` runs,
 *   including across its awaited continuations.
 * @param fn Async work to execute inside the context.
 * @returns The promise returned by `fn`.
 */
export function runWithKernelAgentContext<T>(
  context: KernelSubagentContextRecord,
  fn: () => Promise<T>
): Promise<T> {
  return kernelAgentContextStorage.run(context, fn);
}
|
||||
140
src/agent-kernel/agents/kernel-agent-invoker.ts
Normal file
140
src/agent-kernel/agents/kernel-agent-invoker.ts
Normal file
@@ -0,0 +1,140 @@
|
||||
import { randomUUID } from 'node:crypto';
|
||||
import type { KernelHookRegistry } from '../hooks/kernel-hook-registry';
|
||||
import { runKernelHooks } from '../hooks/kernel-hook-runner';
|
||||
import { kernelSessionRepository } from '../session/session-repository';
|
||||
import type {
|
||||
KernelAgentExecutionContext,
|
||||
KernelDelegationPacket,
|
||||
KernelExecutionContext,
|
||||
KernelHandlerResult,
|
||||
KernelSubagentDefinition,
|
||||
KernelTask,
|
||||
} from '../types';
|
||||
import { runWithKernelAgentContext } from './kernel-agent-context';
|
||||
import { KernelAgentRegistry } from './kernel-agent-registry';
|
||||
import { finalizeKernelSubagentResult } from './kernel-subagent-result';
|
||||
|
||||
/**
 * Result of invoking a subagent: the handler's own result plus the stored
 * invocation record.
 */
export interface KernelSubagentInvocationOutput<TState> {
  /** Handler result returned by the subagent, when there is one. */
  result?: KernelHandlerResult<TState>;
  /**
   * The invocation record. Its type is derived from the element type of
   * `kernelSessionRepository.listSubagentInvocations` so it stays in sync
   * with the repository.
   */
  invocation: ReturnType<typeof kernelSessionRepository.listSubagentInvocations>[number];
}
|
||||
|
||||
/**
 * Looks up subagent definitions in a {@link KernelAgentRegistry} and executes them.
 *
 * Every invocation is persisted through `kernelSessionRepository`: a record is
 * created before the subagent runs and is finalized as `completed` or `failed`
 * afterwards, including when a `SubagentStart` hook blocks the run.
 */
export class KernelAgentInvoker<TState> {
  constructor(
    private readonly registry: KernelAgentRegistry<TState>,
    private readonly hookRegistry?: KernelHookRegistry
  ) {}

  /** Returns the subagent definition registered under `name`, if any. */
  get(name: string): KernelSubagentDefinition<TState> | undefined {
    return this.registry.get(name);
  }

  /** Returns every registered subagent definition. */
  getAll(): KernelSubagentDefinition<TState>[] {
    return this.registry.getAll();
  }

  /** Returns the registered subagent definitions carrying `tag`. */
  filterByTag(tag: string): KernelSubagentDefinition<TState>[] {
    return this.registry.filterByTag(tag);
  }

  /**
   * Runs the subagent named by `task.name`.
   *
   * Steps: build the delegation packet, persist a `running` invocation record,
   * run `SubagentStart` hooks (when a hook registry was supplied), execute the
   * subagent inside `runWithKernelAgentContext`, then finalize the record.
   *
   * @param task - Task whose `name` selects the subagent and whose `input` is forwarded.
   * @param context - Execution context of the parent run.
   * @returns The subagent's handler result together with the finalized invocation record.
   * @throws Error when no subagent is registered under `task.name`.
   * @throws Error carrying the hook's blocking reason when a `SubagentStart` hook blocks;
   *   the invocation record is marked `failed` first.
   * @throws Whatever the subagent's `execute` throws, rethrown after the record is marked `failed`.
   */
  async invoke(
    task: KernelTask,
    context: KernelExecutionContext<TState>
  ): Promise<KernelSubagentInvocationOutput<TState>> {
    const agent = this.registry.get(task.name);
    if (!agent) {
      throw new Error(`Kernel subagent definition not found: ${task.name}`);
    }

    const agentId = randomUUID();
    const delegation: KernelDelegationPacket = {
      goal: agent.whenToUse,
      parentTaskName: task.name,
      input: task.input ?? {},
      parentSessionId: context.session.id,
      parentRunId: context.runId,
      contextSummary: KernelAgentInvoker.readContextSummary(context.state),
    };

    const invocation = kernelSessionRepository.createSubagentInvocation({
      parentSessionId: context.session.id,
      parentRunId: context.runId,
      parentTaskName: task.name,
      subagentName: agent.name,
      agentId,
      packet: delegation,
    });

    const agentContext: KernelAgentExecutionContext<TState> = {
      ...context,
      agent,
      delegation,
    };

    if (this.hookRegistry) {
      const hooksStartedAt = Date.now();
      const hookResult = await runKernelHooks({
        registry: this.hookRegistry,
        input: {
          event: 'SubagentStart',
          sessionId: context.session.id,
          runId: context.runId,
          subagentName: agent.name,
          agentId,
          packet: delegation,
        },
      });
      if (hookResult.blockingReason) {
        // The record was already inserted as `running`; finalize it so a blocked
        // run does not leave a row that never finishes.
        const blocked = new Error(hookResult.blockingReason);
        this.markFailed(invocation.id, agentId, agent.name, hooksStartedAt, blocked);
        throw blocked;
      }
    }

    const startTime = Date.now();
    try {
      const result = await runWithKernelAgentContext(
        {
          agentId,
          parentSessionId: context.session.id,
          agentType: 'subagent',
          subagentName: agent.name,
          source: agent.source,
          invocationKind: 'spawn',
        },
        () => agent.execute(task, agentContext)
      );

      const finalized = finalizeKernelSubagentResult({
        agentId,
        agentType: agent.name,
        startTime,
        result,
      });

      return {
        result,
        invocation: kernelSessionRepository.completeSubagentInvocation(
          invocation.id,
          'completed',
          finalized
        ),
      };
    } catch (error) {
      this.markFailed(invocation.id, agentId, agent.name, startTime, error);
      throw error;
    }
  }

  /** Extracts `state.compressedContext.summary` when it is a string; otherwise `undefined`. */
  private static readContextSummary(state: unknown): string | undefined {
    const summary = (state as { compressedContext?: { summary?: unknown } } | null | undefined)
      ?.compressedContext?.summary;
    return typeof summary === 'string' ? summary : undefined;
  }

  /** Finalizes the invocation record as `failed`, using the error text as summary and artifact. */
  private markFailed(
    invocationId: string,
    agentId: string,
    agentType: string,
    startTime: number,
    error: unknown
  ): void {
    const message = error instanceof Error ? error.message : String(error);
    const finalized = finalizeKernelSubagentResult({
      agentId,
      agentType,
      startTime,
      result: {
        summary: message,
        artifacts: { error: message },
      },
    });
    kernelSessionRepository.completeSubagentInvocation(invocationId, 'failed', finalized);
  }
}
|
||||
21
src/agent-kernel/agents/kernel-agent-registry.ts
Normal file
21
src/agent-kernel/agents/kernel-agent-registry.ts
Normal file
@@ -0,0 +1,21 @@
|
||||
import type { KernelSubagentDefinition } from '../types';
|
||||
|
||||
/**
 * In-memory lookup of subagent definitions keyed by `name`.
 * Registering a second definition under the same name replaces the first.
 */
export class KernelAgentRegistry<TState> {
  private readonly agents = new Map<string, KernelSubagentDefinition<TState>>();

  /** Stores `agent` under `agent.name`. */
  register(agent: KernelSubagentDefinition<TState>): void {
    const key = agent.name;
    this.agents.set(key, agent);
  }

  /** Returns the definition stored under `agentType`, or `undefined`. */
  get(agentType: string): KernelSubagentDefinition<TState> | undefined {
    return this.agents.get(agentType);
  }

  /** Returns a new array holding every stored definition. */
  getAll(): KernelSubagentDefinition<TState>[] {
    return Array.from(this.agents.values());
  }

  /** Returns the definitions whose `tags` list includes `tag`. */
  filterByTag(tag: string): KernelSubagentDefinition<TState>[] {
    const tagged: KernelSubagentDefinition<TState>[] = [];
    for (const candidate of this.getAll()) {
      if (candidate.tags?.includes(tag)) {
        tagged.push(candidate);
      }
    }
    return tagged;
  }
}
|
||||
21
src/agent-kernel/agents/kernel-subagent-result.ts
Normal file
21
src/agent-kernel/agents/kernel-subagent-result.ts
Normal file
@@ -0,0 +1,21 @@
|
||||
import type { KernelHandlerResult, KernelSubagentInvocationResult } from '../types';
|
||||
|
||||
/**
 * Builds the `KernelSubagentInvocationResult` stored for a finished subagent run.
 *
 * @param params.agentId - Identifier of the invocation's agent instance.
 * @param params.agentType - Agent type label; also used in the fallback summary.
 * @param params.startTime - Epoch milliseconds the run started at; the duration is
 *   measured from it to now.
 * @param params.result - Optional handler result supplying `summary` and `artifacts`.
 * @returns The finalized result. Tool-use and token totals are always reported as 0.
 */
export function finalizeKernelSubagentResult<TState>(params: {
  agentId: string;
  agentType: string;
  startTime: number;
  result?: KernelHandlerResult<TState>;
}): KernelSubagentInvocationResult {
  const handlerResult = params.result;
  const elapsedMs = Date.now() - params.startTime;
  // Falls back to "<agentType> completed" when the handler gave no summary.
  const summary = handlerResult?.summary ?? `${params.agentType} completed`;

  return {
    agentId: params.agentId,
    agentType: params.agentType,
    summary,
    totalDurationMs: elapsedMs,
    totalToolUseCount: 0,
    totalTokens: 0,
    artifacts: handlerResult?.artifacts,
  };
}
|
||||
219
src/agent-kernel/hooks/__tests__/kernel-hook-runner.test.ts
Normal file
219
src/agent-kernel/hooks/__tests__/kernel-hook-runner.test.ts
Normal file
@@ -0,0 +1,219 @@
|
||||
import { describe, expect, test } from 'bun:test';
|
||||
import { KernelHookRegistry } from '../kernel-hook-registry';
|
||||
import { runKernelHooks } from '../kernel-hook-runner';
|
||||
import type { KernelHookDefinition, KernelHookInput } from '../kernel-hook-types';
|
||||
|
||||
// Tool execution context fixture passed as `context` in the tool-related hook inputs below.
const baseContext = {
  workspacePath: '/tmp/workspace',
  mirrorPath: '/tmp/mirror',
  runId: 'run-1',
};
|
||||
|
||||
/** Creates a `KernelHookRegistry` and registers `hooks` on it in the given order. */
function makeRegistry(hooks: KernelHookDefinition[]): KernelHookRegistry {
  const populated = new KernelHookRegistry();
  hooks.forEach((definition) => {
    populated.register(definition);
  });
  return populated;
}
|
||||
|
||||
/** Builds a hook definition for `event` with a generated `Test hook <name>` description. */
function makeHook(
  name: string,
  event: KernelHookInput['event'],
  execute: KernelHookDefinition['execute']
): KernelHookDefinition {
  const description = `Test hook ${name}`;
  return { name, event, description, execute };
}
|
||||
|
||||
describe('runKernelHooks', () => {
  // Factories return a fresh input object per call so tests never share state.
  const preToolUseInput = (): Extract<KernelHookInput, { event: 'PreToolUse' }> => ({
    event: 'PreToolUse',
    toolName: 'write_file',
    toolCallId: 'call-1',
    input: { value: 'raw' },
    context: baseContext,
  });

  const permissionRequestInput = (): Extract<KernelHookInput, { event: 'PermissionRequest' }> => ({
    event: 'PermissionRequest',
    toolName: 'write_file',
    toolCallId: 'call-1',
    input: { value: 'raw' },
    context: baseContext,
    suggestedBehavior: 'ask',
    reason: 'needs approval',
  });

  // One representative input per event name.
  test.each([
    [
      'SessionStart',
      {
        event: 'SessionStart',
        sessionId: 'session-1',
        runId: 'run-1',
        scopeKey: 'repo#1',
      },
    ],
    [
      'SubagentStart',
      {
        event: 'SubagentStart',
        sessionId: 'session-1',
        runId: 'run-1',
        subagentName: 'test:subagent',
        agentId: 'agent-1',
        packet: {
          input: { focus: 'test' },
          goal: 'test goal',
          parentTaskName: 'test:task',
          parentSessionId: 'session-1',
          parentRunId: 'run-1',
        },
      },
    ],
    ['PermissionRequest', permissionRequestInput()],
    ['PreToolUse', preToolUseInput()],
    [
      'PostToolUse',
      {
        event: 'PostToolUse',
        toolName: 'write_file',
        toolCallId: 'call-1',
        input: { value: 'raw' },
        output: { ok: true },
        context: baseContext,
      },
    ],
    [
      'PostToolUseFailure',
      {
        event: 'PostToolUseFailure',
        toolName: 'write_file',
        toolCallId: 'call-1',
        input: { value: 'raw' },
        error: 'boom',
        context: baseContext,
      },
    ],
  ] as const)('dispatches %s to matching hooks', async (_label, input) => {
    const calls: string[] = [];
    const onlyHook = makeHook('first', input.event, async () => {
      calls.push('first');
      return { additionalContext: 'ctx-1' };
    });

    const outcome = await runKernelHooks({ registry: makeRegistry([onlyHook]), input });

    expect(calls).toEqual(['first']);
    expect(outcome.results).toHaveLength(1);
    expect(outcome.additionalContexts).toEqual(['ctx-1']);
  });

  test('aggregates additionalContext values and lets later updatedInput override earlier values', async () => {
    const first = makeHook('first', 'PreToolUse', async () => ({
      additionalContext: 'ctx-1',
      updatedInput: { value: 'first' },
    }));
    const second = makeHook('second', 'PreToolUse', async () => ({
      additionalContext: 'ctx-2',
      updatedInput: { value: 'second' },
    }));

    const outcome = await runKernelHooks({
      registry: makeRegistry([first, second]),
      input: preToolUseInput(),
    });

    expect(outcome.additionalContexts).toEqual(['ctx-1', 'ctx-2']);
    expect(outcome.updatedInput).toEqual({ value: 'second' });
    expect(outcome.results).toHaveLength(2);
  });

  test('propagates blockingReason when a hook returns decision block', async () => {
    const before = makeHook('before', 'PermissionRequest', async () => ({
      additionalContext: 'ctx-before',
      updatedInput: { value: 'before' },
    }));
    const blocker = makeHook('blocker', 'PermissionRequest', async () => ({
      decision: 'block',
      reason: 'blocked by policy',
      additionalContext: 'ctx-blocker',
      updatedInput: { value: 'blocked' },
    }));
    // Registered after the blocker, so it must never contribute to the outcome.
    const after = makeHook('after', 'PermissionRequest', async () => ({
      additionalContext: 'ctx-after',
      updatedInput: { value: 'after' },
    }));

    const outcome = await runKernelHooks({
      registry: makeRegistry([before, blocker, after]),
      input: permissionRequestInput(),
    });

    expect(outcome.additionalContexts).toEqual(['ctx-before', 'ctx-blocker']);
    expect(outcome.updatedInput).toEqual({ value: 'blocked' });
    expect(outcome.blockingReason).toBe('blocked by policy');
    expect(outcome.results).toHaveLength(2);
  });

  test('preserves approve decisions for PermissionRequest without introducing a blocking reason', async () => {
    const approver = makeHook('approver', 'PermissionRequest', async () => ({
      decision: 'approve',
      reason: 'approved by reviewer',
      additionalContext: 'ctx-approve',
      updatedInput: { value: 'approved' },
    }));

    const outcome = await runKernelHooks({
      registry: makeRegistry([approver]),
      input: permissionRequestInput(),
    });

    expect(outcome.additionalContexts).toEqual(['ctx-approve']);
    expect(outcome.updatedInput).toEqual({ value: 'approved' });
    expect(outcome.blockingReason).toBeUndefined();
    expect(outcome.results).toEqual([
      expect.objectContaining({
        decision: 'approve',
        reason: 'approved by reviewer',
      }),
    ]);
  });
});
|
||||
19
src/agent-kernel/hooks/kernel-hook-registry.ts
Normal file
19
src/agent-kernel/hooks/kernel-hook-registry.ts
Normal file
@@ -0,0 +1,19 @@
|
||||
import type { KernelHookDefinition, KernelHookEventName } from './kernel-hook-types';
|
||||
|
||||
/**
 * Stores hook definitions grouped by the event they listen to,
 * keeping registration order within each event.
 */
export class KernelHookRegistry {
  private readonly hooks = new Map<KernelHookEventName, KernelHookDefinition[]>();

  /** Appends `hook` to the list kept for `hook.event`. */
  register(hook: KernelHookDefinition): void {
    const bucket = this.hooks.get(hook.event);
    if (bucket) {
      bucket.push(hook);
      return;
    }
    this.hooks.set(hook.event, [hook]);
  }

  /** Returns the hooks registered for `event`; an empty array when there are none. */
  get(event: KernelHookEventName): KernelHookDefinition[] {
    const bucket = this.hooks.get(event);
    return bucket ?? [];
  }

  /** Returns every registered hook as one flat array, in event-insertion order. */
  getAll(): KernelHookDefinition[] {
    const everything: KernelHookDefinition[] = [];
    for (const bucket of this.hooks.values()) {
      everything.push(...bucket);
    }
    return everything;
  }
}
|
||||
47
src/agent-kernel/hooks/kernel-hook-runner.ts
Normal file
47
src/agent-kernel/hooks/kernel-hook-runner.ts
Normal file
@@ -0,0 +1,47 @@
|
||||
import { logger } from '../../utils/logger';
|
||||
import { KernelHookRegistry } from './kernel-hook-registry';
|
||||
import type { KernelHookInput, KernelLifecycleResult } from './kernel-hook-types';
|
||||
|
||||
/**
 * Runs, in registration order, every hook registered for `params.input.event`.
 *
 * - A hook resolving to a falsy value is ignored.
 * - Truthy `additionalContext` values are collected in order.
 * - The last truthy `updatedInput` wins.
 * - A hook returning `continue: false` or `decision: 'block'` stops the loop and sets
 *   `blockingReason` to its `reason`, or to a generated message naming the hook.
 * - A hook that throws is logged and skipped; later hooks still run.
 *
 * @param params.registry - Registry the hooks are read from.
 * @param params.input - Event payload handed to each hook unchanged.
 * @returns Aggregated results of the hooks that ran.
 */
export async function runKernelHooks(params: {
  registry: KernelHookRegistry;
  input: KernelHookInput;
}): Promise<KernelLifecycleResult> {
  const { registry, input } = params;
  const collected: KernelLifecycleResult['results'] = [];
  const contexts: string[] = [];
  let latestInput: Record<string, unknown> | undefined;
  let blockedBecause: string | undefined;

  for (const hook of registry.get(input.event)) {
    try {
      const outcome = await hook.execute(input);
      if (outcome) {
        collected.push(outcome);

        if (outcome.additionalContext) {
          contexts.push(outcome.additionalContext);
        }
        if (outcome.updatedInput) {
          latestInput = outcome.updatedInput;
        }

        const blocks = outcome.decision === 'block' || outcome.continue === false;
        if (blocks) {
          blockedBecause = outcome.reason ?? `Execution blocked by hook ${hook.name}`;
          break;
        }
      }
    } catch (error) {
      // Best effort: a failing hook must not abort the remaining hooks.
      const message = error instanceof Error ? error.message : String(error);
      logger.error('Kernel hook 执行失败', {
        hookName: hook.name,
        event: input.event,
        error: message,
      });
    }
  }

  return {
    results: collected,
    additionalContexts: contexts,
    updatedInput: latestInput,
    blockingReason: blockedBecause,
  };
}
|
||||
99
src/agent-kernel/hooks/kernel-hook-types.ts
Normal file
99
src/agent-kernel/hooks/kernel-hook-types.ts
Normal file
@@ -0,0 +1,99 @@
|
||||
import type { ToolExecutionContext } from '../../review/tools/types';
|
||||
import type { KernelDelegationPacket, KernelSubagentInvocationResult } from '../types';
|
||||
|
||||
/** Event names a kernel hook can be registered for. */
export type KernelHookEventName =
  | 'SessionStart'
  | 'SubagentStart'
  | 'PermissionRequest'
  | 'PreToolUse'
  | 'PostToolUse'
  | 'PostToolUseFailure';
|
||||
|
||||
/** Payload handed to hooks registered for the `SessionStart` event. */
export interface SessionStartHookInput {
  /** Discriminant identifying this payload. */
  event: 'SessionStart';
  /** Identifier of the kernel session. */
  sessionId: string;
  /** Identifier of the run. */
  runId: string;
  /** Scope key of the session. */
  scopeKey: string;
}
|
||||
|
||||
/** Payload handed to hooks registered for the `SubagentStart` event. */
export interface SubagentStartHookInput {
  /** Discriminant identifying this payload. */
  event: 'SubagentStart';
  /** Identifier of the parent kernel session. */
  sessionId: string;
  /** Identifier of the parent run. */
  runId: string;
  /** Name of the subagent being started. */
  subagentName: string;
  /** Identifier generated for this subagent invocation. */
  agentId: string;
  /** Delegation packet describing the work handed to the subagent. */
  packet: KernelDelegationPacket;
}
|
||||
|
||||
/** Fields common to every tool-related hook payload. */
interface ToolHookInputBase {
  /** Name of the tool the event refers to. */
  toolName: string;
  /** Identifier of the individual tool call. */
  toolCallId: string;
  /** Arguments of the tool call. */
  input: Record<string, unknown>;
  /** Tool execution context of the call. */
  context: ToolExecutionContext;
}

/** Payload handed to hooks registered for the `PreToolUse` event. */
export interface PreToolUseHookInput extends ToolHookInputBase {
  event: 'PreToolUse';
}

/** Payload handed to hooks registered for the `PermissionRequest` event. */
export interface PermissionRequestHookInput extends ToolHookInputBase {
  event: 'PermissionRequest';
  /** Behavior suggested for the request: `'ask'` or `'deny'`. */
  suggestedBehavior: 'ask' | 'deny';
  /** Explanation accompanying the request. */
  reason: string;
}

/** Payload handed to hooks registered for the `PostToolUse` event. */
export interface PostToolUseHookInput extends ToolHookInputBase {
  event: 'PostToolUse';
  /** Value produced by the tool call. */
  output: unknown;
}

/** Payload handed to hooks registered for the `PostToolUseFailure` event. */
export interface PostToolUseFailureHookInput extends ToolHookInputBase {
  event: 'PostToolUseFailure';
  /** Error text of the failed tool call. */
  error: string;
}
|
||||
|
||||
/** Union of every hook payload, discriminated by its `event` field. */
export type KernelHookInput =
  | SessionStartHookInput
  | SubagentStartHookInput
  | PermissionRequestHookInput
  | PreToolUseHookInput
  | PostToolUseHookInput
  | PostToolUseFailureHookInput;
|
||||
|
||||
/** Value a hook may return from `execute`. Every field is optional. */
export interface KernelHookResult {
  /** `'block'` makes `runKernelHooks` stop and report a blocking reason. */
  decision?: 'approve' | 'block';
  /** `false` makes `runKernelHooks` stop and report a blocking reason. */
  continue?: boolean;
  /** Used as the blocking reason when this result blocks. */
  reason?: string;
  /** Text that `runKernelHooks` collects into `additionalContexts`. */
  additionalContext?: string;
  /** Replacement input; `runKernelHooks` keeps the last one returned. */
  updatedInput?: Record<string, unknown>;
  /** Free-form data attached to the result. */
  metadata?: Record<string, unknown>;
}
|
||||
|
||||
/** A named hook bound to one event. */
export interface KernelHookDefinition {
  /** Hook name; appears in log output and in generated blocking reasons. */
  name: string;
  /** Event the hook is registered for. */
  event: KernelHookEventName;
  /** Human-readable description of the hook. */
  description: string;
  /** Handles one event payload; may resolve to `undefined` to contribute nothing. */
  execute(input: KernelHookInput): Promise<KernelHookResult | undefined>;
}
|
||||
|
||||
/** Aggregate returned by `runKernelHooks` for one event. */
export interface KernelLifecycleResult {
  /** Results returned by the hooks that ran, in execution order. */
  results: KernelHookResult[];
  /** `additionalContext` values collected from those results. */
  additionalContexts: string[];
  /** Last `updatedInput` returned by a hook, if any. */
  updatedInput?: Record<string, unknown>;
  /** Set when a hook blocked execution. */
  blockingReason?: string;
}
|
||||
|
||||
/** Pairs a finished subagent invocation's identifiers with its result. */
export interface KernelSubagentCompletionEnvelope {
  /** Identifier of the invocation. */
  invocationId: string;
  /** Name of the subagent that ran. */
  subagentName: string;
  /** Finalized result of the invocation. */
  result: KernelSubagentInvocationResult;
}
|
||||
17
src/agent-kernel/registry/kernel-task-registry.ts
Normal file
17
src/agent-kernel/registry/kernel-task-registry.ts
Normal file
@@ -0,0 +1,17 @@
|
||||
import type { KernelTaskHandler } from '../types';
|
||||
|
||||
/**
 * In-memory lookup of task handlers keyed by `name`.
 * Registering a second handler under the same name replaces the first.
 */
export class KernelTaskRegistry<TState> {
  private readonly handlers = new Map<string, KernelTaskHandler<TState>>();

  /** Stores `handler` under `handler.name`. */
  register(handler: KernelTaskHandler<TState>): void {
    const key = handler.name;
    this.handlers.set(key, handler);
  }

  /** Returns the handler stored under `name`, or `undefined`. */
  get(name: string): KernelTaskHandler<TState> | undefined {
    return this.handlers.get(name);
  }

  /** Returns a new array holding every stored handler. */
  getAll(): KernelTaskHandler<TState>[] {
    return Array.from(this.handlers.values());
  }
}
|
||||
138
src/agent-kernel/runtime/agent-kernel-runner.ts
Normal file
138
src/agent-kernel/runtime/agent-kernel-runner.ts
Normal file
@@ -0,0 +1,138 @@
|
||||
import { KernelAgentInvoker } from '../agents/kernel-agent-invoker';
|
||||
import { KernelTaskRegistry } from '../registry/kernel-task-registry';
|
||||
import { kernelSessionRepository } from '../session/session-repository';
|
||||
import type {
|
||||
KernelCheckpoint,
|
||||
KernelExecutionContext,
|
||||
KernelTask,
|
||||
KernelTurnPlanner,
|
||||
} from '../types';
|
||||
|
||||
/**
 * Drives a kernel session: pulls tasks from a queue, dispatches each one to a skill
 * handler or to the subagent invoker, and persists an event plus a checkpoint after
 * every task.
 */
export class AgentKernelRunner<TState> {
  constructor(
    private readonly skillRegistry: KernelTaskRegistry<TState>,
    private readonly subagentInvoker: KernelAgentInvoker<TState>,
    private readonly planner: KernelTurnPlanner<TState>
  ) {}

  /**
   * Runs tasks until a stop reason is reached.
   *
   * State and queue are seeded from the persisted checkpoint when one exists,
   * otherwise from `initialState` / `initialTasks`. When the queue is empty the
   * planner is asked for more tasks; an empty plan ends the run as `'completed'`.
   *
   * NOTE(review): `params.continueExisting` is accepted but never read here.
   *
   * @returns The final checkpoint, which is also persisted.
   * @throws Error when the session does not exist or a task has no registered handler.
   * @throws Whatever a handler throws, after a `task_failed` event and a `'failed'`
   *   checkpoint (with the failing task put back at the front) are written.
   */
  async run(params: {
    sessionId: string;
    runId: string;
    initialState: TState;
    initialTasks: KernelTask[];
    continueExisting?: boolean;
  }): Promise<KernelCheckpoint<TState>> {
    const { sessionId, runId } = params;

    const session = kernelSessionRepository.getSessionById(sessionId);
    if (!session) {
      throw new Error(`Kernel session not found: ${sessionId}`);
    }

    const resumed = kernelSessionRepository.loadCheckpoint<TState>(sessionId);
    let state = resumed?.state ?? params.initialState;
    const queue = [...(resumed?.pendingTasks ?? params.initialTasks)];
    let stopReason: string | undefined;

    while (!stopReason) {
      if (queue.length === 0) {
        const planned = this.planner.plan({
          session,
          runId,
          state,
          pendingTasks: [...queue],
        });

        if (planned.length === 0) {
          stopReason = 'completed';
          break;
        }

        queue.push(...planned);
      }

      // The queue is non-empty here, so shift() always yields a task.
      const task = queue.shift() as KernelTask;

      // Fail fast, before any event is recorded, when the handler is missing.
      if (task.kind === 'subagent' && !this.subagentInvoker.get(task.name)) {
        throw new Error(`Kernel subagent handler not found: ${task.name}`);
      }
      if (task.kind === 'skill' && !this.skillRegistry.get(task.name)) {
        throw new Error(`Kernel skill handler not found: ${task.name}`);
      }

      kernelSessionRepository.appendEvent(sessionId, 'task_started', {
        kind: task.kind,
        name: task.name,
        input: task.input ?? {},
        runId,
      });

      const context: KernelExecutionContext<TState> = { session, runId, state };
      let result;
      let invocation;

      try {
        if (task.kind === 'skill') {
          result = await this.skillRegistry.get(task.name)?.execute(task, context);
        } else {
          const output = await this.subagentInvoker.invoke(task, context);
          result = output.result;
          invocation = output.invocation;
        }
      } catch (error) {
        // `invocation` is only assigned after a successful invoke, so both
        // identifiers below are undefined whenever this branch runs.
        kernelSessionRepository.appendEvent(sessionId, 'task_failed', {
          kind: task.kind,
          name: task.name,
          runId,
          invocationId: invocation?.id,
          agentId: invocation?.agentId,
          error: error instanceof Error ? error.message : String(error),
        });
        // Put the failing task back at the front of the persisted queue.
        kernelSessionRepository.saveCheckpoint(sessionId, {
          state,
          pendingTasks: [task, ...queue],
          stopReason: 'failed',
        });
        throw error;
      }

      // Fold the handler's result into the loop state.
      if (result?.state !== undefined) {
        state = result.state;
      }
      if (result?.prepend?.length) {
        queue.unshift(...result.prepend);
      }
      if (result?.enqueue?.length) {
        queue.push(...result.enqueue);
      }
      if (result?.stopReason) {
        stopReason = result.stopReason;
      }

      kernelSessionRepository.appendEvent(sessionId, 'task_completed', {
        kind: task.kind,
        name: task.name,
        runId,
        invocationId: invocation?.id,
        agentId: invocation?.agentId,
        summary: invocation?.result?.summary ?? result?.summary,
        artifacts: invocation?.result?.artifacts ?? result?.artifacts,
        stopReason: result?.stopReason,
      });

      kernelSessionRepository.saveCheckpoint(sessionId, {
        state,
        pendingTasks: queue,
        stopReason,
      });
    }

    const checkpoint = {
      state,
      pendingTasks: queue,
      stopReason: stopReason ?? 'completed',
    };
    kernelSessionRepository.saveCheckpoint(sessionId, checkpoint);
    return checkpoint;
  }
}
|
||||
335
src/agent-kernel/session/session-repository.ts
Normal file
335
src/agent-kernel/session/session-repository.ts
Normal file
@@ -0,0 +1,335 @@
|
||||
import { randomUUID } from 'node:crypto';
|
||||
import { getDatabase } from '../../db/database';
|
||||
import type {
|
||||
KernelCheckpoint,
|
||||
KernelDelegationPacket,
|
||||
KernelSessionEventRecord,
|
||||
KernelSessionRecord,
|
||||
KernelSubagentInvocationRecord,
|
||||
KernelSubagentInvocationResult,
|
||||
} from '../types';
|
||||
|
||||
/** Shape of a row selected from `agent_kernel_sessions`. */
interface SessionRow {
  id: string;
  scope_type: 'pull_request' | 'commit';
  scope_key: string;
  /** JSON-encoded session metadata. */
  metadata_json: string;
  last_run_id?: string;
  created_at: string;
  updated_at: string;
}
|
||||
|
||||
/** Shape of a row selected from `agent_kernel_session_events`. */
interface EventRow {
  id: string;
  session_id: string;
  event_type: string;
  /** JSON-encoded event payload. */
  payload_json: string;
  created_at: string;
}
|
||||
|
||||
/** Shape of a row selected from `agent_kernel_session_checkpoints`. */
interface CheckpointRow {
  session_id: string;
  /** JSON-encoded kernel state. */
  state_json: string;
  /** JSON-encoded list of pending tasks. */
  pending_tasks_json: string;
  stop_reason?: string;
  state_version: number;
  updated_at: string;
}
|
||||
|
||||
/** Shape of a row selected from `agent_kernel_subagent_invocations`. */
interface SubagentInvocationRow {
  id: string;
  parent_session_id: string;
  parent_run_id: string;
  parent_task_name: string;
  subagent_name: string;
  agent_id: string;
  status: 'running' | 'completed' | 'failed';
  /** JSON-encoded delegation packet. */
  input_json: string;
  /** JSON-encoded invocation result; absent until the invocation is completed. */
  result_json?: string;
  started_at: string;
  finished_at?: string;
}
|
||||
|
||||
/**
 * Converts a session table row into a `KernelSessionRecord`:
 * column names become camelCase and `metadata_json` is parsed.
 *
 * @throws SyntaxError when `metadata_json` is not valid JSON.
 */
function toSessionRecord(row: SessionRow): KernelSessionRecord {
  const {
    id,
    scope_type: scopeType,
    scope_key: scopeKey,
    metadata_json: metadataJson,
    created_at: createdAt,
    updated_at: updatedAt,
    last_run_id: lastRunId,
  } = row;
  const metadata = JSON.parse(metadataJson) as Record<string, unknown>;

  return { id, scopeType, scopeKey, metadata, createdAt, updatedAt, lastRunId };
}
|
||||
|
||||
export class KernelSessionRepository {
|
||||
ensureSession(input: {
|
||||
scopeType: 'pull_request' | 'commit';
|
||||
scopeKey: string;
|
||||
metadata: Record<string, unknown>;
|
||||
runId?: string;
|
||||
}): KernelSessionRecord {
|
||||
const db = getDatabase();
|
||||
const existing = db
|
||||
.query(
|
||||
`SELECT id, scope_type, scope_key, metadata_json, created_at, updated_at, last_run_id
|
||||
FROM agent_kernel_sessions
|
||||
WHERE scope_key = ?`
|
||||
)
|
||||
.get(input.scopeKey) as SessionRow | null;
|
||||
|
||||
if (existing) {
|
||||
db.query(
|
||||
`UPDATE agent_kernel_sessions
|
||||
SET metadata_json = ?, updated_at = datetime('now'), last_run_id = ?
|
||||
WHERE id = ?`
|
||||
).run(
|
||||
JSON.stringify(input.metadata),
|
||||
input.runId ?? existing.last_run_id ?? null,
|
||||
existing.id
|
||||
);
|
||||
|
||||
return this.getSessionById(existing.id) as KernelSessionRecord;
|
||||
}
|
||||
|
||||
const id = randomUUID();
|
||||
db.query(
|
||||
`INSERT INTO agent_kernel_sessions (
|
||||
id, scope_type, scope_key, metadata_json, last_run_id
|
||||
) VALUES (?, ?, ?, ?, ?)`
|
||||
).run(id, input.scopeType, input.scopeKey, JSON.stringify(input.metadata), input.runId ?? null);
|
||||
|
||||
return this.getSessionById(id) as KernelSessionRecord;
|
||||
}
|
||||
|
||||
getSessionById(sessionId: string): KernelSessionRecord | null {
|
||||
const db = getDatabase();
|
||||
const row = db
|
||||
.query(
|
||||
`SELECT id, scope_type, scope_key, metadata_json, created_at, updated_at, last_run_id
|
||||
FROM agent_kernel_sessions
|
||||
WHERE id = ?`
|
||||
)
|
||||
.get(sessionId) as SessionRow | null;
|
||||
|
||||
return row ? toSessionRecord(row) : null;
|
||||
}
|
||||
|
||||
getSessionByScopeKey(scopeKey: string): KernelSessionRecord | null {
|
||||
const db = getDatabase();
|
||||
const row = db
|
||||
.query(
|
||||
`SELECT id, scope_type, scope_key, metadata_json, created_at, updated_at, last_run_id
|
||||
FROM agent_kernel_sessions
|
||||
WHERE scope_key = ?`
|
||||
)
|
||||
.get(scopeKey) as SessionRow | null;
|
||||
|
||||
return row ? toSessionRecord(row) : null;
|
||||
}
|
||||
|
||||
listSessions(limit = 50): KernelSessionRecord[] {
|
||||
const db = getDatabase();
|
||||
const rows = db
|
||||
.query(
|
||||
`SELECT id, scope_type, scope_key, metadata_json, created_at, updated_at, last_run_id
|
||||
FROM agent_kernel_sessions
|
||||
ORDER BY updated_at DESC, created_at DESC
|
||||
LIMIT ?`
|
||||
)
|
||||
.all(limit) as SessionRow[];
|
||||
|
||||
return rows.map(toSessionRecord);
|
||||
}
|
||||
|
||||
appendEvent(
|
||||
sessionId: string,
|
||||
eventType: string,
|
||||
payload: Record<string, unknown>
|
||||
): KernelSessionEventRecord {
|
||||
const db = getDatabase();
|
||||
const id = randomUUID();
|
||||
db.query(
|
||||
`INSERT INTO agent_kernel_session_events (id, session_id, event_type, payload_json)
|
||||
VALUES (?, ?, ?, ?)`
|
||||
).run(id, sessionId, eventType, JSON.stringify(payload));
|
||||
|
||||
const row = db
|
||||
.query(
|
||||
`SELECT id, session_id, event_type, payload_json, created_at
|
||||
FROM agent_kernel_session_events
|
||||
WHERE id = ?`
|
||||
)
|
||||
.get(id) as EventRow;
|
||||
|
||||
return {
|
||||
id: row.id,
|
||||
sessionId: row.session_id,
|
||||
eventType: row.event_type,
|
||||
payload: JSON.parse(row.payload_json) as Record<string, unknown>,
|
||||
createdAt: row.created_at,
|
||||
};
|
||||
}
|
||||
|
||||
listEvents(sessionId: string): KernelSessionEventRecord[] {
|
||||
const db = getDatabase();
|
||||
const rows = db
|
||||
.query(
|
||||
`SELECT id, session_id, event_type, payload_json, created_at
|
||||
FROM agent_kernel_session_events
|
||||
WHERE session_id = ?
|
||||
ORDER BY created_at ASC, id ASC`
|
||||
)
|
||||
.all(sessionId) as EventRow[];
|
||||
|
||||
return rows.map((row) => ({
|
||||
id: row.id,
|
||||
sessionId: row.session_id,
|
||||
eventType: row.event_type,
|
||||
payload: JSON.parse(row.payload_json) as Record<string, unknown>,
|
||||
createdAt: row.created_at,
|
||||
}));
|
||||
}
|
||||
|
||||
saveCheckpoint<TState>(
|
||||
sessionId: string,
|
||||
checkpoint: KernelCheckpoint<TState>,
|
||||
stateVersion = 1
|
||||
): void {
|
||||
const db = getDatabase();
|
||||
db.query(
|
||||
`INSERT INTO agent_kernel_session_checkpoints (
|
||||
session_id, state_json, pending_tasks_json, stop_reason, state_version, updated_at
|
||||
) VALUES (?, ?, ?, ?, ?, datetime('now'))
|
||||
ON CONFLICT(session_id) DO UPDATE SET
|
||||
state_json = excluded.state_json,
|
||||
pending_tasks_json = excluded.pending_tasks_json,
|
||||
stop_reason = excluded.stop_reason,
|
||||
state_version = excluded.state_version,
|
||||
updated_at = datetime('now')`
|
||||
).run(
|
||||
sessionId,
|
||||
JSON.stringify(checkpoint.state),
|
||||
JSON.stringify(checkpoint.pendingTasks),
|
||||
checkpoint.stopReason ?? null,
|
||||
stateVersion
|
||||
);
|
||||
}
|
||||
|
||||
loadCheckpoint<TState>(sessionId: string): KernelCheckpoint<TState> | null {
|
||||
const db = getDatabase();
|
||||
const row = db
|
||||
.query(
|
||||
`SELECT session_id, state_json, pending_tasks_json, stop_reason, updated_at, state_version
|
||||
FROM agent_kernel_session_checkpoints
|
||||
WHERE session_id = ?`
|
||||
)
|
||||
.get(sessionId) as CheckpointRow | null;
|
||||
|
||||
if (!row) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return {
|
||||
state: JSON.parse(row.state_json) as TState,
|
||||
pendingTasks: JSON.parse(row.pending_tasks_json) as KernelCheckpoint<TState>['pendingTasks'],
|
||||
stopReason: row.stop_reason,
|
||||
};
|
||||
}
|
||||
|
||||
deleteCheckpoint(sessionId: string): void {
|
||||
const db = getDatabase();
|
||||
db.query('DELETE FROM agent_kernel_session_checkpoints WHERE session_id = ?').run(sessionId);
|
||||
}
|
||||
|
||||
createSubagentInvocation(input: {
|
||||
parentSessionId: string;
|
||||
parentRunId: string;
|
||||
parentTaskName: string;
|
||||
subagentName: string;
|
||||
agentId: string;
|
||||
packet: KernelDelegationPacket;
|
||||
}): KernelSubagentInvocationRecord {
|
||||
const db = getDatabase();
|
||||
const id = randomUUID();
|
||||
db.query(
|
||||
`INSERT INTO agent_kernel_subagent_invocations (
|
||||
id, parent_session_id, parent_run_id, parent_task_name, subagent_name, agent_id, status, input_json
|
||||
) VALUES (?, ?, ?, ?, ?, ?, 'running', ?)`
|
||||
).run(
|
||||
id,
|
||||
input.parentSessionId,
|
||||
input.parentRunId,
|
||||
input.parentTaskName,
|
||||
input.subagentName,
|
||||
input.agentId,
|
||||
JSON.stringify(input.packet)
|
||||
);
|
||||
|
||||
return this.getSubagentInvocationById(id) as KernelSubagentInvocationRecord;
|
||||
}
|
||||
|
||||
completeSubagentInvocation(
|
||||
invocationId: string,
|
||||
status: 'completed' | 'failed',
|
||||
result: KernelSubagentInvocationResult
|
||||
): KernelSubagentInvocationRecord {
|
||||
const db = getDatabase();
|
||||
db.query(
|
||||
`UPDATE agent_kernel_subagent_invocations
|
||||
SET status = ?, result_json = ?, finished_at = datetime('now')
|
||||
WHERE id = ?`
|
||||
).run(status, JSON.stringify(result), invocationId);
|
||||
|
||||
return this.getSubagentInvocationById(invocationId) as KernelSubagentInvocationRecord;
|
||||
}
|
||||
|
||||
listSubagentInvocations(parentSessionId: string): KernelSubagentInvocationRecord[] {
|
||||
const db = getDatabase();
|
||||
const rows = db
|
||||
.query(
|
||||
`SELECT id, parent_session_id, parent_run_id, parent_task_name, subagent_name, agent_id,
|
||||
status, input_json, result_json, started_at, finished_at
|
||||
FROM agent_kernel_subagent_invocations
|
||||
WHERE parent_session_id = ?
|
||||
ORDER BY started_at ASC, id ASC`
|
||||
)
|
||||
.all(parentSessionId) as SubagentInvocationRow[];
|
||||
|
||||
return rows.map((row) => this.toSubagentInvocationRecord(row));
|
||||
}
|
||||
|
||||
/**
 * Looks up a single subagent invocation by its primary key.
 *
 * @param invocationId - Value matched against the `id` column.
 * @returns The converted record, or `null` when the query yields no row.
 */
private getSubagentInvocationById(invocationId: string): KernelSubagentInvocationRecord | null {
  const lookup = getDatabase().query(
    `SELECT id, parent_session_id, parent_run_id, parent_task_name, subagent_name, agent_id,
            status, input_json, result_json, started_at, finished_at
     FROM agent_kernel_subagent_invocations
     WHERE id = ?`
  );
  const match = lookup.get(invocationId) as SubagentInvocationRow | null;

  if (!match) {
    return null;
  }
  return this.toSubagentInvocationRecord(match);
}
|
||||
|
||||
/**
 * Converts a raw `agent_kernel_subagent_invocations` row into the camel-cased
 * record shape exposed by the repository.
 *
 * `input_json` is always parsed; `result_json` is parsed only when present.
 * Absent optional columns are reported as `undefined` so the returned object
 * matches the optional (`?:`) fields declared on the record type.
 *
 * @param row - Row as returned by the SELECT statements in this repository.
 * @returns The mapped invocation record.
 * @throws SyntaxError if `input_json` or a non-empty `result_json` is not
 *   valid JSON (propagated from `JSON.parse`).
 */
private toSubagentInvocationRecord(row: SubagentInvocationRow): KernelSubagentInvocationRecord {
  return {
    id: row.id,
    parentSessionId: row.parent_session_id,
    parentRunId: row.parent_run_id,
    parentTaskName: row.parent_task_name,
    subagentName: row.subagent_name,
    agentId: row.agent_id,
    status: row.status,
    input: JSON.parse(row.input_json) as KernelDelegationPacket,
    result: row.result_json
      ? (JSON.parse(row.result_json) as KernelSubagentInvocationResult)
      : undefined,
    startedAt: row.started_at,
    // `finished_at` is only written when an invocation is completed, so rows
    // that are still running presumably carry a database NULL here. Normalise
    // it to `undefined`, mirroring how `result_json` is handled above, so the
    // record never exposes `null` for a field typed as `string | undefined`.
    finishedAt: row.finished_at ?? undefined,
  };
}
|
||||
}
|
||||
|
||||
/** Module-level shared instance of `KernelSessionRepository`. */
export const kernelSessionRepository = new KernelSessionRepository();
|
||||
132
src/agent-kernel/types.ts
Normal file
132
src/agent-kernel/types.ts
Normal file
@@ -0,0 +1,132 @@
|
||||
/** The two kinds of work unit the kernel distinguishes: a `skill` or a `subagent`. */
export type KernelTaskKind = 'skill' | 'subagent';
|
||||
|
||||
/**
 * A single unit of work, identified by its kind and name, with an optional
 * free-form input object.
 */
export interface KernelTask {
|
||||
kind: KernelTaskKind;
|
||||
name: string;
|
||||
input?: Record<string, unknown>;
|
||||
}
|
||||
|
||||
/**
 * Describes work delegated to a subagent: the goal, the name of the parent
 * task, a free-form input object, the ids of the parent session and run, and
 * an optional context summary.
 */
export interface KernelDelegationPacket {
|
||||
goal: string;
|
||||
parentTaskName: string;
|
||||
input: Record<string, unknown>;
|
||||
parentSessionId: string;
|
||||
parentRunId: string;
|
||||
contextSummary?: string;
|
||||
}
|
||||
|
||||
/**
 * Descriptive metadata shared by every task definition: kind, name and a
 * human-readable description.
 *
 * NOTE(review): the exact meaning of `resumable` is not visible here —
 * presumably whether the task may be continued after an interruption; confirm
 * against the kernel loop.
 */
export interface KernelTaskDefinition {
|
||||
kind: KernelTaskKind;
|
||||
name: string;
|
||||
description: string;
|
||||
resumable?: boolean;
|
||||
}
|
||||
|
||||
/** Where an agent definition comes from: `built-in`, `custom` or `plugin`. */
export type KernelAgentSource = 'built-in' | 'custom' | 'plugin';
|
||||
|
||||
/**
 * Definition of a subagent: a task definition whose `kind` is fixed to
 * `'subagent'`, extended with its source, a `whenToUse` description, optional
 * tuning fields and the `execute` entry point.
 *
 * NOTE(review): how `tags`, `modelRole`, `maxTurns` and `background` are
 * consumed is not visible in this file — confirm against the runtime.
 *
 * @typeParam TState - Shape of the kernel state passed to and returned from
 *   `execute`.
 */
export interface KernelSubagentDefinition<TState> extends KernelTaskDefinition {
|
||||
kind: 'subagent';
|
||||
name: string;
|
||||
source: KernelAgentSource;
|
||||
whenToUse: string;
|
||||
tags?: string[];
|
||||
modelRole?: string;
|
||||
maxTurns?: number;
|
||||
background?: boolean;
|
||||
/**
 * Runs the subagent for `task`. Resolves to a handler result, or to
 * `undefined` when there is nothing to report.
 */
execute(
|
||||
task: KernelTask,
|
||||
context: KernelAgentExecutionContext<TState>
|
||||
): Promise<KernelHandlerResult<TState> | undefined>;
|
||||
}
|
||||
|
||||
/**
 * Snapshot of a session: the current state, the tasks still pending and an
 * optional stop reason.
 *
 * @typeParam TState - Shape of the kernel state being checkpointed.
 */
export interface KernelCheckpoint<TState> {
|
||||
state: TState;
|
||||
pendingTasks: KernelTask[];
|
||||
stopReason?: string;
|
||||
}
|
||||
|
||||
/**
 * A kernel session, scoped to either a pull request or a commit.
 *
 * `scopeKey` identifies the scoped object, `metadata` is a free-form object,
 * and `lastRunId` is optional. Timestamps are carried as strings.
 */
export interface KernelSessionRecord {
|
||||
id: string;
|
||||
scopeType: 'pull_request' | 'commit';
|
||||
scopeKey: string;
|
||||
metadata: Record<string, unknown>;
|
||||
createdAt: string;
|
||||
updatedAt: string;
|
||||
lastRunId?: string;
|
||||
}
|
||||
|
||||
/**
 * One event attached to a session: an event type string plus a free-form
 * payload object, with the creation time carried as a string.
 */
export interface KernelSessionEventRecord {
|
||||
id: string;
|
||||
sessionId: string;
|
||||
eventType: string;
|
||||
payload: Record<string, unknown>;
|
||||
createdAt: string;
|
||||
}
|
||||
|
||||
/**
 * Identifies a subagent within its parent session: the agent id, the parent
 * session id, the subagent's name and source, and whether this invocation is a
 * fresh `spawn` or a `resume`. `agentType` is always `'subagent'`.
 */
export interface KernelSubagentContextRecord {
|
||||
agentId: string;
|
||||
parentSessionId: string;
|
||||
agentType: 'subagent';
|
||||
subagentName: string;
|
||||
source: KernelAgentSource;
|
||||
invocationKind: 'spawn' | 'resume';
|
||||
}
|
||||
|
||||
/**
 * One invocation of a subagent on behalf of a parent session, run and task.
 *
 * `input` is the delegation packet handed to the subagent. `result` and
 * `finishedAt` are optional; `status` is one of `running`, `completed` or
 * `failed`. Timestamps are carried as strings.
 */
export interface KernelSubagentInvocationRecord {
|
||||
id: string;
|
||||
parentSessionId: string;
|
||||
parentRunId: string;
|
||||
parentTaskName: string;
|
||||
subagentName: string;
|
||||
agentId: string;
|
||||
status: 'running' | 'completed' | 'failed';
|
||||
input: KernelDelegationPacket;
|
||||
result?: KernelSubagentInvocationResult;
|
||||
startedAt: string;
|
||||
finishedAt?: string;
|
||||
}
|
||||
|
||||
/**
 * Outcome reported for a subagent invocation: which agent produced it, a text
 * summary, aggregate duration / tool-use / token counters, and optional
 * free-form artifacts.
 */
export interface KernelSubagentInvocationResult {
|
||||
agentId: string;
|
||||
agentType: string;
|
||||
summary: string;
|
||||
totalDurationMs: number;
|
||||
totalToolUseCount: number;
|
||||
totalTokens: number;
|
||||
artifacts?: Record<string, unknown>;
|
||||
}
|
||||
|
||||
/**
 * Context passed to task handlers: the session, the id of the current run and
 * the current state.
 *
 * @typeParam TState - Shape of the kernel state.
 */
export interface KernelExecutionContext<TState> {
|
||||
session: KernelSessionRecord;
|
||||
runId: string;
|
||||
state: TState;
|
||||
}
|
||||
|
||||
/**
 * Execution context for a subagent: the base execution context plus the
 * subagent's own definition and the delegation packet it was given.
 *
 * @typeParam TState - Shape of the kernel state.
 */
export interface KernelAgentExecutionContext<TState> extends KernelExecutionContext<TState> {
|
||||
agent: KernelSubagentDefinition<TState>;
|
||||
delegation: KernelDelegationPacket;
|
||||
}
|
||||
|
||||
/**
 * Context passed to a turn planner: the base execution context plus the list
 * of tasks that are currently pending.
 *
 * @typeParam TState - Shape of the kernel state.
 */
export interface KernelPlanningContext<TState> extends KernelExecutionContext<TState> {
|
||||
pendingTasks: KernelTask[];
|
||||
}
|
||||
|
||||
/**
 * What a handler may return after running a task. Every field is optional.
 *
 * NOTE(review): `enqueue` and `prepend` presumably add tasks to the back and
 * the front of the pending queue respectively — confirm against the kernel
 * loop, which is not visible here.
 *
 * @typeParam TState - Shape of the kernel state.
 */
export interface KernelHandlerResult<TState> {
|
||||
state?: TState;
|
||||
enqueue?: KernelTask[];
|
||||
prepend?: KernelTask[];
|
||||
stopReason?: string;
|
||||
summary?: string;
|
||||
artifacts?: Record<string, unknown>;
|
||||
}
|
||||
|
||||
/**
 * A task definition together with the function that executes it.
 *
 * @typeParam TState - Shape of the kernel state.
 */
export interface KernelTaskHandler<TState> extends KernelTaskDefinition {
|
||||
/**
 * Runs `task` in the given context. Resolves to a handler result, or to
 * `undefined` when there is nothing to report.
 */
execute(
|
||||
task: KernelTask,
|
||||
context: KernelExecutionContext<TState>
|
||||
): Promise<KernelHandlerResult<TState> | undefined>;
|
||||
}
|
||||
|
||||
/**
 * Strategy object that synchronously produces a list of tasks from the
 * current planning context.
 *
 * @typeParam TState - Shape of the kernel state.
 */
export interface KernelTurnPlanner<TState> {
|
||||
plan(context: KernelPlanningContext<TState>): KernelTask[];
|
||||
}
|
||||
@@ -66,7 +66,7 @@ describe('ConfigManager (DB backend)', () => {
|
||||
|
||||
describe('getCurrent() defaults', () => {
|
||||
test('returns default engine when DB is empty', () => {
|
||||
expect(configManager.getCurrent().review.engine).toBe('agent');
|
||||
expect(configManager.getCurrent().review.engine).toBe('kernel');
|
||||
});
|
||||
|
||||
test('reads port from process.env.PORT, defaults to 5174', () => {
|
||||
@@ -105,18 +105,18 @@ describe('ConfigManager (DB backend)', () => {
|
||||
|
||||
describe('setOverrides() and getSource()', () => {
|
||||
test('setOverrides writes to DB, getCurrent reflects the change', async () => {
|
||||
await configManager.setOverrides({ REVIEW_ENGINE: 'agent' });
|
||||
expect(configManager.getCurrent().review.engine).toBe('agent');
|
||||
await configManager.setOverrides({ REVIEW_ENGINE: 'kernel' });
|
||||
expect(configManager.getCurrent().review.engine).toBe('kernel');
|
||||
});
|
||||
|
||||
test('setOverrides with empty string deletes the key (resets to default)', async () => {
|
||||
await configManager.setOverrides({ REVIEW_ENGINE: 'agent' });
|
||||
await configManager.setOverrides({ REVIEW_ENGINE: 'kernel' });
|
||||
await configManager.setOverrides({ REVIEW_ENGINE: '' });
|
||||
expect(configManager.getCurrent().review.engine).toBe('agent');
|
||||
expect(configManager.getCurrent().review.engine).toBe('kernel');
|
||||
});
|
||||
|
||||
test('getSource returns "db" when value is stored', async () => {
|
||||
await configManager.setOverrides({ REVIEW_ENGINE: 'agent' });
|
||||
await configManager.setOverrides({ REVIEW_ENGINE: 'kernel' });
|
||||
expect(configManager.getSource('REVIEW_ENGINE')).toBe('db');
|
||||
});
|
||||
|
||||
@@ -131,7 +131,7 @@ describe('ConfigManager (DB backend)', () => {
|
||||
|
||||
test('unknown keys are silently ignored', async () => {
|
||||
await configManager.setOverrides({ UNKNOWN_KEY_XYZ: 'value' });
|
||||
expect(configManager.getCurrent().review.engine).toBe('agent');
|
||||
expect(configManager.getCurrent().review.engine).toBe('kernel');
|
||||
});
|
||||
});
|
||||
|
||||
@@ -139,15 +139,15 @@ describe('ConfigManager (DB backend)', () => {
|
||||
|
||||
describe('resetKeys()', () => {
|
||||
test('resetKeys deletes key from DB, value reverts to default', async () => {
|
||||
await configManager.setOverrides({ REVIEW_ENGINE: 'agent' });
|
||||
await configManager.setOverrides({ REVIEW_ENGINE: 'kernel' });
|
||||
await configManager.resetKeys(['REVIEW_ENGINE']);
|
||||
expect(configManager.getCurrent().review.engine).toBe('agent');
|
||||
expect(configManager.getCurrent().review.engine).toBe('kernel');
|
||||
expect(configManager.getSource('REVIEW_ENGINE')).toBe('default');
|
||||
});
|
||||
|
||||
test('resetKeys on non-existent key does not throw', async () => {
|
||||
await configManager.resetKeys(['REVIEW_ENGINE']);
|
||||
expect(configManager.getCurrent().review.engine).toBe('agent');
|
||||
expect(configManager.getCurrent().review.engine).toBe('kernel');
|
||||
});
|
||||
});
|
||||
|
||||
@@ -171,9 +171,9 @@ describe('ConfigManager (DB backend)', () => {
|
||||
});
|
||||
|
||||
test('seedDefaults is idempotent — no-op when DB already has entries', async () => {
|
||||
await configManager.setOverrides({ REVIEW_ENGINE: 'agent' });
|
||||
await configManager.setOverrides({ REVIEW_ENGINE: 'kernel' });
|
||||
configManager.seedDefaults();
|
||||
expect(configManager.getCurrent().review.engine).toBe('agent');
|
||||
expect(configManager.getCurrent().review.engine).toBe('kernel');
|
||||
});
|
||||
|
||||
test('ADMIN_PASSWORD defaults to "password"', () => {
|
||||
@@ -196,13 +196,13 @@ describe('ConfigManager (DB backend)', () => {
|
||||
|
||||
describe('type conversions in getCurrent()', () => {
|
||||
test('boolean field "true" → true', async () => {
|
||||
await configManager.setOverrides({ REVIEW_ENABLE_HUMAN_GATE: 'true' });
|
||||
expect(configManager.getCurrent().review.enableHumanGate).toBe(true);
|
||||
await configManager.setOverrides({ ENABLE_TRIAGE: 'true' });
|
||||
expect(configManager.getCurrent().review.enableTriage).toBe(true);
|
||||
});
|
||||
|
||||
test('boolean field "false" → false', async () => {
|
||||
await configManager.setOverrides({ REVIEW_ENABLE_HUMAN_GATE: 'false' });
|
||||
expect(configManager.getCurrent().review.enableHumanGate).toBe(false);
|
||||
await configManager.setOverrides({ ENABLE_TRIAGE: 'false' });
|
||||
expect(configManager.getCurrent().review.enableTriage).toBe(false);
|
||||
});
|
||||
|
||||
test('number field is parsed correctly', async () => {
|
||||
|
||||
@@ -32,14 +32,12 @@ export interface AppConfig {
|
||||
giteaAdminToken: string | undefined;
|
||||
};
|
||||
review: {
|
||||
engine: 'agent' | 'codex';
|
||||
engine: 'codex' | 'kernel';
|
||||
workdir: string;
|
||||
globalPrompt: string | undefined;
|
||||
maxParallelRuns: number;
|
||||
maxFilesPerRun: number;
|
||||
maxFileContentChars: number;
|
||||
autoPublishMinConfidence: number;
|
||||
enableHumanGate: boolean;
|
||||
allowedCommands: string[];
|
||||
commandTimeoutMs: number;
|
||||
llmMaxConcurrentCalls: number;
|
||||
@@ -58,13 +56,6 @@ export interface AppConfig {
|
||||
codexModel: string;
|
||||
codexTimeoutMs: number;
|
||||
codexReviewPrompt: string | undefined;
|
||||
qdrantUrl: string | undefined;
|
||||
enableMemory: boolean;
|
||||
fewShotExamplesCount: number;
|
||||
enableReflection: boolean;
|
||||
maxReflectionRounds: number;
|
||||
enableDebate: boolean;
|
||||
debateThreshold: string;
|
||||
};
|
||||
}
|
||||
|
||||
@@ -139,8 +130,8 @@ class ConfigManager {
|
||||
|
||||
return {
|
||||
gitea: {
|
||||
apiUrl: values.GITEA_API_URL ?? 'http://localhost:5174/api/v1',
|
||||
accessToken: values.GITEA_ACCESS_TOKEN ?? 'test_token',
|
||||
apiUrl: values.GITEA_API_URL ?? '',
|
||||
accessToken: values.GITEA_ACCESS_TOKEN ?? '',
|
||||
},
|
||||
notification: {
|
||||
feishu: {
|
||||
@@ -163,14 +154,12 @@ class ConfigManager {
|
||||
giteaAdminToken: values.GITEA_ADMIN_TOKEN,
|
||||
},
|
||||
review: {
|
||||
engine: values.REVIEW_ENGINE === 'codex' ? 'codex' : 'agent',
|
||||
engine: values.REVIEW_ENGINE === 'codex' ? 'codex' : 'kernel',
|
||||
workdir: values.REVIEW_WORKDIR ?? '/tmp/gitea-assistant',
|
||||
globalPrompt: values.GLOBAL_PROMPT,
|
||||
maxParallelRuns: toNumber('REVIEW_MAX_PARALLEL_RUNS', 2),
|
||||
maxFilesPerRun: toNumber('REVIEW_MAX_FILES_PER_RUN', 200),
|
||||
maxFileContentChars: toNumber('REVIEW_MAX_FILE_CONTENT_CHARS', 40000),
|
||||
autoPublishMinConfidence: toNumber('REVIEW_AUTO_PUBLISH_MIN_CONFIDENCE', 0.8),
|
||||
enableHumanGate: toBoolean('REVIEW_ENABLE_HUMAN_GATE', true),
|
||||
allowedCommands: toStringArray('REVIEW_ALLOWED_COMMANDS', [
|
||||
'git',
|
||||
'rg',
|
||||
@@ -195,13 +184,6 @@ class ConfigManager {
|
||||
codexModel: values.CODEX_MODEL ?? 'o3',
|
||||
codexTimeoutMs: toNumber('CODEX_TIMEOUT_MS', 300000),
|
||||
codexReviewPrompt: values.CODEX_REVIEW_PROMPT,
|
||||
qdrantUrl: values.QDRANT_URL,
|
||||
enableMemory: toBoolean('ENABLE_MEMORY', false),
|
||||
fewShotExamplesCount: toNumber('FEW_SHOT_EXAMPLES_COUNT', 10),
|
||||
enableReflection: toBoolean('ENABLE_REFLECTION', false),
|
||||
maxReflectionRounds: toNumber('MAX_REFLECTION_ROUNDS', 2),
|
||||
enableDebate: toBoolean('ENABLE_DEBATE', false),
|
||||
debateThreshold: values.DEBATE_THRESHOLD ?? 'high',
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export type ConfigGroup = 'gitea' | 'notification' | 'security' | 'review' | 'memory';
|
||||
export type ConfigGroup = 'gitea' | 'notification' | 'security' | 'review';
|
||||
|
||||
export type ConfigFieldType = 'string' | 'number' | 'boolean' | 'url' | 'text' | 'enum';
|
||||
|
||||
@@ -57,15 +57,9 @@ export const CONFIG_GROUPS: ConfigGroupMeta[] = [
|
||||
{
|
||||
key: 'review',
|
||||
label: '审查引擎',
|
||||
description: 'Agent 审查模式、并发与沙箱设置',
|
||||
description: 'Kernel/Codex 审查模式、并发与沙箱设置',
|
||||
icon: 'file-check',
|
||||
},
|
||||
{
|
||||
key: 'memory',
|
||||
label: '记忆与学习',
|
||||
description: '向量记忆、反思与辩论系统',
|
||||
icon: 'brain',
|
||||
},
|
||||
];
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -188,17 +182,17 @@ export const CONFIG_FIELDS: ConfigFieldMeta[] = [
|
||||
envKey: 'REVIEW_ENGINE',
|
||||
group: 'review',
|
||||
label: '审查引擎',
|
||||
description: '代码审查模式:agent(任务化分级编排)或 codex(Codex CLI)',
|
||||
description: '代码审查模式:codex(Codex CLI)或 kernel(session 驱动 agentic loop)',
|
||||
type: 'enum',
|
||||
sensitive: false,
|
||||
enumValues: ['agent', 'codex'],
|
||||
defaultValue: 'agent',
|
||||
enumValues: ['codex', 'kernel'],
|
||||
defaultValue: 'kernel',
|
||||
},
|
||||
{
|
||||
envKey: 'REVIEW_WORKDIR',
|
||||
group: 'review',
|
||||
label: '工作目录',
|
||||
description: 'Agent 模式下本地仓库 mirror/worktree 的工作目录',
|
||||
description: 'Kernel 审查模式下本地仓库 mirror/worktree 的工作目录',
|
||||
type: 'string',
|
||||
sensitive: false,
|
||||
defaultValue: '/tmp/gitea-assistant',
|
||||
@@ -236,26 +230,6 @@ export const CONFIG_FIELDS: ConfigFieldMeta[] = [
|
||||
max: 1000000,
|
||||
defaultValue: 40000,
|
||||
},
|
||||
{
|
||||
envKey: 'REVIEW_AUTO_PUBLISH_MIN_CONFIDENCE',
|
||||
group: 'review',
|
||||
label: '自动发布置信度',
|
||||
description: '自动发布评论所需的最小置信度(0~1)',
|
||||
type: 'number',
|
||||
sensitive: false,
|
||||
min: 0,
|
||||
max: 1,
|
||||
defaultValue: 0.8,
|
||||
},
|
||||
{
|
||||
envKey: 'REVIEW_ENABLE_HUMAN_GATE',
|
||||
group: 'review',
|
||||
label: '人工审批',
|
||||
description: '是否启用人工审批队列(低置信度评论需人工确认后发布)',
|
||||
type: 'boolean',
|
||||
sensitive: false,
|
||||
defaultValue: true,
|
||||
},
|
||||
{
|
||||
envKey: 'REVIEW_ALLOWED_COMMANDS',
|
||||
group: 'review',
|
||||
@@ -442,75 +416,6 @@ export const CONFIG_FIELDS: ConfigFieldMeta[] = [
|
||||
type: 'text',
|
||||
sensitive: false,
|
||||
},
|
||||
|
||||
// ── 记忆与学习 ──────────────────────────────────────────────────────────
|
||||
{
|
||||
envKey: 'QDRANT_URL',
|
||||
group: 'memory',
|
||||
label: 'Qdrant 地址',
|
||||
description: 'Qdrant 向量数据库的连接 URL',
|
||||
type: 'url',
|
||||
sensitive: false,
|
||||
},
|
||||
{
|
||||
envKey: 'ENABLE_MEMORY',
|
||||
group: 'memory',
|
||||
label: '启用记忆',
|
||||
description: '是否启用向量记忆系统(需配置 Qdrant)',
|
||||
type: 'boolean',
|
||||
sensitive: false,
|
||||
defaultValue: false,
|
||||
},
|
||||
{
|
||||
envKey: 'FEW_SHOT_EXAMPLES_COUNT',
|
||||
group: 'memory',
|
||||
label: 'Few-shot 示例数',
|
||||
description: '检索的 few-shot 示例数量',
|
||||
type: 'number',
|
||||
sensitive: false,
|
||||
min: 0,
|
||||
max: 20,
|
||||
defaultValue: 10,
|
||||
},
|
||||
{
|
||||
envKey: 'ENABLE_REFLECTION',
|
||||
group: 'memory',
|
||||
label: '启用反思',
|
||||
description: '是否启用审查结果自我反思机制',
|
||||
type: 'boolean',
|
||||
sensitive: false,
|
||||
defaultValue: false,
|
||||
},
|
||||
{
|
||||
envKey: 'MAX_REFLECTION_ROUNDS',
|
||||
group: 'memory',
|
||||
label: '最大反思轮数',
|
||||
description: '反思迭代的最大轮数',
|
||||
type: 'number',
|
||||
sensitive: false,
|
||||
min: 1,
|
||||
max: 5,
|
||||
defaultValue: 2,
|
||||
},
|
||||
{
|
||||
envKey: 'ENABLE_DEBATE',
|
||||
group: 'memory',
|
||||
label: '启用辩论',
|
||||
description: '是否启用多视角辩论机制',
|
||||
type: 'boolean',
|
||||
sensitive: false,
|
||||
defaultValue: false,
|
||||
},
|
||||
{
|
||||
envKey: 'DEBATE_THRESHOLD',
|
||||
group: 'memory',
|
||||
label: '辩论阈值',
|
||||
description: '触发辩论的严重程度阈值',
|
||||
type: 'enum',
|
||||
sensitive: false,
|
||||
enumValues: ['high', 'medium'],
|
||||
defaultValue: 'high',
|
||||
},
|
||||
];
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
434
src/controllers/__tests__/admin-review-sessions.test.ts
Normal file
434
src/controllers/__tests__/admin-review-sessions.test.ts
Normal file
@@ -0,0 +1,434 @@
|
||||
import { afterEach, beforeEach, describe, expect, test } from 'bun:test';
|
||||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import path from 'node:path';
|
||||
import { Hono } from 'hono';
|
||||
import { kernelSessionRepository } from '../../agent-kernel/session/session-repository';
|
||||
import type { KernelSessionRecord } from '../../agent-kernel/types';
|
||||
import { closeDatabase, initDatabase } from '../../db/database';
|
||||
import { kernelReviewEngine } from '../../review/kernel/kernel-review-engine';
|
||||
import {
|
||||
REVIEW_FULL_REVIEW_SUBAGENT,
|
||||
REVIEW_TRIAGE_SUBAGENT,
|
||||
} from '../../review/kernel/review-subagent-ids';
|
||||
import { adminController } from '../admin';
|
||||
|
||||
/**
 * Creates a fresh Hono app with the admin controller's protected routes
 * mounted under `/admin/api`.
 */
function createTestApp(): Hono {
  const testApp = new Hono();
  testApp.route('/admin/api', adminController.protectedRoutes);

  return testApp;
}
|
||||
|
||||
/**
 * Builds a canned run-details fixture for the given run id.
 *
 * The fixture holds one in-progress pull-request run for `acme/repo#42`, two
 * steps (a succeeded `prepare_workspace` and a started triage subagent), one
 * unpublished high-severity finding and one published comment. Every nested
 * entry carries `runId`.
 *
 * @param runId - Id stamped onto the run and all of its child entries.
 */
function createRunDetails(runId: string) {
  const startedAt = '2026-04-13T10:00:00.000Z';

  const run = {
    id: runId,
    idempotencyKey: 'pr:acme/repo:42:sha-123',
    eventType: 'pull_request' as const,
    status: 'in_progress' as const,
    owner: 'acme',
    repo: 'repo',
    cloneUrl: 'https://example.com/acme/repo.git',
    prNumber: 42,
    baseSha: 'base-sha',
    headSha: 'sha-123',
    commitSha: 'sha-123',
    attempts: 0,
    maxAttempts: 2,
    createdAt: startedAt,
    updatedAt: startedAt,
    startedAt,
  };

  const steps = [
    {
      id: 'step-1',
      runId,
      stepName: 'prepare_workspace',
      status: 'succeeded' as const,
      startedAt,
      finishedAt: '2026-04-13T10:00:10.000Z',
      latencyMs: 10,
    },
    {
      id: 'step-2',
      runId,
      stepName: REVIEW_TRIAGE_SUBAGENT,
      agentName: REVIEW_TRIAGE_SUBAGENT,
      status: 'started' as const,
      startedAt: '2026-04-13T10:00:11.000Z',
    },
  ];

  const findings = [
    {
      id: 'finding-1',
      runId,
      fingerprint: 'fp-1',
      category: 'correctness' as const,
      severity: 'high' as const,
      confidence: 0.98,
      path: 'src/index.ts',
      line: 12,
      title: 'Potential null dereference',
      detail: 'Value can be undefined before access.',
      evidence: 'line 12 reads target.value without guard',
      suggestion: 'Guard the value before use.',
      published: false,
    },
  ];

  const comments = [
    {
      id: 'comment-1',
      runId,
      path: 'src/index.ts',
      line: 12,
      body: 'Please add a null guard here.',
      status: 'published' as const,
      createdAt: startedAt,
      fingerprint: 'fp-1',
    },
  ];

  return { run, steps, findings, comments };
}
|
||||
|
||||
/**
 * Seeds the database with one pull-request review session for `acme/repo#42`.
 *
 * In order, it: ensures the session exists, appends started/completed events
 * for each step plus one human-feedback event, saves a checkpoint with a
 * single pending `publish_review` task, and records one completed
 * full-review subagent invocation.
 *
 * @returns The seeded session together with the run id used for it.
 */
function seedReviewSession(): { session: KernelSessionRecord; runId: string } {
  const runId = 'run-42';
  const fullReviewAgentId = 'agent-full-abcdef';
  const repository = kernelSessionRepository;

  const session = repository.ensureSession({
    scopeType: 'pull_request',
    scopeKey: 'acme/repo#42',
    metadata: {
      owner: 'acme',
      repo: 'repo',
      prNumber: 42,
      headSha: 'sha-123',
    },
    runId,
  });
  const sessionId = session.id;

  // Workspace preparation.
  repository.appendEvent(sessionId, 'task_started', {
    name: 'prepare_workspace',
    kind: 'skill',
    agentId: 'agent-prepare-123456',
  });
  repository.appendEvent(sessionId, 'task_completed', {
    name: 'prepare_workspace',
    kind: 'skill',
    summary: 'workspace ready',
  });

  // Context building.
  repository.appendEvent(sessionId, 'task_started', {
    name: 'build_context',
    kind: 'skill',
  });
  repository.appendEvent(sessionId, 'task_completed', {
    name: 'build_context',
    kind: 'skill',
    summary: 'diff captured',
  });

  // Triage subagent.
  repository.appendEvent(sessionId, 'task_started', {
    name: REVIEW_TRIAGE_SUBAGENT,
    kind: 'subagent',
    agentId: 'agent-triage-abcdef',
  });
  repository.appendEvent(sessionId, 'task_completed', {
    name: REVIEW_TRIAGE_SUBAGENT,
    kind: 'subagent',
    summary: 'review hints created',
  });

  // Full-review subagent.
  repository.appendEvent(sessionId, 'task_started', {
    name: REVIEW_FULL_REVIEW_SUBAGENT,
    kind: 'subagent',
    agentId: fullReviewAgentId,
  });
  repository.appendEvent(sessionId, 'task_completed', {
    name: REVIEW_FULL_REVIEW_SUBAGENT,
    kind: 'subagent',
    summary: 'full review done',
  });

  // Aggregation, followed by the human feedback write-back.
  repository.appendEvent(sessionId, 'task_started', {
    name: 'aggregate_findings',
    kind: 'skill',
  });
  repository.appendEvent(sessionId, 'task_completed', {
    name: 'aggregate_findings',
    kind: 'skill',
    summary: '2 findings triaged',
    stopReason: 'completed',
  });
  repository.appendEvent(sessionId, 'human_feedback_processed', {
    approved: true,
    fingerprint: 'fp-1',
  });

  repository.saveCheckpoint(sessionId, {
    state: {
      targetSha: 'sha-123',
      reviewCompleted: true,
      findings: [{ fingerprint: 'fp-1' }, { fingerprint: 'fp-2' }],
      published: false,
    },
    pendingTasks: [{ kind: 'skill', name: 'publish_review' }],
  });

  const invocation = repository.createSubagentInvocation({
    parentSessionId: sessionId,
    parentRunId: runId,
    parentTaskName: REVIEW_FULL_REVIEW_SUBAGENT,
    subagentName: REVIEW_FULL_REVIEW_SUBAGENT,
    agentId: fullReviewAgentId,
    packet: {
      goal: 'Run a full autonomous review for changed files',
      parentTaskName: REVIEW_FULL_REVIEW_SUBAGENT,
      input: { mode: 'light', suspectedEntrypoints: ['src/index.ts'] },
      parentSessionId: sessionId,
      parentRunId: runId,
      contextSummary: 'Focus on nullable flow and async boundaries.',
    },
  });

  repository.completeSubagentInvocation(invocation.id, 'completed', {
    agentId: fullReviewAgentId,
    agentType: REVIEW_FULL_REVIEW_SUBAGENT,
    summary: 'Found 2 review concerns',
    totalDurationMs: 25,
    totalToolUseCount: 3,
    totalTokens: 1200,
    artifacts: { findings: ['fp-1', 'fp-2'] },
  });

  return { session, runId };
}
|
||||
|
||||
// Route-level tests for the admin review-session API. Each test runs against a
// throw-away SQLite file and a stubbed `kernelReviewEngine.getRunDetails`.
describe('admin review session routes', () => {
  let tempDir: string;
  let savedDbPath: string | undefined;
  const originalGetRunDetails = kernelReviewEngine.getRunDetails;

  beforeEach(async () => {
    // Point the app at a fresh database inside a temporary directory.
    tempDir = await mkdtemp(path.join(tmpdir(), 'admin-review-sessions-db-'));
    savedDbPath = process.env.DATABASE_PATH;
    process.env.DATABASE_PATH = path.join(tempDir, 'assistant.db');
    initDatabase();
    kernelReviewEngine.getRunDetails = async (runId: string) => createRunDetails(runId);
  });

  afterEach(async () => {
    kernelReviewEngine.getRunDetails = originalGetRunDetails;
    closeDatabase();
    // Restore DATABASE_PATH exactly as it was, including "unset".
    if (savedDbPath !== undefined) {
      process.env.DATABASE_PATH = savedDbPath;
    } else {
      Reflect.deleteProperty(process.env, 'DATABASE_PATH');
    }
    await rm(tempDir, { recursive: true, force: true });
  });

  test('GET /admin/api/review/sessions returns projected summaries from persisted history', async () => {
    type SessionListPayload = {
      data: Array<{
        session: KernelSessionRecord;
        summary: {
          status: string;
          currentStep?: string;
          findingCount: number;
          pendingTaskCount: number;
          owner?: string;
          repo?: string;
          prNumber?: number;
          headSha?: string;
        };
      }>;
    };

    seedReviewSession();
    const app = createTestApp();

    const response = await app.request('http://localhost/admin/api/review/sessions?limit=10');
    const payload = (await response.json()) as SessionListPayload;
    const first = payload.data[0];

    expect(response.status).toBe(200);
    expect(payload.data).toHaveLength(1);
    expect(first?.session.scopeKey).toBe('acme/repo#42');
    expect(first?.summary.status).toBe('executing');
    expect(first?.summary.currentStep).toBe('发布结果');
    expect(first?.summary.findingCount).toBe(2);
    expect(first?.summary.pendingTaskCount).toBe(1);
    expect(first?.summary.owner).toBe('acme');
    expect(first?.summary.repo).toBe('repo');
    expect(first?.summary.prNumber).toBe(42);
    expect(first?.summary.headSha).toBe('sha-123');
  });

  test('GET /admin/api/review/sessions/:sessionId returns checkpoint plan timeline subagent invocations and run details', async () => {
    type SessionDetailPayload = {
      session: KernelSessionRecord;
      summary: {
        status: string;
        currentStep?: string;
        findingCount: number;
        pendingTaskCount: number;
      };
      checkpoint: {
        state: {
          targetSha: string;
          reviewCompleted: boolean;
          findings: Array<{ fingerprint: string }>;
          published: boolean;
        };
        pendingTasks: Array<{ kind: string; name: string }>;
      };
      plan: Array<{ key: string; label: string; status: string; progressText?: string }>;
      timeline: Array<{ title: string; detail: string; tone: string }>;
      subagentInvocations: Array<{
        parentRunId: string;
        subagentName: string;
        status: string;
        input: { goal: string; contextSummary?: string; input: Record<string, unknown> };
        result?: { summary: string; totalDurationMs: number; totalToolUseCount: number };
      }>;
      runDetails: ReturnType<typeof createRunDetails>;
    };

    const { session, runId } = seedReviewSession();
    const app = createTestApp();

    const response = await app.request(`http://localhost/admin/api/review/sessions/${session.id}`);
    const payload = (await response.json()) as SessionDetailPayload;

    // Session summary.
    expect(response.status).toBe(200);
    expect(payload.session.id).toBe(session.id);
    expect(payload.summary.status).toBe('executing');
    expect(payload.summary.currentStep).toBe('发布结果');
    expect(payload.summary.findingCount).toBe(2);
    expect(payload.summary.pendingTaskCount).toBe(1);

    // Checkpoint.
    const { state: checkpointState, pendingTasks } = payload.checkpoint;
    expect(checkpointState.targetSha).toBe('sha-123');
    expect(checkpointState.reviewCompleted).toBe(true);
    expect(checkpointState.findings.map((finding) => finding.fingerprint)).toEqual([
      'fp-1',
      'fp-2',
    ]);
    expect(checkpointState.published).toBe(false);
    expect(pendingTasks).toEqual([{ kind: 'skill', name: 'publish_review' }]);

    // Plan: every step is looked up by key and matched against its expected shape.
    const planByKey = new Map(payload.plan.map((step) => [step.key, step]));
    const expectedPlan: Array<[string, Record<string, string>]> = [
      ['prepare_workspace', { label: '准备工作区', status: 'completed' }],
      ['build_context', { label: '构建上下文', status: 'completed' }],
      [REVIEW_TRIAGE_SUBAGENT, { label: '生成审查提示', status: 'completed' }],
      [
        REVIEW_FULL_REVIEW_SUBAGENT,
        { label: '完整自主审查', status: 'completed', progressText: 'full review completed' },
      ],
      ['aggregate_findings', { label: '聚合与筛选', status: 'completed' }],
      ['publish_review', { label: '发布结果', status: 'queued' }],
    ];
    for (const [key, expectedStep] of expectedPlan) {
      expect(planByKey.get(key)).toMatchObject(expectedStep);
    }

    // Timeline.
    const { timeline } = payload;
    expect(timeline.length).toBeGreaterThanOrEqual(6);
    expect(
      timeline.some(
        (entry) => entry.title === '开始执行 prepare_workspace' && entry.tone === 'neutral'
      )
    ).toBe(true);
    expect(timeline.some((entry) => entry.detail.includes('workspace ready'))).toBe(true);
    expect(timeline.some((entry) => entry.title === '人工反馈已写回')).toBe(true);
    expect(timeline.some((entry) => entry.detail.includes('finding 已确认发布'))).toBe(true);

    // Subagent invocations.
    const firstInvocation = payload.subagentInvocations[0];
    expect(payload.subagentInvocations).toHaveLength(1);
    expect(firstInvocation).toMatchObject({
      parentRunId: runId,
      subagentName: REVIEW_FULL_REVIEW_SUBAGENT,
      status: 'completed',
      input: {
        goal: 'Run a full autonomous review for changed files',
        contextSummary: 'Focus on nullable flow and async boundaries.',
      },
    });
    expect(firstInvocation?.input.input).toEqual({
      mode: 'light',
      suspectedEntrypoints: ['src/index.ts'],
    });
    expect(firstInvocation?.result).toMatchObject({
      summary: 'Found 2 review concerns',
      totalDurationMs: 25,
      totalToolUseCount: 3,
    });

    // Run details come from the stubbed engine.
    const { runDetails } = payload;
    expect(runDetails.run).toMatchObject({
      id: runId,
      status: 'in_progress',
      owner: 'acme',
      repo: 'repo',
      prNumber: 42,
      headSha: 'sha-123',
    });
    expect(runDetails.steps).toHaveLength(2);
    expect(runDetails.steps[0]).toMatchObject({
      stepName: 'prepare_workspace',
      status: 'succeeded',
    });
    expect(runDetails.findings[0]).toMatchObject({
      fingerprint: 'fp-1',
      category: 'correctness',
      severity: 'high',
    });
    expect(runDetails.comments[0]).toMatchObject({
      fingerprint: 'fp-1',
      status: 'published',
    });
  });

  test('GET /admin/api/review/kernel catalog routes return arrays', async () => {
    const app = createTestApp();
    const catalogUrl = (segment: string) => `http://localhost/admin/api/review/kernel/${segment}`;

    const tasksResponse = await app.request(catalogUrl('tasks'));
    const subagentsResponse = await app.request(catalogUrl('subagents'));
    const hooksResponse = await app.request(catalogUrl('hooks'));

    const tasksPayload = (await tasksResponse.json()) as { data: unknown[] };
    const subagentsPayload = (await subagentsResponse.json()) as { data: unknown[] };
    const hooksPayload = (await hooksResponse.json()) as { data: unknown[] };

    for (const catalogResponse of [tasksResponse, subagentsResponse, hooksResponse]) {
      expect(catalogResponse.status).toBe(200);
    }
    const catalogPayloads = [tasksPayload, subagentsPayload, hooksPayload];
    for (const catalogPayload of catalogPayloads) {
      expect(Array.isArray(catalogPayload.data)).toBe(true);
    }
    for (const catalogPayload of catalogPayloads) {
      expect(catalogPayload.data.length).toBeGreaterThan(0);
    }

    const subagents = subagentsPayload.data as Array<{ name?: string; tags?: string[] }>;
    const fullReviewSubagent = subagents.find(
      (subagent) => subagent.name === REVIEW_FULL_REVIEW_SUBAGENT
    );

    expect(subagents).toEqual(
      expect.arrayContaining([
        expect.objectContaining({ name: REVIEW_TRIAGE_SUBAGENT }),
        expect.objectContaining({ name: REVIEW_FULL_REVIEW_SUBAGENT }),
      ])
    );
    expect(fullReviewSubagent?.tags).toEqual(
      expect.arrayContaining(['full-review', 'autonomous-review'])
    );
    // Specialist / domain-review subagents must not be part of the catalog.
    expect(subagents.some((subagent) => subagent.name?.startsWith('review:specialist:'))).toBe(
      false
    );
    expect(subagents.some((subagent) => subagent.tags?.includes('domain-review'))).toBe(false);
  });
});
|
||||
366
src/controllers/__tests__/feedback-kernel-session.test.ts
Normal file
366
src/controllers/__tests__/feedback-kernel-session.test.ts
Normal file
@@ -0,0 +1,366 @@
|
||||
import { afterEach, beforeEach, describe, expect, test } from 'bun:test';
|
||||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import path from 'node:path';
|
||||
import { Hono } from 'hono';
|
||||
import { kernelSessionRepository } from '../../agent-kernel/session/session-repository';
|
||||
import type { KernelSessionEventRecord, KernelSessionRecord } from '../../agent-kernel/types';
|
||||
import { closeDatabase, initDatabase } from '../../db/database';
|
||||
import { kernelReviewEngine } from '../../review/kernel/kernel-review-engine';
|
||||
import { getReviewSessionScope } from '../../review/kernel/session-scope';
|
||||
import { FileReviewStore } from '../../review/store/file-review-store';
|
||||
import type {
|
||||
Finding,
|
||||
PullRequestReviewPayload,
|
||||
ReviewCommentRecord,
|
||||
ReviewRun,
|
||||
} from '../../review/types';
|
||||
import { giteaService } from '../../services/gitea';
|
||||
import { feedbackRouter, initializeFeedbackSystem } from '../feedback';
|
||||
|
||||
/**
 * Builds a fresh Hono app with the feedback router mounted under `/feedback`.
 *
 * @returns The app used to issue in-process requests in these tests.
 */
function createTestApp(): Hono {
  const app = new Hono();
  app.route('/feedback', feedbackRouter);
  return app;
}
|
||||
|
||||
/**
 * POSTs a feedback decision for one finding as JSON and decodes the JSON reply.
 *
 * @param app - App to send the in-process request to.
 * @param findingId - Finding id interpolated into `/feedback/finding/:id`.
 * @param approved - Decision sent in the request body.
 * @param reason - Optional reason sent in the request body (omitted from the JSON when undefined).
 * @returns The raw response together with its parsed JSON body.
 */
async function jsonRequest(app: Hono, findingId: string, approved: boolean, reason?: string) {
  const response = await app.request(`http://localhost/feedback/finding/${findingId}`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({ approved, reason }),
  });

  const payload = (await response.json()) as Record<string, unknown>;
  return { response, payload };
}
|
||||
|
||||
/**
 * Builds a pull-request review payload for the fixed repository `acme/repo`, PR #42.
 *
 * @param keySuffix - Appended to the idempotency key and to the head SHA so that each test
 *   gets its own distinct payload.
 * @returns The payload handed to the review store.
 */
function createPullRequestPayload(keySuffix: string): PullRequestReviewPayload {
  return {
    idempotencyKey: `pr:acme/repo:42:${keySuffix}`,
    eventType: 'pull_request',
    owner: 'acme',
    repo: 'repo',
    cloneUrl: 'https://example.com/acme/repo.git',
    prNumber: 42,
    baseSha: 'base-sha',
    headSha: `head-${keySuffix}`,
    maxAttempts: 2,
  };
}
|
||||
|
||||
/**
 * Builds an unpublished `correctness` finding for `src/index.ts`.
 *
 * @param runId - Run the finding belongs to.
 * @param index - Ordinal used in the id, fingerprint, line (`10 + index`) and text fields.
 *   Index 1 yields severity `high` / confidence 0.99; any other index yields `medium` / 0.91.
 * @returns The finding fixture.
 */
function createFinding(runId: string, index: number): Finding {
  const isFirst = index === 1;

  return {
    id: `finding-${index}`,
    runId,
    fingerprint: `fp-${index}`,
    category: 'correctness',
    severity: isFirst ? 'high' : 'medium',
    confidence: isFirst ? 0.99 : 0.91,
    path: 'src/index.ts',
    line: 10 + index,
    title: `Potential issue ${index}`,
    detail: `Detail for finding ${index}`,
    evidence: `Evidence for finding ${index}`,
    suggestion: `Suggestion for finding ${index}`,
    published: false,
  };
}
|
||||
|
||||
/**
 * Seeds the store with a run and its findings, then ensures a kernel session for that run.
 *
 * @param store - Review store that receives the run and the findings.
 * @param options - `keySuffix` makes the payload unique; `findingCount` is the number of
 *   findings to add (defaults to 1).
 * @returns The run from `createOrReuseRun`, the added findings and the ensured session.
 */
async function seedRunWithSession(
  store: FileReviewStore,
  options: { keySuffix: string; findingCount?: number }
): Promise<{
  run: ReviewRun;
  findings: Finding[];
  session: KernelSessionRecord;
}> {
  const { run } = await store.createOrReuseRun(createPullRequestPayload(options.keySuffix));

  // Findings are numbered from 1, so the first one is the `high` severity fixture.
  const findingCount = options.findingCount ?? 1;
  const findings = Array.from({ length: findingCount }, (_, index) =>
    createFinding(run.id, index + 1)
  );
  await store.addFindings(run.id, findings);

  // The session is keyed by the scope derived from the run.
  const { scopeType, scopeKey } = getReviewSessionScope(run);
  const session = kernelSessionRepository.ensureSession({
    scopeType,
    scopeKey,
    metadata: {
      owner: run.owner,
      repo: run.repo,
      prNumber: run.prNumber,
      eventType: run.eventType,
      headSha: run.headSha,
    },
    runId: run.id,
  });

  return { run, findings, session };
}
|
||||
|
||||
/**
 * Lists the session's events that these tests assert on: feedback processing and the
 * continuation request/completion pair. All other event types are dropped.
 *
 * @param sessionId - Session whose events are read from the repository.
 * @returns The matching events, in the order the repository returned them.
 */
function getRelevantEvents(sessionId: string): KernelSessionEventRecord[] {
  // Built once per call instead of once per filtered event.
  const relevantTypes = new Set<string>([
    'human_feedback_processed',
    'session_continue_requested',
    'session_continue_completed',
  ]);

  return kernelSessionRepository
    .listEvents(sessionId)
    .filter((event) => relevantTypes.has(event.eventType));
}
|
||||
|
||||
/**
 * Finds the first event of a given type.
 *
 * @param events - Events to search, in order.
 * @param eventType - Event type to match exactly.
 * @returns The first matching event, or `undefined` when none matches.
 */
function getEventByType(
  events: KernelSessionEventRecord[],
  eventType: KernelSessionEventRecord['eventType']
): KernelSessionEventRecord | undefined {
  return events.find((candidate) => candidate.eventType === eventType);
}
|
||||
|
||||
describe('feedback kernel session integration', () => {
|
||||
let tempDir: string;
|
||||
let app: Hono;
|
||||
let store: FileReviewStore;
|
||||
let savedDbPath: string | undefined;
|
||||
let pullRequestCommentCalls: string[];
|
||||
let commitCommentCalls: string[];
|
||||
let continueSessionCalls: string[];
|
||||
|
||||
const originalAddPullRequestComment = giteaService.addPullRequestComment;
|
||||
const originalAddCommitComment = giteaService.addCommitComment;
|
||||
const originalContinueSession = kernelReviewEngine.continueSession;
|
||||
|
||||
// Per-test setup: points DATABASE_PATH at a database inside a fresh temp directory, initialises
// the database and a file-backed review store, and replaces the Gitea and kernel-engine calls
// with recording stubs so that no external service is contacted.
beforeEach(async () => {
  tempDir = await mkdtemp(path.join(tmpdir(), 'feedback-kernel-session-'));

  // Remember the previous value so afterEach can restore the environment.
  savedDbPath = process.env.DATABASE_PATH;
  process.env.DATABASE_PATH = path.join(tempDir, 'assistant.db');

  initDatabase();

  store = new FileReviewStore(path.join(tempDir, 'review-workdir'));
  await store.init();
  initializeFeedbackSystem(store);

  app = createTestApp();
  pullRequestCommentCalls = [];
  commitCommentCalls = [];
  continueSessionCalls = [];

  // Gitea stubs only record the comment body they were asked to publish.
  giteaService.addPullRequestComment = async (_owner, _repo, _prNumber, body) => {
    pullRequestCommentCalls.push(body);
  };
  giteaService.addCommitComment = async (_owner, _repo, _commitSha, body) => {
    commitCommentCalls.push(body);
  };

  // Continuation stub: records the call and, when the session has a last run, appends the
  // requested/completed event pair for that run.
  kernelReviewEngine.continueSession = async (sessionId: string) => {
    continueSessionCalls.push(sessionId);

    const session = kernelSessionRepository.getSessionById(sessionId);
    if (!session?.lastRunId) {
      return false;
    }

    kernelSessionRepository.appendEvent(sessionId, 'session_continue_requested', {
      runId: session.lastRunId,
    });
    kernelSessionRepository.appendEvent(sessionId, 'session_continue_completed', {
      runId: session.lastRunId,
    });
    return true;
  };
});
|
||||
|
||||
// Per-test teardown: puts the real service methods back, closes the database, restores
// DATABASE_PATH to its previous state and deletes the temp directory.
afterEach(async () => {
  giteaService.addPullRequestComment = originalAddPullRequestComment;
  giteaService.addCommitComment = originalAddCommitComment;
  kernelReviewEngine.continueSession = originalContinueSession;

  closeDatabase();

  // Restore the variable exactly: remove it if it was unset before the test.
  if (savedDbPath === undefined) {
    Reflect.deleteProperty(process.env, 'DATABASE_PATH');
  } else {
    process.env.DATABASE_PATH = savedDbPath;
  }

  await rm(tempDir, { recursive: true, force: true });
});
|
||||
|
||||
// Approving the only pending finding: the comment goes to the pull request, a local comment
// record is stored, the feedback event is appended and the session continuation runs once.
test('approve success publishes comment records events and triggers continuation when last pending finding is processed', async () => {
  const { run, findings, session } = await seedRunWithSession(store, {
    keySuffix: 'approve-success',
  });
  const [finding] = findings;

  const { response, payload } = await jsonRequest(app, finding.id, true, 'needs fix');
  const runDetails = await store.getRunDetails(run.id);
  const persistedFinding = await store.getFinding(finding.id);
  const events = getRelevantEvents(session.id);

  // HTTP reply.
  expect(response.status).toBe(200);
  expect(payload).toMatchObject({
    success: true,
    message: '已标记为有效问题并发布到Gitea',
    published: true,
  });

  // Persisted state.
  expect(persistedFinding?.published).toBe(true);
  expect(runDetails?.comments).toHaveLength(1);
  expect(runDetails?.comments[0]).toMatchObject({
    runId: run.id,
    status: 'published',
    path: finding.path,
    line: finding.line,
    fingerprint: finding.fingerprint,
  } satisfies Partial<ReviewCommentRecord>);
  expect(runDetails?.comments[0]?.body).toContain('人工审批确认');
  expect(runDetails?.comments[0]?.body).toContain(finding.title);

  // Outbound calls: one PR comment, no commit comment, one continuation.
  expect(pullRequestCommentCalls).toHaveLength(1);
  expect(commitCommentCalls).toHaveLength(0);
  expect(continueSessionCalls).toEqual([session.id]);

  // Session events (compared sorted, so ordering is not asserted here).
  expect(events.map((event) => event.eventType).sort()).toEqual([
    'human_feedback_processed',
    'session_continue_completed',
    'session_continue_requested',
  ]);
  expect(getEventByType(events, 'human_feedback_processed')?.payload).toEqual({
    runId: run.id,
    findingId: finding.id,
    approved: true,
    reason: 'needs fix',
    published: true,
  });
  expect(getEventByType(events, 'session_continue_requested')?.payload).toEqual({
    runId: run.id,
  });
  expect(getEventByType(events, 'session_continue_completed')?.payload).toEqual({
    runId: run.id,
  });
});
|
||||
|
||||
// Rejecting the only pending finding: nothing is sent to Gitea, a local `REJECTED:` comment
// record is stored, the finding is still flagged as published (i.e. processed) and the
// session continuation runs once.
test('reject success records local handling without Gitea publish and still triggers continuation on final pending finding', async () => {
  const { run, findings, session } = await seedRunWithSession(store, {
    keySuffix: 'reject-success',
  });
  const [finding] = findings;

  const { response, payload } = await jsonRequest(app, finding.id, false, 'false positive');
  const runDetails = await store.getRunDetails(run.id);
  const persistedFinding = await store.getFinding(finding.id);
  const events = getRelevantEvents(session.id);

  // HTTP reply.
  expect(response.status).toBe(200);
  expect(payload).toMatchObject({
    success: true,
    message: '已标记为误报',
    published: false,
  });

  // Persisted state.
  expect(persistedFinding?.published).toBe(true);
  expect(runDetails?.comments).toHaveLength(1);
  expect(runDetails?.comments[0]).toMatchObject({
    runId: run.id,
    status: 'published',
    fingerprint: finding.fingerprint,
  } satisfies Partial<ReviewCommentRecord>);
  expect(runDetails?.comments[0]?.body).toBe(`REJECTED: ${finding.title} - false positive`);

  // Outbound calls: no Gitea comment of either kind, one continuation.
  expect(pullRequestCommentCalls).toHaveLength(0);
  expect(commitCommentCalls).toHaveLength(0);
  expect(continueSessionCalls).toEqual([session.id]);

  // Session events.
  expect(events.map((event) => event.eventType).sort()).toEqual([
    'human_feedback_processed',
    'session_continue_completed',
    'session_continue_requested',
  ]);
  expect(getEventByType(events, 'human_feedback_processed')?.payload).toEqual({
    runId: run.id,
    findingId: finding.id,
    approved: false,
    reason: 'false positive',
    published: false,
  });
});
|
||||
|
||||
// Approving the same finding twice: the second request succeeds with `alreadyProcessed` and
// must not add a second comment record, Gitea comment, continuation call or session event.
test('duplicate approve returns idempotent success without duplicating comment records or continuation events', async () => {
  const { run, findings, session } = await seedRunWithSession(store, {
    keySuffix: 'duplicate-approve',
  });
  const [finding] = findings;

  const first = await jsonRequest(app, finding.id, true, 'needs fix');
  const second = await jsonRequest(app, finding.id, true, 'retry request');
  const runDetails = await store.getRunDetails(run.id);
  const events = getRelevantEvents(session.id);

  expect(first.response.status).toBe(200);
  expect(second.response.status).toBe(200);
  expect(second.payload).toMatchObject({
    success: true,
    message: '该finding已处理过',
    alreadyProcessed: true,
    published: true,
  });

  // Everything below must reflect exactly one processed request.
  expect(runDetails?.comments).toHaveLength(1);
  expect(runDetails?.comments[0]?.fingerprint).toBe(finding.fingerprint);
  expect(pullRequestCommentCalls).toHaveLength(1);
  expect(continueSessionCalls).toEqual([session.id]);
  expect(events.map((event) => event.eventType).sort()).toEqual([
    'human_feedback_processed',
    'session_continue_completed',
    'session_continue_requested',
  ]);
});
|
||||
|
||||
// When publishing to Gitea throws, the request fails with 500, the finding goes back to
// unpublished, and no comment record, session event or continuation is produced.
test('Gitea publish failure rolls back published flag and leaves no local comment record or session events', async () => {
  const { run, findings, session } = await seedRunWithSession(store, {
    keySuffix: 'gitea-fail',
  });
  const [finding] = findings;

  // Overrides the recording stub from beforeEach; afterEach restores the real method.
  giteaService.addPullRequestComment = async () => {
    throw new Error('gitea unavailable');
  };

  const { response, payload } = await jsonRequest(app, finding.id, true, 'needs fix');
  const runDetails = await store.getRunDetails(run.id);
  const persistedFinding = await store.getFinding(finding.id);
  const events = getRelevantEvents(session.id);

  expect(response.status).toBe(500);
  expect(payload).toMatchObject({
    error: 'Failed to process feedback',
    details: 'gitea unavailable',
  });
  expect(persistedFinding?.published).toBe(false);
  expect(runDetails?.comments).toEqual([]);
  expect(events).toEqual([]);
  expect(continueSessionCalls).toEqual([]);
});
|
||||
|
||||
// When the Gitea publish succeeds but saving the local comment record throws, the request
// fails with 500, the finding goes back to unpublished, and no session event or continuation
// is produced. The Gitea comment itself has already been sent once.
test('local comment record failure rolls back published flag after Gitea publish and leaves no session events', async () => {
  const { run, findings, session } = await seedRunWithSession(store, {
    keySuffix: 'local-record-fail',
  });
  const [finding] = findings;
  const originalAddCommentRecord = store.addCommentRecord.bind(store);

  store.addCommentRecord = async () => {
    throw new Error('local store write failed');
  };

  let result: Awaited<ReturnType<typeof jsonRequest>>;
  let runDetails: Awaited<ReturnType<typeof store.getRunDetails>>;
  let persistedFinding: Awaited<ReturnType<typeof store.getFinding>>;
  let events: KernelSessionEventRecord[];
  try {
    result = await jsonRequest(app, finding.id, true, 'needs fix');
    runDetails = await store.getRunDetails(run.id);
    persistedFinding = await store.getFinding(finding.id);
    events = getRelevantEvents(session.id);
  } finally {
    // Always undo the monkey-patch, even if the request or one of the reads throws.
    store.addCommentRecord = originalAddCommentRecord;
  }

  expect(result.response.status).toBe(500);
  expect(result.payload).toMatchObject({
    error: 'Failed to process feedback',
    details:
      'Comment published to Gitea but failed to save locally. State rolled back, you may retry. Note: immediate retry may create duplicate comments.',
  });
  expect(persistedFinding?.published).toBe(false);
  expect(runDetails?.comments).toEqual([]);
  expect(pullRequestCommentCalls).toHaveLength(1);
  expect(events).toEqual([]);
  expect(continueSessionCalls).toEqual([]);
});
|
||||
});
|
||||
@@ -160,7 +160,7 @@ describe('llm-config controller', () => {
|
||||
|
||||
const { data: roles } = await jsonRequest(app, 'GET', '/roles');
|
||||
const assignedRoles = roles.filter((r: any) => r.providerId !== null);
|
||||
expect(assignedRoles).toHaveLength(4);
|
||||
expect(assignedRoles).toHaveLength(2);
|
||||
});
|
||||
|
||||
test('rejects missing required fields', async () => {
|
||||
@@ -326,7 +326,7 @@ describe('llm-config controller', () => {
|
||||
test('returns all MODEL_ROLES with null assignments when unassigned', async () => {
|
||||
const { status, data } = await jsonRequest(app, 'GET', '/roles');
|
||||
expect(status).toBe(200);
|
||||
expect(data).toHaveLength(4);
|
||||
expect(data).toHaveLength(2);
|
||||
expect(data[0]).toHaveProperty('role');
|
||||
expect(data[0]).toHaveProperty('providerId');
|
||||
});
|
||||
|
||||
@@ -1,8 +1,16 @@
|
||||
import { Hono } from 'hono';
|
||||
import { sign } from 'hono/jwt';
|
||||
import { kernelSessionRepository } from '../agent-kernel/session/session-repository';
|
||||
import config from '../config';
|
||||
import { repositoryReviewPromptRepo } from '../db/repositories/repository-review-prompt-repo';
|
||||
import { reviewEngine } from '../review/engine';
|
||||
import { kernelReviewEngine } from '../review/kernel/kernel-review-engine';
|
||||
import {
|
||||
buildReviewPlanSnapshot,
|
||||
buildReviewSessionSummary,
|
||||
buildReviewTimeline,
|
||||
filterEventsByLatestHeadSha,
|
||||
} from '../review/kernel/session-read-model';
|
||||
import { getActiveReviewEngine } from '../review/review-engine-provider';
|
||||
import { giteaService } from '../services/gitea';
|
||||
import { toErrorLogMeta } from '../utils/error-log';
|
||||
import { logger } from '../utils/logger';
|
||||
@@ -173,7 +181,7 @@ protectedRoutes.delete('/repositories/:owner/:repo/webhook/:hookId', async (c) =
|
||||
protectedRoutes.get('/review/runs', async (c) => {
|
||||
try {
|
||||
const limit = Number.parseInt(c.req.query('limit') || '50', 10);
|
||||
const runs = await reviewEngine.listRuns(limit);
|
||||
const runs = await getActiveReviewEngine().listRuns(limit);
|
||||
return c.json({ data: runs });
|
||||
} catch (error: any) {
|
||||
logger.error('获取审查任务列表失败:', error);
|
||||
@@ -185,7 +193,7 @@ protectedRoutes.get('/review/runs', async (c) => {
|
||||
protectedRoutes.get('/review/runs/:runId', async (c) => {
|
||||
try {
|
||||
const { runId } = c.req.param();
|
||||
const result = await reviewEngine.getRunDetails(runId);
|
||||
const result = await getActiveReviewEngine().getRunDetails(runId);
|
||||
if (!result) {
|
||||
return c.json({ message: 'Run not found' }, 404);
|
||||
}
|
||||
@@ -196,6 +204,92 @@ protectedRoutes.get('/review/runs/:runId', async (c) => {
|
||||
}
|
||||
});
|
||||
|
||||
/**
 * GET /review/sessions — lists kernel review sessions, each paired with a summary built from
 * its checkpoint, its events and a plan snapshot against the current task catalog.
 *
 * Query: `limit` — maximum number of sessions; 50 when absent, empty or not a number.
 * Responds `{ data }` on success, or 500 with `{ message, error }` when anything throws.
 *
 * NOTE(review): the session detail route passes events through `filterEventsByLatestHeadSha`
 * before building the plan and summary; this list route uses the unfiltered events. Confirm
 * the difference is intended.
 */
protectedRoutes.get('/review/sessions', (c) => {
  try {
    const parsedLimit = Number.parseInt(c.req.query('limit') || '50', 10);
    // A non-numeric `limit` parses to NaN; use the default rather than handing NaN on.
    const limit = Number.isNaN(parsedLimit) ? 50 : parsedLimit;

    const catalog = kernelReviewEngine.listTaskCatalog();
    const sessions = kernelSessionRepository.listSessions(limit).map((session) => {
      const checkpoint = kernelSessionRepository.loadCheckpoint(session.id);
      const events = kernelSessionRepository.listEvents(session.id);
      const plan = buildReviewPlanSnapshot(catalog, checkpoint, events);

      return {
        session,
        summary: buildReviewSessionSummary(session, checkpoint, events, plan),
      };
    });

    return c.json({ data: sessions });
  } catch (error: any) {
    logger.error('获取审查会话列表失败:', error);
    return c.json({ message: 'Failed to fetch review sessions', error: error.message }, 500);
  }
});
|
||||
|
||||
/**
 * GET /review/sessions/:sessionId — returns one kernel review session with its summary,
 * checkpoint, plan snapshot, timeline, events, subagent invocations and the details of its
 * last run (or `null` when the session has no last run).
 *
 * Events are first narrowed with `filterEventsByLatestHeadSha`; the plan, summary, timeline
 * and the returned `events` all use that narrowed list.
 * Responds 404 when the session does not exist, or 500 with `{ message, error }` on failure.
 */
protectedRoutes.get('/review/sessions/:sessionId', async (c) => {
  try {
    const { sessionId } = c.req.param();

    const session = kernelSessionRepository.getSessionById(sessionId);
    if (!session) {
      return c.json({ message: 'Session not found' }, 404);
    }

    const checkpoint = kernelSessionRepository.loadCheckpoint(sessionId);
    const allEvents = kernelSessionRepository.listEvents(sessionId);
    const events = filterEventsByLatestHeadSha(allEvents, checkpoint);

    const catalog = kernelReviewEngine.listTaskCatalog();
    const plan = buildReviewPlanSnapshot(catalog, checkpoint, events);
    const summary = buildReviewSessionSummary(session, checkpoint, events, plan);

    // Run details come from whichever review engine is currently active.
    const runDetails = session.lastRunId
      ? await getActiveReviewEngine().getRunDetails(session.lastRunId)
      : null;
    const subagentInvocations = kernelSessionRepository.listSubagentInvocations(sessionId);

    return c.json({
      session,
      summary,
      checkpoint,
      plan,
      timeline: buildReviewTimeline(events),
      events,
      subagentInvocations,
      runDetails,
    });
  } catch (error: any) {
    logger.error('获取审查会话详情失败:', error);
    return c.json({ message: 'Failed to fetch review session details', error: error.message }, 500);
  }
});
|
||||
|
||||
/**
 * GET /review/kernel/tasks — returns the kernel review engine's task catalog as `{ data }`,
 * or 500 with `{ message, error }` when the catalog lookup throws.
 */
protectedRoutes.get('/review/kernel/tasks', (c) => {
  try {
    const data = kernelReviewEngine.listTaskCatalog();
    return c.json({ data });
  } catch (error: any) {
    logger.error('获取 kernel 任务目录失败:', error);
    return c.json({ message: 'Failed to fetch kernel task catalog', error: error.message }, 500);
  }
});
|
||||
|
||||
/**
 * GET /review/kernel/subagents — returns the kernel review engine's subagent catalog as
 * `{ data }`, or 500 with `{ message, error }` when the catalog lookup throws.
 */
protectedRoutes.get('/review/kernel/subagents', (c) => {
  try {
    const data = kernelReviewEngine.listSubagentCatalog();
    return c.json({ data });
  } catch (error: any) {
    logger.error('获取 kernel subagent 目录失败:', error);
    return c.json(
      { message: 'Failed to fetch kernel subagent catalog', error: error.message },
      500
    );
  }
});
|
||||
|
||||
/**
 * GET /review/kernel/hooks — returns the kernel review engine's hook catalog as `{ data }`,
 * or 500 with `{ message, error }` when the catalog lookup throws.
 */
protectedRoutes.get('/review/kernel/hooks', (c) => {
  try {
    const data = kernelReviewEngine.listHookCatalog();
    return c.json({ data });
  } catch (error: any) {
    logger.error('获取 kernel hook 目录失败:', error);
    return c.json({ message: 'Failed to fetch kernel hook catalog', error: error.message }, 500);
  }
});
|
||||
|
||||
export const adminController = {
|
||||
publicRoutes,
|
||||
protectedRoutes,
|
||||
|
||||
@@ -1,33 +1,22 @@
|
||||
import { zValidator } from '@hono/zod-validator';
|
||||
import { Hono } from 'hono';
|
||||
import { z } from 'zod';
|
||||
import { kernelSessionRepository } from '../agent-kernel/session/session-repository';
|
||||
import config from '../config';
|
||||
import { LearningSystem } from '../review/learning/learning-system';
|
||||
import { VectorMemoryStore } from '../review/memory/vector-store';
|
||||
import { kernelReviewEngine } from '../review/kernel/kernel-review-engine';
|
||||
import { getReviewSessionScope } from '../review/kernel/session-scope';
|
||||
import { FileReviewStore } from '../review/store/file-review-store';
|
||||
import { giteaService } from '../services/gitea';
|
||||
|
||||
const feedbackRouter = new Hono();
|
||||
|
||||
// 全局实例
|
||||
let memoryStore: VectorMemoryStore | null = null;
|
||||
let learningSystem: LearningSystem | null = null;
|
||||
let reviewStore: FileReviewStore | null = null;
|
||||
|
||||
// 初始化反馈系统(记忆系统可选)
|
||||
export function initializeFeedbackSystem(store: FileReviewStore): void {
|
||||
// 保存store实例以供handlers重用,避免多实例状态不同步
|
||||
reviewStore = store;
|
||||
|
||||
// 记忆系统为可选功能
|
||||
if (config.review.qdrantUrl && config.review.enableMemory) {
|
||||
memoryStore = new VectorMemoryStore(config.review.qdrantUrl);
|
||||
learningSystem = new LearningSystem(memoryStore, reviewStore);
|
||||
|
||||
memoryStore.initialize().catch((err) => {
|
||||
console.error('Failed to initialize memory store:', err);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// 提交人工反馈
|
||||
@@ -62,6 +51,10 @@ feedbackRouter.post(
|
||||
return c.json({ error: 'Run not found' }, 404);
|
||||
}
|
||||
|
||||
const session = kernelSessionRepository.getSessionByScopeKey(
|
||||
getReviewSessionScope(runDetails.run).scopeKey
|
||||
);
|
||||
|
||||
const { owner, repo } = runDetails.run;
|
||||
|
||||
// 原子幂等性保护:先标记finding为published(原子check-and-set)
|
||||
@@ -87,7 +80,6 @@ feedbackRouter.post(
|
||||
success: true,
|
||||
message: '该finding已处理过',
|
||||
alreadyProcessed: true,
|
||||
learningApplied: false,
|
||||
published: true,
|
||||
});
|
||||
}
|
||||
@@ -105,32 +97,6 @@ feedbackRouter.post(
|
||||
|
||||
// 以下代码只会被第一个请求执行(wasUnpublished=true)
|
||||
|
||||
let learningApplied = false;
|
||||
|
||||
// 如果记忆系统启用,尝试执行学习和向量存储(可选功能,失败不阻止审批流程)
|
||||
if (memoryStore && learningSystem) {
|
||||
try {
|
||||
await memoryStore.storeFeedback(findingId, approved, reason || '', owner, repo);
|
||||
|
||||
if (approved) {
|
||||
await learningSystem.learnFromApproval(finding, owner, repo);
|
||||
} else {
|
||||
await learningSystem.learnFromFalsePositive(
|
||||
finding,
|
||||
reason || '人工标记为误报',
|
||||
owner,
|
||||
repo
|
||||
);
|
||||
}
|
||||
|
||||
learningApplied = true;
|
||||
} catch (memoryError) {
|
||||
// 记忆系统故障不应阻止人工审批操作
|
||||
console.error('Memory system operation failed (non-fatal):', memoryError);
|
||||
learningApplied = false;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
// 如果批准,发布到Gitea(人工审批通过的问题应该通知开发者)
|
||||
if (approved) {
|
||||
@@ -210,11 +176,29 @@ _此问题已通过人工审批确认_`;
|
||||
}
|
||||
|
||||
// finding已在开头原子标记为published,处理成功则保持published状态
|
||||
if (session) {
|
||||
kernelSessionRepository.appendEvent(session.id, 'human_feedback_processed', {
|
||||
runId: finding.runId,
|
||||
findingId,
|
||||
approved,
|
||||
reason: reason || null,
|
||||
published: approved,
|
||||
});
|
||||
|
||||
if (config.review.engine === 'kernel') {
|
||||
const latestRunDetails = await reviewStore.getRunDetails(finding.runId);
|
||||
const hasRemainingPendingFindings =
|
||||
latestRunDetails?.findings.some((item) => !item.published) ?? false;
|
||||
|
||||
if (!hasRemainingPendingFindings) {
|
||||
await kernelReviewEngine.continueSession(session.id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return c.json({
|
||||
success: true,
|
||||
message: approved ? '已标记为有效问题并发布到Gitea' : '已标记为误报',
|
||||
learningApplied,
|
||||
published: approved,
|
||||
});
|
||||
} catch (error) {
|
||||
|
||||
@@ -94,7 +94,7 @@ llmConfigRouter.post('/providers', async (c) => {
|
||||
|
||||
const allProviders = providerRepo.list();
|
||||
if (allProviders.length === 1) {
|
||||
const modelRolesToBind: ModelRole[] = ['planner', 'specialist', 'judge', 'embedding'];
|
||||
const modelRolesToBind: ModelRole[] = ['planner', 'specialist'];
|
||||
for (const role of modelRolesToBind) {
|
||||
modelRoleRepo.set(role, created.id, body.defaultModel);
|
||||
}
|
||||
|
||||
@@ -2,10 +2,9 @@ import * as crypto from 'node:crypto';
|
||||
import { Context } from 'hono';
|
||||
import { map } from 'lodash-es';
|
||||
import config from '../config';
|
||||
import { codexEngine } from '../review/codex/codex-engine';
|
||||
import { LocalRepoManager } from '../review/context/local-repo-manager';
|
||||
import { SandboxExec } from '../review/context/sandbox-exec';
|
||||
import { reviewEngine } from '../review/engine';
|
||||
import { getActiveReviewEngine, getReviewEngineLabel } from '../review/review-engine-provider';
|
||||
import { PullRequestDetails, giteaService } from '../services/gitea';
|
||||
import { getNotificationManager } from '../services/notification-manager';
|
||||
import type { NotificationContext } from '../services/notification/types';
|
||||
@@ -175,7 +174,7 @@ async function handlePullRequestEvent(c: Context, body: any): Promise<Response>
|
||||
|
||||
// 包含baseSha以支持retarget场景:相同headSha但baseSha变化时需要重新审查
|
||||
const idempotencyKey = `${owner}/${repoName}#${prNumber}:${baseSha}...${headSha}`;
|
||||
const engineInstance = config.review.engine === 'codex' ? codexEngine : reviewEngine;
|
||||
const engineInstance = getActiveReviewEngine();
|
||||
const { run, reused } = await engineInstance.enqueuePullRequest({
|
||||
eventType: 'pull_request',
|
||||
idempotencyKey,
|
||||
@@ -188,7 +187,7 @@ async function handlePullRequestEvent(c: Context, body: any): Promise<Response>
|
||||
headSha,
|
||||
});
|
||||
|
||||
const engineLabel = config.review.engine === 'codex' ? 'Codex' : 'Agent';
|
||||
const engineLabel = getReviewEngineLabel();
|
||||
return c.json(
|
||||
{
|
||||
status: reused ? 'deduplicated' : 'accepted',
|
||||
@@ -322,7 +321,7 @@ async function handleCommitStatusEvent(c: Context, body: any): Promise<Response>
|
||||
}
|
||||
|
||||
const idempotencyKey = `${owner}/${repoName}@${commitSha}`;
|
||||
const engineInstance = config.review.engine === 'codex' ? codexEngine : reviewEngine;
|
||||
const engineInstance = getActiveReviewEngine();
|
||||
const { run, reused } = await engineInstance.enqueueCommit({
|
||||
eventType: 'commit_status',
|
||||
idempotencyKey,
|
||||
@@ -334,7 +333,7 @@ async function handleCommitStatusEvent(c: Context, body: any): Promise<Response>
|
||||
relatedPrNumber: relatedPR?.number,
|
||||
});
|
||||
|
||||
const engineLabel = config.review.engine === 'codex' ? 'Codex' : 'Agent';
|
||||
const engineLabel = getReviewEngineLabel();
|
||||
return c.json(
|
||||
{
|
||||
status: reused ? 'deduplicated' : 'accepted',
|
||||
|
||||
@@ -116,7 +116,7 @@ describe('migration 002 remove legacy review mode', () => {
|
||||
const engineRow = db
|
||||
.query('SELECT value FROM system_settings WHERE key = ?')
|
||||
.get('REVIEW_ENGINE') as { value: string } | null;
|
||||
expect(engineRow?.value).toBe('agent');
|
||||
expect(engineRow?.value).toBe('kernel');
|
||||
|
||||
const roles = db
|
||||
.query('SELECT role FROM model_role_assignments ORDER BY role ASC')
|
||||
|
||||
@@ -11,12 +11,55 @@ function createInconsistentMigrationState(dbPath: string): void {
|
||||
db.exec('PRAGMA foreign_keys = ON');
|
||||
db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS _migrations (
|
||||
version INTEGER PRIMARY KEY,
|
||||
name TEXT NOT NULL,
|
||||
version INTEGER PRIMARY KEY,
|
||||
name TEXT NOT NULL,
|
||||
applied_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
)
|
||||
`);
|
||||
|
||||
db.exec(`
|
||||
CREATE TABLE llm_providers (
|
||||
id TEXT PRIMARY KEY,
|
||||
name TEXT NOT NULL,
|
||||
type TEXT NOT NULL,
|
||||
base_url TEXT,
|
||||
default_model TEXT NOT NULL,
|
||||
is_enabled INTEGER NOT NULL DEFAULT 1,
|
||||
extra_config TEXT DEFAULT '{}',
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
)
|
||||
`);
|
||||
|
||||
db.exec(`
|
||||
CREATE TABLE llm_secrets (
|
||||
provider_id TEXT PRIMARY KEY REFERENCES llm_providers(id) ON DELETE CASCADE,
|
||||
ciphertext BLOB NOT NULL,
|
||||
iv BLOB NOT NULL,
|
||||
auth_tag BLOB NOT NULL,
|
||||
key_version INTEGER NOT NULL DEFAULT 1,
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
)
|
||||
`);
|
||||
|
||||
db.exec(`
|
||||
CREATE TABLE model_role_assignments (
|
||||
role TEXT PRIMARY KEY CHECK (role IN ('planner','specialist','judge','embedding')),
|
||||
provider_id TEXT NOT NULL REFERENCES llm_providers(id),
|
||||
model TEXT NOT NULL,
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
)
|
||||
`);
|
||||
|
||||
db.exec(`
|
||||
CREATE TABLE system_settings (
|
||||
key TEXT PRIMARY KEY,
|
||||
value TEXT NOT NULL,
|
||||
is_sensitive INTEGER NOT NULL DEFAULT 0,
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
)
|
||||
`);
|
||||
|
||||
db.query('INSERT INTO _migrations (version, name) VALUES (?, ?)').run(
|
||||
1,
|
||||
'init_llm_provider_schema'
|
||||
|
||||
@@ -78,7 +78,7 @@ describe('model-role-repo', () => {
|
||||
});
|
||||
|
||||
test('can assign different roles', () => {
|
||||
const roles: ModelRole[] = ['planner', 'specialist', 'judge', 'embedding'];
|
||||
const roles: ModelRole[] = ['planner', 'specialist'];
|
||||
for (const role of roles) {
|
||||
modelRoleRepo.set(role, providerId, `model-for-${role}`);
|
||||
}
|
||||
@@ -125,7 +125,6 @@ describe('model-role-repo', () => {
|
||||
|
||||
test('results are ordered by role', () => {
|
||||
modelRoleRepo.set('specialist', providerId, 'model-a');
|
||||
modelRoleRepo.set('embedding', providerId, 'model-b');
|
||||
modelRoleRepo.set('planner', providerId, 'model-c');
|
||||
|
||||
const all = modelRoleRepo.list();
|
||||
@@ -158,13 +157,11 @@ describe('model-role-repo', () => {
|
||||
test('returns all roles assigned to a provider', () => {
|
||||
modelRoleRepo.set('specialist', providerId, 'gpt-4o-mini');
|
||||
modelRoleRepo.set('planner', providerId, 'gpt-4o');
|
||||
modelRoleRepo.set('judge', providerId, 'gpt-4o');
|
||||
|
||||
const roles = modelRoleRepo.getRolesByProvider(providerId);
|
||||
expect(roles).toHaveLength(3);
|
||||
expect(roles).toHaveLength(2);
|
||||
expect(roles).toContain('specialist');
|
||||
expect(roles).toContain('planner');
|
||||
expect(roles).toContain('judge');
|
||||
});
|
||||
|
||||
test('does not return roles assigned to other providers', () => {
|
||||
|
||||
@@ -12,6 +12,9 @@ import { dirname, resolve } from 'node:path';
|
||||
import { migration001Init } from './migrations/001_init';
|
||||
import { migration002RemoveLegacyReviewMode } from './migrations/002_remove_legacy_review_mode';
|
||||
import { migration003RepositoryReviewPrompts } from './migrations/003_repository_review_prompts';
|
||||
import { migration004AgentKernelSessions } from './migrations/004_agent_kernel_sessions';
|
||||
import { migration005AgentKernelSubagentInvocations } from './migrations/005_agent_kernel_subagent_invocations';
|
||||
import { migration006RemoveJudgeEmbeddingRoles } from './migrations/006_remove_judge_embedding_roles';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
@@ -31,6 +34,9 @@ const MIGRATIONS: Migration[] = [
|
||||
migration001Init,
|
||||
migration002RemoveLegacyReviewMode,
|
||||
migration003RepositoryReviewPrompts,
|
||||
migration004AgentKernelSessions,
|
||||
migration005AgentKernelSubagentInvocations,
|
||||
migration006RemoveJudgeEmbeddingRoles,
|
||||
];
|
||||
|
||||
const REPOSITORY_REVIEW_PROMPTS_TABLE = 'repository_review_prompts';
|
||||
|
||||
@@ -9,7 +9,7 @@ export const migration002RemoveLegacyReviewMode: Migration = {
|
||||
|
||||
up(db: Database): void {
|
||||
db.exec(
|
||||
"UPDATE system_settings SET value = 'agent' WHERE key = 'REVIEW_ENGINE' AND value NOT IN ('agent','codex')"
|
||||
"UPDATE system_settings SET value = 'kernel' WHERE key = 'REVIEW_ENGINE' AND value NOT IN ('codex','kernel')"
|
||||
);
|
||||
|
||||
db.exec(`
|
||||
|
||||
46
src/db/migrations/004_agent_kernel_sessions.ts
Normal file
46
src/db/migrations/004_agent_kernel_sessions.ts
Normal file
@@ -0,0 +1,46 @@
|
||||
import type { Database } from 'bun:sqlite';
|
||||
import type { Migration } from '../database';
|
||||
|
||||
/**
 * Migration 4 — storage for agent-kernel sessions.
 *
 * Executes, in order:
 * 1. `agent_kernel_sessions` — one row per scope (`pull_request` or `commit`),
 *    with a unique `scope_key`.
 * 2. `agent_kernel_session_events` — events referencing a session; rows are
 *    removed with their session (`ON DELETE CASCADE`).
 * 3. `agent_kernel_session_checkpoints` — keyed by `session_id`, so a session
 *    has at most one checkpoint row.
 * 4. `idx_agent_kernel_events_session` — index on events `(session_id, created_at)`.
 */
export const migration004AgentKernelSessions: Migration = {
  version: 4,
  name: 'agent_kernel_sessions',

  up(db: Database): void {
    // The event and checkpoint tables reference the sessions table, and the
    // index targets the events table, so the order below matters.
    const statements: string[] = [
      `
      CREATE TABLE agent_kernel_sessions (
        id TEXT PRIMARY KEY,
        scope_type TEXT NOT NULL CHECK (scope_type IN ('pull_request', 'commit')),
        scope_key TEXT NOT NULL UNIQUE,
        metadata_json TEXT NOT NULL DEFAULT '{}',
        last_run_id TEXT,
        created_at TEXT NOT NULL DEFAULT (datetime('now')),
        updated_at TEXT NOT NULL DEFAULT (datetime('now'))
      )
    `,
      `
      CREATE TABLE agent_kernel_session_events (
        id TEXT PRIMARY KEY,
        session_id TEXT NOT NULL REFERENCES agent_kernel_sessions(id) ON DELETE CASCADE,
        event_type TEXT NOT NULL,
        payload_json TEXT NOT NULL DEFAULT '{}',
        created_at TEXT NOT NULL DEFAULT (datetime('now'))
      )
    `,
      `
      CREATE TABLE agent_kernel_session_checkpoints (
        session_id TEXT PRIMARY KEY REFERENCES agent_kernel_sessions(id) ON DELETE CASCADE,
        state_json TEXT NOT NULL,
        pending_tasks_json TEXT NOT NULL,
        stop_reason TEXT,
        state_version INTEGER NOT NULL DEFAULT 1,
        updated_at TEXT NOT NULL DEFAULT (datetime('now'))
      )
    `,
      'CREATE INDEX idx_agent_kernel_events_session ON agent_kernel_session_events(session_id, created_at)',
    ];

    for (const statement of statements) {
      db.exec(statement);
    }
  },
};
|
||||
29
src/db/migrations/005_agent_kernel_subagent_invocations.ts
Normal file
29
src/db/migrations/005_agent_kernel_subagent_invocations.ts
Normal file
@@ -0,0 +1,29 @@
|
||||
import type { Database } from 'bun:sqlite';
|
||||
import type { Migration } from '../database';
|
||||
|
||||
/**
 * Migration 5 — records sub-agent invocations made from an agent-kernel session.
 *
 * Creates `agent_kernel_subagent_invocations` (rows are removed with their
 * parent session via `ON DELETE CASCADE`; `status` is restricted to
 * `running`, `completed` or `failed`) and an index on
 * `(parent_session_id, started_at)`.
 */
export const migration005AgentKernelSubagentInvocations: Migration = {
  version: 5,
  name: 'agent_kernel_subagent_invocations',

  up(db: Database): void {
    const createTableSql = `
      CREATE TABLE agent_kernel_subagent_invocations (
        id TEXT PRIMARY KEY,
        parent_session_id TEXT NOT NULL REFERENCES agent_kernel_sessions(id) ON DELETE CASCADE,
        parent_run_id TEXT NOT NULL,
        parent_task_name TEXT NOT NULL,
        subagent_name TEXT NOT NULL,
        agent_id TEXT NOT NULL,
        status TEXT NOT NULL CHECK (status IN ('running', 'completed', 'failed')),
        input_json TEXT NOT NULL,
        result_json TEXT,
        started_at TEXT NOT NULL DEFAULT (datetime('now')),
        finished_at TEXT
      )
    `;
    const createIndexSql =
      'CREATE INDEX idx_agent_kernel_subagent_invocations_session ON agent_kernel_subagent_invocations(parent_session_id, started_at)';

    // The index targets the table, so the table must be created first.
    db.exec(createTableSql);
    db.exec(createIndexSql);
  },
};
|
||||
30
src/db/migrations/006_remove_judge_embedding_roles.ts
Normal file
30
src/db/migrations/006_remove_judge_embedding_roles.ts
Normal file
@@ -0,0 +1,30 @@
|
||||
import type { Database } from 'bun:sqlite';
|
||||
import type { Migration } from '../database';
|
||||
|
||||
/** SQL list literal of the roles that survive this migration: `'planner','specialist'`. */
const ALLOWED_ROLES = ['planner', 'specialist'].map((role) => `'${role}'`).join(',');

/**
 * Migration 6 — narrows `model_role_assignments.role` to the allowed roles.
 *
 * The table is rebuilt rather than altered: a replacement table with the
 * narrower CHECK constraint is created, rows whose role is still allowed are
 * copied over, the old table is dropped, and the replacement takes its name.
 * Rows with any other role are not copied and are therefore discarded.
 */
export const migration006RemoveJudgeEmbeddingRoles: Migration = {
  version: 6,
  name: 'remove_judge_embedding_roles',

  up(db: Database): void {
    const createReplacementTable = `
      CREATE TABLE model_role_assignments_new (
        role TEXT PRIMARY KEY CHECK (role IN (${ALLOWED_ROLES})),
        provider_id TEXT NOT NULL REFERENCES llm_providers(id),
        model TEXT NOT NULL,
        updated_at TEXT NOT NULL DEFAULT (datetime('now'))
      )
    `;
    const copyAllowedRows = `
      INSERT INTO model_role_assignments_new (role, provider_id, model, updated_at)
      SELECT role, provider_id, model, updated_at
      FROM model_role_assignments
      WHERE role IN (${ALLOWED_ROLES})
    `;

    const steps = [
      createReplacementTable,
      copyAllowedRows,
      'DROP TABLE model_role_assignments',
      'ALTER TABLE model_role_assignments_new RENAME TO model_role_assignments',
    ];
    for (const step of steps) {
      db.exec(step);
    }
  },
};
|
||||
@@ -1,6 +1,6 @@
|
||||
/**
|
||||
* Repository for model_role_assignments table.
|
||||
* Maps business roles (planner, specialist, judge, embedding)
|
||||
* Maps business roles (planner, specialist)
|
||||
* to specific provider + model combinations.
|
||||
*/
|
||||
|
||||
@@ -10,7 +10,7 @@ import { getDatabase } from '../database';
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export type ModelRole = 'planner' | 'specialist' | 'judge' | 'embedding';
|
||||
export type ModelRole = 'planner' | 'specialist';
|
||||
|
||||
export interface RoleAssignmentRow {
|
||||
role: ModelRole;
|
||||
|
||||
34
src/index.ts
34
src/index.ts
@@ -9,14 +9,21 @@ import { llmConfigRouter } from './controllers/llm-config';
|
||||
import { handleGiteaWebhook } from './controllers/review';
|
||||
import { initMasterKey } from './crypto/secrets';
|
||||
import { initDatabase } from './db/database';
|
||||
import { installE2EMockLLMGateway } from './llm/e2e-mock';
|
||||
import { llmGateway } from './llm/gateway';
|
||||
import { cleanupScheduler } from './review/cleanup-scheduler';
|
||||
import { codexEngine } from './review/codex/codex-engine';
|
||||
import { mcpRouter } from './review/codex/mcp-handler';
|
||||
import { reviewEngine } from './review/engine';
|
||||
import { getActiveReviewEngine } from './review/review-engine-provider';
|
||||
|
||||
initMasterKey();
|
||||
initDatabase();
|
||||
configManager.seedDefaults();
|
||||
installE2EMockLLMGateway();
|
||||
|
||||
llmGateway.updateResilienceConfig(config.review.llmMaxConcurrentCalls, {
|
||||
maxAttempts: config.review.llmRetryMaxAttempts,
|
||||
baseDelayMs: config.review.llmRetryBaseDelayMs,
|
||||
});
|
||||
|
||||
// 创建Hono应用实例
|
||||
const app = new Hono();
|
||||
@@ -77,26 +84,21 @@ app.get('*', serveStatic({ path: './public/index.html' }));
|
||||
const port = config.app.port;
|
||||
console.log(`⚡️ 服务启动在 http://localhost:${port}`);
|
||||
|
||||
// 启动审查引擎(根据配置选择)
|
||||
reviewEngine.start().catch((error) => {
|
||||
console.error('❌ 启动Agent Review Engine失败', error);
|
||||
});
|
||||
codexEngine.start().catch((error) => {
|
||||
console.error('❌ 启动Codex Review Engine失败', error);
|
||||
});
|
||||
// 启动当前配置的审查引擎,避免非 active 引擎产生副作用。
|
||||
getActiveReviewEngine()
|
||||
.start()
|
||||
.catch((error) => {
|
||||
console.error('❌ 启动 Review Engine 失败', error);
|
||||
});
|
||||
|
||||
// 启动清理调度器(定期清理过期 mirror/workspace 目录)
|
||||
cleanupScheduler.start();
|
||||
|
||||
// 初始化反馈系统(总是初始化,记忆系统可选)
|
||||
const reviewStore = reviewEngine.getStore();
|
||||
// 初始化反馈系统
|
||||
const reviewStore = getActiveReviewEngine().getStore();
|
||||
initializeFeedbackSystem(reviewStore);
|
||||
|
||||
if (config.review.enableMemory) {
|
||||
console.log('✅ 反馈系统已初始化(含向量记忆)');
|
||||
} else {
|
||||
console.log('✅ 反馈系统已初始化(不含向量记忆)');
|
||||
}
|
||||
console.log('✅ 反馈系统已初始化');
|
||||
|
||||
export default {
|
||||
port,
|
||||
|
||||
@@ -157,20 +157,6 @@ describe('LLMGateway', () => {
|
||||
});
|
||||
});
|
||||
|
||||
// ─── embedForRole: Error Cases ────────────────────────────────────
|
||||
|
||||
describe('embedForRole() — error handling', () => {
|
||||
test('throws LLMNoProviderError when embedding role not assigned', async () => {
|
||||
try {
|
||||
await gateway.embedForRole(['text']);
|
||||
expect(true).toBe(false);
|
||||
} catch (e: any) {
|
||||
expect(e.name).toBe('LLMNoProviderError');
|
||||
expect(e.role).toBe('embedding');
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// ─── Cache invalidation ──────────────────────────────────────────
|
||||
|
||||
describe('cache management', () => {
|
||||
|
||||
66
src/llm/__tests__/openai-compatible-provider.test.ts
Normal file
66
src/llm/__tests__/openai-compatible-provider.test.ts
Normal file
@@ -0,0 +1,66 @@
|
||||
import { describe, expect, test } from 'bun:test';
|
||||
import { buildOpenAICompatibleChatParams } from '../providers/openai-compatible';
|
||||
import type { LLMChatRequest, LLMToolDefinition } from '../types';
|
||||
|
||||
/** Tool fixture shared by the tests below: a `read_file` tool taking a required `file_path` string. */
const readFileTool: LLMToolDefinition = {
  name: 'read_file',
  description: 'Read a file from the workspace',
  parameters: {
    type: 'object',
    properties: { file_path: { type: 'string' } },
    required: ['file_path'],
  },
};
|
||||
|
||||
/**
 * Builds a chat request fixture with one user message and the `read_file` tool.
 *
 * @param providerOptions - Passed through unchanged as `providerOptions`
 *   (the key is present even when the argument is omitted).
 */
function makeRequest(providerOptions?: Record<string, unknown>): LLMChatRequest {
  const request: LLMChatRequest = {
    model: 'ignored-model',
    messages: [{ role: 'user', content: 'Review this change' }],
    tools: [readFileTool],
    providerOptions,
  };
  return request;
}
|
||||
|
||||
describe('OpenAI compatible provider params', () => {
  const MODEL = 'gpt-4o';

  // Builds Chat Completions params for a request carrying the given tool_choice option.
  const buildWithToolChoice = (toolChoice: unknown) =>
    buildOpenAICompatibleChatParams(makeRequest({ tool_choice: toolChoice }), MODEL);

  test('passes scalar tool_choice provider option to Chat Completions', () => {
    const built = buildWithToolChoice('required');
    const expectedTools = [
      {
        type: 'function',
        function: {
          name: 'read_file',
          description: 'Read a file from the workspace',
          parameters: readFileTool.parameters,
        },
      },
    ];

    expect(built.tool_choice).toBe('required');
    expect(built.tools).toEqual(expectedTools);
  });

  test('passes named function tool_choice provider option to Chat Completions', () => {
    const namedChoice = { type: 'function', function: { name: 'read_file' } };
    const built = buildWithToolChoice(namedChoice);

    expect(built.tool_choice).toEqual({
      type: 'function',
      function: { name: 'read_file' },
    });
  });

  test('ignores invalid tool_choice provider option', () => {
    // A named choice without a function name is not a valid tool_choice.
    const built = buildWithToolChoice({ type: 'function', function: {} });

    expect(built.tool_choice).toBeUndefined();
  });
});
|
||||
128
src/llm/e2e-mock.ts
Normal file
128
src/llm/e2e-mock.ts
Normal file
@@ -0,0 +1,128 @@
|
||||
import { llmGateway } from './gateway';
|
||||
import type { LLMChatRequest, LLMChatResponse, LLMToolCall, ModelRole } from './types';
|
||||
|
||||
type ChatForRoleFn = (
|
||||
role: ModelRole,
|
||||
request: Omit<LLMChatRequest, 'model'>
|
||||
) => Promise<LLMChatResponse>;
|
||||
|
||||
interface MockResponseConfig {
|
||||
content: string;
|
||||
usage: { promptTokens: number; completionTokens: number; totalTokens: number };
|
||||
}
|
||||
|
||||
/** Preset specialist answer: two findings about `src/user-handler.ts`, serialized as JSON. */
const SPECIALIST_MOCK_RESPONSE: MockResponseConfig = {
  content: JSON.stringify({
    findings: [
      {
        severity: 'high',
        confidence: 0.96,
        path: 'src/user-handler.ts',
        line: 16,
        title: 'Caller dereferences nullable user profile fields',
        detail:
          'getUserDisplayName accepts UserRecord | null but dereferences user.profile!.displayName! without guarding null or missing profile data. The companion auth/user model shows callers can pass absent users.',
        evidence:
          'src/user-handler.ts: return user.profile!.displayName!.toUpperCase(); src/auth.ts: authenticate(...) returns User | null',
        suggestion:
          'Return a safe fallback when user/profile/displayName is missing, or reject null before calling getUserDisplayName.',
      },
      {
        severity: 'medium',
        confidence: 0.85,
        path: 'src/user-handler.ts',
        line: 6,
        title: 'SQL injection via string interpolation',
        detail:
          'userId is interpolated directly into the SQL query string, allowing an attacker to inject arbitrary SQL.',
        // Double-quoted on purpose: the backticks and ${userId} are literal text here.
        evidence: "const query = `SELECT * FROM users WHERE id = '${userId}'`;",
        suggestion: 'Use parameterized queries instead of string interpolation.',
      },
    ],
  }),
  usage: { promptTokens: 1200, completionTokens: 800, totalTokens: 2000 },
};

/** Preset planner answer: a summary object serialized as JSON. */
const PLANNER_MOCK_RESPONSE: MockResponseConfig = {
  content: JSON.stringify({
    summary:
      'The diff contains a new user-handler module with null safety and SQL injection issues.',
    keyConcerns: ['Missing null check', 'SQL injection risk'],
    recommendation: 'Require changes before merging.',
  }),
  usage: { promptTokens: 500, completionTokens: 200, totalTokens: 700 },
};

/** Canned final response for each model role, used when the E2E LLM mock is installed. */
const MOCK_RESPONSES: Record<ModelRole, MockResponseConfig> = {
  specialist: SPECIALIST_MOCK_RESPONSE,
  planner: PLANNER_MOCK_RESPONSE,
};
|
||||
|
||||
/**
 * Creates a tool-call record whose `arguments` field is the JSON-serialized `args`.
 *
 * @param id - Identifier of the tool call.
 * @param name - Name of the tool being called.
 * @param args - Argument object; serialized with `JSON.stringify`.
 */
function toolCall(id: string, name: string, args: Record<string, unknown>): LLMToolCall {
  const serializedArgs = JSON.stringify(args);
  return { id, name, arguments: serializedArgs };
}
|
||||
|
||||
/**
 * Wraps tool calls in a chat response with no text content, a `tool_calls`
 * finish reason, and fixed usage numbers.
 */
function toolCallResponse(toolCalls: LLMToolCall[]): LLMChatResponse {
  const usage = { promptTokens: 300, completionTokens: 60, totalTokens: 360 };
  const response: LLMChatResponse = {
    content: null,
    toolCalls,
    finishReason: 'tool_calls',
    usage,
  };
  return response;
}
|
||||
|
||||
/**
 * Turns a preset response config into a final chat response: the config's
 * content and usage, no tool calls, finish reason `stop`.
 */
function stopResponse(config: MockResponseConfig): LLMChatResponse {
  const { content, usage } = config;
  return { content, toolCalls: [], finishReason: 'stop', usage };
}
|
||||
|
||||
/**
 * Scripts the specialist's tool loop from the number of `tool` messages
 * already present in the request:
 *
 * - 0 → call `search_code`
 * - 1 → call `read_file` on `src/user-handler.ts`
 * - 2 → call `read_file` on `src/auth.ts`
 * - 3 or more → return the preset specialist findings
 */
function createAutonomousSpecialistResponse(
  request: Omit<LLMChatRequest, 'model'>
): LLMChatResponse {
  let toolResultsSeen = 0;
  for (const message of request.messages) {
    if (message.role === 'tool') {
      toolResultsSeen += 1;
    }
  }

  switch (toolResultsSeen) {
    case 0:
      return toolCallResponse([
        toolCall('e2e_search_user_handler', 'search_code', {
          pattern: 'getUserDisplayName|authenticate|findUserByEmail',
          file_types: ['ts'],
          max_results: 20,
        }),
      ]);
    case 1:
      return toolCallResponse([
        toolCall('e2e_read_caller', 'read_file', { file_path: 'src/user-handler.ts' }),
      ]);
    case 2:
      return toolCallResponse([
        toolCall('e2e_read_callee', 'read_file', { file_path: 'src/auth.ts' }),
      ]);
    default:
      return stopResponse(MOCK_RESPONSES.specialist);
  }
}
|
||||
|
||||
/**
 * Creates a `chatForRole` replacement that returns preset responses.
 *
 * Specialist requests that carry at least one tool go through the scripted
 * tool loop; every other request gets the preset final response for its role.
 */
export function createMockChatForRole(): ChatForRoleFn {
  const respond: ChatForRoleFn = async (role, request) => {
    const usesToolLoop = role === 'specialist' && Boolean(request.tools?.length);
    return usesToolLoop
      ? createAutonomousSpecialistResponse(request)
      : stopResponse(MOCK_RESPONSES[role]);
  };
  return respond;
}
|
||||
|
||||
/** Returns true only when the `E2E_MOCK_LLM` environment variable is exactly `'1'`. */
export function isE2EMockActive(): boolean {
  const flag = process.env.E2E_MOCK_LLM;
  return flag === '1';
}
|
||||
|
||||
/**
 * Replaces `llmGateway.chatForRole` with the preset-response mock when the
 * E2E mock flag is active; otherwise does nothing.
 */
export function installE2EMockLLMGateway(): void {
  if (isE2EMockActive()) {
    console.log('[E2E] LLM mock active — all chatForRole calls return preset responses');
    llmGateway.chatForRole = createMockChatForRole() as typeof llmGateway.chatForRole;
  }
}
|
||||
@@ -90,30 +90,6 @@ export class LLMGateway {
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Embedding via the provider assigned to the 'embedding' role.
|
||||
*/
|
||||
async embedForRole(texts: string[]): Promise<number[][]> {
|
||||
const assignment = modelRoleRepo.getByRole('embedding');
|
||||
if (!assignment) throw new LLMNoProviderError('embedding');
|
||||
|
||||
return withResilience(
|
||||
this.semaphore,
|
||||
() => {
|
||||
const provider = this.getOrCreateProvider(assignment.provider_id);
|
||||
if (!provider.embed) {
|
||||
throw new LLMError(
|
||||
`Provider '${provider.type}' does not support embeddings`,
|
||||
provider.type
|
||||
);
|
||||
}
|
||||
return provider.embed(texts);
|
||||
},
|
||||
this.retryOptions,
|
||||
'embedding'
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Invalidate cached provider instance (call when config/key changes via UI).
|
||||
*/
|
||||
|
||||
@@ -76,6 +76,52 @@ function extractToolCalls(choice: OpenAI.ChatCompletion.Choice): LLMToolCall[] {
|
||||
}));
|
||||
}
|
||||
|
||||
type OpenAIToolChoice = NonNullable<OpenAI.ChatCompletionCreateParamsNonStreaming['tool_choice']>;
|
||||
|
||||
/**
 * Type guard for a named-function tool choice:
 * `{ type: 'function', function: { name: <non-empty string> } }`.
 *
 * An empty or whitespace-only name is rejected, so an unusable choice is
 * dropped by the caller instead of being forwarded to the provider.
 *
 * @param value - Untrusted value, e.g. read from `providerOptions.tool_choice`.
 * @returns `true` when `value` has the named-function shape with a usable name.
 */
function isNamedToolChoice(value: unknown): value is {
  type: 'function';
  function: { name: string };
} {
  if (typeof value !== 'object' || value === null) return false;

  const candidate = value as Record<string, unknown>;
  if (candidate.type !== 'function') return false;

  const fn = candidate.function;
  if (typeof fn !== 'object' || fn === null) return false;

  const name = (fn as Record<string, unknown>).name;
  return typeof name === 'string' && name.trim().length > 0;
}
|
||||
|
||||
/**
 * Converts an untrusted `tool_choice` option into a Chat Completions value.
 *
 * Accepts the strings `auto`, `none` and `required`, or a named-function
 * choice (rebuilt with only `type` and `function.name`). Anything else
 * yields `undefined`.
 */
function toOpenAIToolChoice(value: unknown): OpenAIToolChoice | undefined {
  if (typeof value === 'string') {
    return value === 'auto' || value === 'none' || value === 'required' ? value : undefined;
  }

  if (!isNamedToolChoice(value)) {
    return undefined;
  }

  // Copy only the known fields so extra properties on the input are not forwarded.
  const { name } = value.function;
  return { type: 'function', function: { name } };
}
|
||||
|
||||
/**
 * Builds the non-streaming Chat Completions parameters for a chat request.
 *
 * Optional fields are added only when the request supplies them:
 * `temperature`, `max_tokens` (from `maxTokens`), `response_format`
 * (only for `responseFormat === 'json'`), `tools` (only when non-empty) and
 * `tool_choice` (from `providerOptions.tool_choice`, when valid).
 *
 * `tool_choice` is sent only together with `tools`, because the Chat
 * Completions API rejects a request that sets `tool_choice` without tools.
 *
 * @param request - Provider-agnostic chat request.
 * @param model - Model name to send; `request.model` is not read here.
 * @returns Parameters ready for `client.chat.completions.create`.
 */
export function buildOpenAICompatibleChatParams(
  request: LLMChatRequest,
  model: string
): OpenAI.ChatCompletionCreateParamsNonStreaming {
  const hasTools = Boolean(request.tools?.length);
  const toolChoice = hasTools
    ? toOpenAIToolChoice(request.providerOptions?.tool_choice)
    : undefined;

  return {
    model,
    messages: toOpenAIMessages(request.messages),
    ...(request.temperature !== undefined ? { temperature: request.temperature } : {}),
    ...(request.maxTokens !== undefined ? { max_tokens: request.maxTokens } : {}),
    ...(request.responseFormat === 'json' ? { response_format: { type: 'json_object' } } : {}),
    ...(request.tools?.length ? { tools: toOpenAITools(request.tools) } : {}),
    ...(toolChoice ? { tool_choice: toolChoice } : {}),
  };
}
|
||||
|
||||
class OpenAICompatibleProvider implements LLMProvider {
|
||||
readonly type = TYPE;
|
||||
readonly capabilities: ProviderCapabilities;
|
||||
@@ -101,15 +147,7 @@ class OpenAICompatibleProvider implements LLMProvider {
|
||||
|
||||
async chat(request: LLMChatRequest): Promise<LLMChatResponse> {
|
||||
const model = request.model || this.defaultModel;
|
||||
|
||||
const params: OpenAI.ChatCompletionCreateParamsNonStreaming = {
|
||||
model,
|
||||
messages: toOpenAIMessages(request.messages),
|
||||
...(request.temperature !== undefined ? { temperature: request.temperature } : {}),
|
||||
...(request.maxTokens !== undefined ? { max_tokens: request.maxTokens } : {}),
|
||||
...(request.responseFormat === 'json' ? { response_format: { type: 'json_object' } } : {}),
|
||||
...(request.tools?.length ? { tools: toOpenAITools(request.tools) as any } : {}),
|
||||
};
|
||||
const params = buildOpenAICompatibleChatParams(request, model);
|
||||
|
||||
try {
|
||||
const response = await this.client.chat.completions.create(params);
|
||||
|
||||
@@ -5,7 +5,16 @@
|
||||
|
||||
import type { LLMToolDefinition } from './types';
|
||||
|
||||
export function toOpenAITools(tools: LLMToolDefinition[]): object[] {
|
||||
export interface OpenAIToolDefinition {
|
||||
type: 'function';
|
||||
function: {
|
||||
name: string;
|
||||
description: string;
|
||||
parameters: Record<string, unknown>;
|
||||
};
|
||||
}
|
||||
|
||||
export function toOpenAITools(tools: LLMToolDefinition[]): OpenAIToolDefinition[] {
|
||||
return tools.map((tool) => ({
|
||||
type: 'function' as const,
|
||||
function: {
|
||||
|
||||
@@ -10,15 +10,9 @@
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Business role that maps to a specific provider + model via DB config. */
|
||||
export type ModelRole = 'planner' | 'specialist' | 'judge' | 'embedding';
|
||||
export type ModelRole = 'planner' | 'specialist';
|
||||
|
||||
/** All valid model roles. */
|
||||
export const MODEL_ROLES: readonly ModelRole[] = [
|
||||
'planner',
|
||||
'specialist',
|
||||
'judge',
|
||||
'embedding',
|
||||
] as const;
|
||||
export const MODEL_ROLES: readonly ModelRole[] = ['planner', 'specialist'] as const;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Provider Type
|
||||
|
||||
488
src/review/__tests__/autonomous-review-agent.test.ts
Normal file
488
src/review/__tests__/autonomous-review-agent.test.ts
Normal file
@@ -0,0 +1,488 @@
|
||||
import { describe, expect, mock, test } from 'bun:test';
|
||||
import { z } from 'zod';
|
||||
import type { LLMGateway } from '../../llm/gateway';
|
||||
import type {
|
||||
LLMChatRequest,
|
||||
LLMChatResponse,
|
||||
LLMMessage,
|
||||
LLMToolDefinition,
|
||||
ModelRole,
|
||||
} from '../../llm/types';
|
||||
import { AutonomousReviewAgent } from '../agents/autonomous-review-agent';
|
||||
import { ToolRegistry } from '../tools/registry';
|
||||
import type { Tool } from '../tools/types';
|
||||
import type { ReviewContext, ReviewRun, ReviewTask } from '../types';
|
||||
|
||||
type ChatRequest = {
|
||||
messages: LLMMessage[];
|
||||
temperature?: number;
|
||||
responseFormat?: 'text' | 'json';
|
||||
tools?: LLMToolDefinition[];
|
||||
providerOptions?: Record<string, unknown>;
|
||||
};
|
||||
|
||||
type ChatCall = { role: ModelRole } & ChatRequest;
|
||||
|
||||
/**
 * Builds a pull-request `ReviewRun` fixture in the `in_progress` state.
 *
 * @param overrides - Fields that replace the defaults.
 */
function makeRun(overrides: Partial<ReviewRun> = {}): ReviewRun {
  const defaults: ReviewRun = {
    id: 'run-autonomous-001',
    idempotencyKey: 'idem-autonomous',
    eventType: 'pull_request',
    status: 'in_progress',
    owner: 'test-owner',
    repo: 'test-repo',
    cloneUrl: 'https://example.com/repo.git',
    prNumber: 1,
    baseSha: 'aaa',
    headSha: 'bbb',
    attempts: 0,
    maxAttempts: 2,
    createdAt: new Date().toISOString(),
    updatedAt: new Date().toISOString(),
  };
  return { ...defaults, ...overrides };
}
|
||||
|
||||
/**
 * Builds a `ReviewContext` fixture describing a one-line addition to `src/foo.ts`.
 *
 * @param overrides - Fields that replace the defaults.
 */
function makeContext(overrides: Partial<ReviewContext> = {}): ReviewContext {
  const defaults: ReviewContext = {
    workspacePath: '/tmp/test-workspace',
    mirrorPath: '/tmp/test-mirror',
    diff: '--- a/src/foo.ts\n+++ b/src/foo.ts\n@@ -1,3 +1,4 @@\n+const x = null;\n export function foo() {}',
    changedFiles: [{ path: 'src/foo.ts', status: 'M', additions: 1, deletions: 0 }],
    parsedDiff: [
      {
        path: 'src/foo.ts',
        changes: [{ lineNumber: 1, content: 'const x = null;', type: 'add' }],
      },
    ],
    fileContents: { 'src/foo.ts': 'const x = null;\nexport function foo() {}' },
  };
  return { ...defaults, ...overrides };
}
|
||||
|
||||
/**
 * Builds a full-mode, medium-size `ReviewTask` fixture targeting `src/foo.ts`.
 *
 * @param overrides - Fields that replace the defaults.
 */
function makeTask(overrides: Partial<ReviewTask> = {}): ReviewTask {
  const defaults: ReviewTask = {
    mode: 'full',
    reviewSize: 'medium',
    riskTags: ['quality-sensitive'],
    suspectedEntrypoints: ['src/foo.ts'],
    tokenBudget: 8000,
  };
  return { ...defaults, ...overrides };
}
|
||||
|
||||
/**
 * Builds a concurrency-safe `Tool` fixture.
 *
 * The parameter schema accepts optional `query`, `pattern` and `file_path`
 * strings.
 *
 * @param name - Tool name; also used in the description (`Tool <name>`).
 * @param execute - Implementation invoked when the tool runs.
 */
function makeTool(name: string, execute: Tool['execute']): Tool {
  const parameters = z.object({
    query: z.string().optional(),
    pattern: z.string().optional(),
    file_path: z.string().optional(),
  });

  const tool: Tool = {
    name,
    description: `Tool ${name}`,
    parameters,
    isConcurrencySafe: true,
    execute,
  };
  return tool;
}
|
||||
|
||||
/**
 * Creates a fake gateway whose `chatForRole` replays scripted responses.
 *
 * The n-th call uses the n-th response factory. Once the script is
 * exhausted, the last factory is reused for every further call. Every call
 * is recorded (role plus request fields) before the factory runs.
 *
 * @param responses - Response factories, one per expected call.
 * @returns `gateway` (object exposing `chatForRole`) and `getCalls`, which
 *   returns the recorded calls in order.
 * @throws The promise from `chatForRole` rejects with a descriptive `Error`
 *   when `responses` is empty, instead of an opaque "not a function" TypeError.
 */
function createMockGateway(responses: Array<(call: ChatCall) => LLMChatResponse>) {
  let callIndex = 0;
  const calls: ChatCall[] = [];

  return {
    gateway: {
      chatForRole: async (role: ModelRole, request: Omit<LLMChatRequest, 'model'>) => {
        const call = { role, ...request };
        calls.push(call);

        // Fall back to the final scripted response once the script runs out.
        const responseFn = responses[callIndex] ?? responses[responses.length - 1];
        callIndex++;

        if (!responseFn) {
          throw new Error('createMockGateway: no scripted responses were provided');
        }
        return responseFn(call);
      },
    },
    getCalls: () => calls,
  };
}
|
||||
|
||||
/**
 * Builds a chat response that requests the given tool calls.
 *
 * Each entry's `args` is JSON-serialized into the call's `arguments` field.
 * Content is `null`, the finish reason is `tool_calls`, and usage is all zeros.
 */
function toolCallResponse(
  toolCalls: Array<{ id: string; name: string; args: unknown }>
): LLMChatResponse {
  const serializedCalls = toolCalls.map(({ id, name, args }) => ({
    id,
    name,
    arguments: JSON.stringify(args),
  }));

  return {
    content: null,
    toolCalls: serializedCalls,
    finishReason: 'tool_calls',
    usage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
  };
}
|
||||
|
||||
/**
 * Builds a final chat response carrying plain text: no tool calls, finish
 * reason `stop`, and all-zero usage.
 */
function contentResponse(content: string): LLMChatResponse {
  const zeroUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
  const response: LLMChatResponse = {
    content,
    toolCalls: [],
    finishReason: 'stop',
    usage: zeroUsage,
  };
  return response;
}
|
||||
|
||||
/** Builds a final chat response whose content is `data` serialized as JSON. */
function jsonResponse(data: unknown): LLMChatResponse {
  const serialized = JSON.stringify(data);
  return contentResponse(serialized);
}
|
||||
|
||||
describe('AutonomousReviewAgent', () => {
|
||||
test('model-driven investigation can search, read, then finalize without forced tool_choice or investigation JSON mode', async () => {
|
||||
const registry = new ToolRegistry();
|
||||
const searchCode = mock(async () => ({ results: ['src/foo.ts contains null'] }));
|
||||
const readFile = mock(async () => ({ path: 'src/foo.ts', content: 'const x = null;' }));
|
||||
registry.register(makeTool('search_code', searchCode));
|
||||
registry.register(makeTool('read_file', readFile));
|
||||
|
||||
const finding = {
|
||||
category: 'quality' as const,
|
||||
severity: 'medium' as const,
|
||||
confidence: 0.84,
|
||||
path: 'src/foo.ts',
|
||||
line: 1,
|
||||
title: 'Null assignment needs guard',
|
||||
detail: 'The new value is null and later code assumes a value.',
|
||||
evidence: 'const x = null;',
|
||||
suggestion: 'Use a safe default or guard downstream access.',
|
||||
};
|
||||
const { gateway, getCalls } = createMockGateway([
|
||||
() => toolCallResponse([{ id: 'call_1', name: 'search_code', args: { query: 'null' } }]),
|
||||
() =>
|
||||
toolCallResponse([{ id: 'call_2', name: 'read_file', args: { file_path: 'src/foo.ts' } }]),
|
||||
() => jsonResponse({ findings: [finding] }),
|
||||
]);
|
||||
|
||||
const agent = new AutonomousReviewAgent(gateway as unknown as LLMGateway, registry);
|
||||
const result = await agent.review(makeRun(), makeContext(), makeTask());
|
||||
|
||||
expect(searchCode).toHaveBeenCalledTimes(1);
|
||||
expect(readFile).toHaveBeenCalledTimes(1);
|
||||
expect(result.findings).toHaveLength(1);
|
||||
expect(result.findings[0]).toMatchObject({ category: 'quality', path: 'src/foo.ts' });
|
||||
|
||||
const calls = getCalls();
|
||||
expect(calls).toHaveLength(3);
|
||||
expect(calls[0].role).toBe('specialist');
|
||||
expect(calls[0].tools?.map((tool) => tool.name)).toEqual(['search_code', 'read_file']);
|
||||
expect(calls[0].responseFormat).toBeUndefined();
|
||||
expect(calls[0].providerOptions).toBeUndefined();
|
||||
expect(calls[1].responseFormat).toBeUndefined();
|
||||
expect(calls[1].providerOptions).toBeUndefined();
|
||||
expect(result.diagnostics).toMatchObject({
|
||||
iterations: 3,
|
||||
toolCallNames: ['search_code', 'read_file'],
|
||||
toolCallCount: 2,
|
||||
parsedFindingCount: 1,
|
||||
stopReason: 'modelFinalized',
|
||||
});
|
||||
expect(result.diagnostics?.stateSequence).toEqual([
|
||||
'investigating',
|
||||
'tool_calling',
|
||||
'investigating',
|
||||
'tool_calling',
|
||||
'investigating',
|
||||
'finalizing',
|
||||
'completed',
|
||||
]);
|
||||
});
|
||||
|
||||
test('cross-file investigation reads caller and callee before reporting autonomous finding', async () => {
|
||||
const registry = new ToolRegistry();
|
||||
const searchCode = mock(async () => ({
|
||||
matches: [
|
||||
{
|
||||
path: 'src/caller.ts',
|
||||
line: 4,
|
||||
content: 'return normalizeToken(raw).trim();',
|
||||
},
|
||||
{
|
||||
path: 'src/callee.ts',
|
||||
line: 2,
|
||||
content: 'return raw.length === 0 ? null : raw;',
|
||||
},
|
||||
],
|
||||
total: 2,
|
||||
}));
|
||||
const readFile = mock(async ({ file_path }: { file_path?: string }) => {
|
||||
if (file_path === 'src/caller.ts') {
|
||||
return {
|
||||
path: 'src/caller.ts',
|
||||
content:
|
||||
"import { normalizeToken } from './callee';\n\nexport function buildHeader(raw: string) {\n return normalizeToken(raw).trim();\n}",
|
||||
};
|
||||
}
|
||||
if (file_path === 'src/callee.ts') {
|
||||
return {
|
||||
path: 'src/callee.ts',
|
||||
content:
|
||||
'export function normalizeToken(raw: string): string | null {\n return raw.length === 0 ? null : raw;\n}',
|
||||
};
|
||||
}
|
||||
return { path: file_path, error: 'unexpected file' };
|
||||
});
|
||||
registry.register(makeTool('search_code', searchCode));
|
||||
registry.register(makeTool('read_file', readFile));
|
||||
|
||||
const finding = {
|
||||
category: 'correctness' as const,
|
||||
severity: 'high' as const,
|
||||
confidence: 0.93,
|
||||
path: 'src/caller.ts',
|
||||
line: 4,
|
||||
title: 'Caller trims nullable callee result',
|
||||
detail:
|
||||
'buildHeader calls trim() on normalizeToken(raw), but normalizeToken returns null for empty input in src/callee.ts.',
|
||||
evidence:
|
||||
'src/caller.ts: normalizeToken(raw).trim(); src/callee.ts: return raw.length === 0 ? null : raw;',
|
||||
suggestion: 'Guard the nullable result or change normalizeToken to always return a string.',
|
||||
};
|
||||
const { gateway, getCalls } = createMockGateway([
|
||||
() =>
|
||||
toolCallResponse([
|
||||
{ id: 'call_1', name: 'search_code', args: { pattern: 'normalizeToken' } },
|
||||
]),
|
||||
() =>
|
||||
toolCallResponse([
|
||||
{ id: 'call_2', name: 'read_file', args: { file_path: 'src/caller.ts' } },
|
||||
]),
|
||||
() =>
|
||||
toolCallResponse([
|
||||
{ id: 'call_3', name: 'read_file', args: { file_path: 'src/callee.ts' } },
|
||||
]),
|
||||
(call) => {
|
||||
const toolMessages = call.messages.filter((message) => message.role === 'tool');
|
||||
expect(toolMessages).toHaveLength(3);
|
||||
expect(toolMessages.map((message) => message.content)).toEqual(
|
||||
expect.arrayContaining([
|
||||
expect.stringContaining('src/caller.ts'),
|
||||
expect.stringContaining('src/callee.ts'),
|
||||
])
|
||||
);
|
||||
return jsonResponse({ findings: [finding] });
|
||||
},
|
||||
]);
|
||||
|
||||
const result = await new AutonomousReviewAgent(
|
||||
gateway as unknown as LLMGateway,
|
||||
registry
|
||||
).review(
|
||||
makeRun(),
|
||||
makeContext({
|
||||
diff: [
|
||||
'--- a/src/caller.ts',
|
||||
'+++ b/src/caller.ts',
|
||||
'@@ -1,3 +1,5 @@',
|
||||
"+import { normalizeToken } from './callee';",
|
||||
'+export function buildHeader(raw: string) {',
|
||||
'+ return normalizeToken(raw).trim();',
|
||||
'+}',
|
||||
].join('\n'),
|
||||
changedFiles: [{ path: 'src/caller.ts', status: 'M', additions: 4, deletions: 0 }],
|
||||
parsedDiff: [
|
||||
{
|
||||
path: 'src/caller.ts',
|
||||
changes: [
|
||||
{ lineNumber: 4, content: ' return normalizeToken(raw).trim();', type: 'add' },
|
||||
],
|
||||
},
|
||||
],
|
||||
fileContents: {
|
||||
'src/caller.ts':
|
||||
"import { normalizeToken } from './callee';\n\nexport function buildHeader(raw: string) {\n return normalizeToken(raw).trim();\n}",
|
||||
},
|
||||
}),
|
||||
makeTask({ suspectedEntrypoints: ['src/caller.ts'], maxTurns: 6, maxToolCalls: 6 })
|
||||
);
|
||||
|
||||
expect(searchCode).toHaveBeenCalledTimes(1);
|
||||
expect(readFile).toHaveBeenCalledTimes(2);
|
||||
expect(readFile.mock.calls.map(([params]) => params.file_path)).toEqual([
|
||||
'src/caller.ts',
|
||||
'src/callee.ts',
|
||||
]);
|
||||
expect(getCalls()).toHaveLength(4);
|
||||
expect(result.findings).toHaveLength(1);
|
||||
expect(result.findings[0]).toMatchObject({
|
||||
category: 'correctness',
|
||||
severity: 'high',
|
||||
path: 'src/caller.ts',
|
||||
line: 4,
|
||||
title: 'Caller trims nullable callee result',
|
||||
});
|
||||
expect(result.findings[0].detail).toContain('src/callee.ts');
|
||||
expect(result.diagnostics).toMatchObject({
|
||||
toolCallNames: ['search_code', 'read_file', 'read_file'],
|
||||
toolCallCount: 3,
|
||||
parsedFindingCount: 1,
|
||||
stopReason: 'modelFinalized',
|
||||
});
|
||||
});
|
||||
|
||||
test('compact context keeps changed files and file contents outside suspectedEntrypoints', async () => {
|
||||
const { gateway, getCalls } = createMockGateway([() => jsonResponse({ findings: [] })]);
|
||||
const agent = new AutonomousReviewAgent(gateway as unknown as LLMGateway);
|
||||
|
||||
await agent.review(
|
||||
makeRun(),
|
||||
makeContext({
|
||||
diff: [
|
||||
'--- a/src/entry.ts',
|
||||
'+++ b/src/entry.ts',
|
||||
'@@ -1,2 +1,3 @@',
|
||||
'+export const entry = true;',
|
||||
'--- a/src/other.ts',
|
||||
'+++ b/src/other.ts',
|
||||
'@@ -1,2 +1,3 @@',
|
||||
'+export const other = true;',
|
||||
].join('\n'),
|
||||
changedFiles: [
|
||||
{ path: 'src/entry.ts', status: 'M', additions: 1, deletions: 0 },
|
||||
{ path: 'src/other.ts', status: 'M', additions: 1, deletions: 0 },
|
||||
],
|
||||
parsedDiff: [
|
||||
{
|
||||
path: 'src/entry.ts',
|
||||
changes: [{ lineNumber: 1, content: 'export const entry = true;', type: 'add' }],
|
||||
},
|
||||
{
|
||||
path: 'src/other.ts',
|
||||
changes: [{ lineNumber: 1, content: 'export const other = true;', type: 'add' }],
|
||||
},
|
||||
],
|
||||
fileContents: {
|
||||
'src/entry.ts': 'export const entry = true;',
|
||||
'src/other.ts': 'export const other = true;',
|
||||
},
|
||||
}),
|
||||
makeTask({ suspectedEntrypoints: ['src/entry.ts'] })
|
||||
);
|
||||
|
||||
const userPrompt = getCalls()[0].messages.find((message) => message.role === 'user');
|
||||
expect(userPrompt?.content).toContain('suspectedEntrypoints: src/entry.ts');
|
||||
expect(userPrompt?.content).toContain('src/other.ts');
|
||||
expect(userPrompt?.content).toContain('export const other = true;');
|
||||
expect(userPrompt?.content).toContain('changedFiles');
|
||||
});
|
||||
|
||||
test('uses default light budget and synthesizes after maxTurns when task omits specific limits', async () => {
|
||||
const registry = new ToolRegistry();
|
||||
registry.register(makeTool('search_code', async () => ({ results: [] })));
|
||||
const { gateway, getCalls } = createMockGateway([
|
||||
() => toolCallResponse([{ id: 'call_1', name: 'search_code', args: { query: 'a' } }]),
|
||||
() => toolCallResponse([{ id: 'call_2', name: 'search_code', args: { query: 'b' } }]),
|
||||
() => toolCallResponse([{ id: 'call_3', name: 'search_code', args: { query: 'c' } }]),
|
||||
() => toolCallResponse([{ id: 'call_4', name: 'search_code', args: { query: 'd' } }]),
|
||||
() => jsonResponse({ findings: [] }),
|
||||
]);
|
||||
|
||||
const agent = new AutonomousReviewAgent(gateway as unknown as LLMGateway, registry);
|
||||
const result = await agent.review(
|
||||
makeRun(),
|
||||
makeContext(),
|
||||
makeTask({
|
||||
mode: 'light',
|
||||
maxTurns: undefined,
|
||||
maxToolCalls: undefined,
|
||||
maxElapsedMs: undefined,
|
||||
})
|
||||
);
|
||||
|
||||
expect(getCalls()).toHaveLength(5);
|
||||
expect(getCalls()[4].responseFormat).toBe('json');
|
||||
expect(result.findings).toHaveLength(0);
|
||||
expect(result.diagnostics).toMatchObject({
|
||||
iterations: 4,
|
||||
toolCallCount: 4,
|
||||
stopReason: 'maxTurns',
|
||||
parsedFindingCount: 0,
|
||||
});
|
||||
expect(result.diagnostics?.stateSequence).toContain('synthesizing');
|
||||
});
|
||||
|
||||
test('finalization repairs invalid JSON once and accepts valid JSON on second attempt', async () => {
|
||||
const validFinding = {
|
||||
severity: 'high' as const,
|
||||
confidence: 0.91,
|
||||
path: 'src/foo.ts',
|
||||
line: 1,
|
||||
title: 'Unsafe null',
|
||||
detail: 'Null is returned to callers that expect a string.',
|
||||
evidence: 'const x = null;',
|
||||
suggestion: 'Return a string or update callers to handle null.',
|
||||
};
|
||||
const { gateway, getCalls } = createMockGateway([
|
||||
() => contentResponse('not valid json'),
|
||||
() => jsonResponse({ findings: [validFinding] }),
|
||||
]);
|
||||
|
||||
const agent = new AutonomousReviewAgent(gateway as unknown as LLMGateway);
|
||||
const result = await agent.review(makeRun(), makeContext(), makeTask({ mode: 'light' }));
|
||||
|
||||
expect(getCalls()).toHaveLength(2);
|
||||
expect(getCalls()[0].responseFormat).toBeUndefined();
|
||||
expect(getCalls()[1].responseFormat).toBe('json');
|
||||
expect(result.findings).toHaveLength(1);
|
||||
expect(result.findings[0].category).toBe('correctness');
|
||||
expect(result.findings[0].fingerprint).toBeTruthy();
|
||||
expect(result.diagnostics?.parseErrors?.length).toBe(1);
|
||||
expect(result.diagnostics?.finalResponsePreview).toContain('Unsafe null');
|
||||
});
|
||||
|
||||
test('stops after two consecutive empty investigation responses', async () => {
|
||||
const { gateway } = createMockGateway([
|
||||
() => ({
|
||||
content: null,
|
||||
toolCalls: [],
|
||||
finishReason: 'stop',
|
||||
usage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
|
||||
}),
|
||||
() => ({
|
||||
content: '',
|
||||
toolCalls: [],
|
||||
finishReason: 'stop',
|
||||
usage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
|
||||
}),
|
||||
() => jsonResponse({ findings: [] }),
|
||||
]);
|
||||
|
||||
const agent = new AutonomousReviewAgent(gateway as unknown as LLMGateway);
|
||||
const result = await agent.review(makeRun(), makeContext(), makeTask());
|
||||
|
||||
expect(result.findings).toHaveLength(0);
|
||||
expect(result.diagnostics).toMatchObject({
|
||||
emptyResponseCount: 2,
|
||||
stopReason: 'emptyResponses',
|
||||
parsedFindingCount: 0,
|
||||
});
|
||||
});
|
||||
|
||||
test('stops after three consecutive tool failures and records sequence', async () => {
|
||||
const registry = new ToolRegistry();
|
||||
registry.register(
|
||||
makeTool('broken_tool', async () => {
|
||||
throw new Error('boom');
|
||||
})
|
||||
);
|
||||
const { gateway } = createMockGateway([
|
||||
() => toolCallResponse([{ id: 'call_1', name: 'broken_tool', args: {} }]),
|
||||
() => toolCallResponse([{ id: 'call_2', name: 'broken_tool', args: {} }]),
|
||||
() => toolCallResponse([{ id: 'call_3', name: 'broken_tool', args: {} }]),
|
||||
() => jsonResponse({ findings: [] }),
|
||||
]);
|
||||
|
||||
const agent = new AutonomousReviewAgent(gateway as unknown as LLMGateway, registry);
|
||||
const result = await agent.review(makeRun(), makeContext(), makeTask({ maxTurns: 10 }));
|
||||
|
||||
expect(result.findings).toHaveLength(0);
|
||||
expect(result.diagnostics).toMatchObject({
|
||||
toolCallNames: ['broken_tool', 'broken_tool', 'broken_tool'],
|
||||
consecutiveToolFailures: 3,
|
||||
stopReason: 'toolFailures',
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -1,618 +0,0 @@
|
||||
import { afterEach, beforeEach, describe, expect, test } from 'bun:test';
|
||||
import { mock } from 'bun:test';
|
||||
import { mkdtemp, rm } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import path from 'node:path';
|
||||
import { JudgeAgent } from '../agents/judge-agent';
|
||||
import type { TriageResult } from '../agents/triage-agent';
|
||||
import type { DiffExtractor } from '../context/diff-extractor';
|
||||
import type { LocalRepoManager } from '../context/local-repo-manager';
|
||||
import { ReviewOrchestrator } from '../orchestrator';
|
||||
import { applyPublishPolicy } from '../policy/publish-policy';
|
||||
import { FileReviewStore } from '../store/file-review-store';
|
||||
import type { Finding, PullRequestReviewPayload, ReviewContext, ReviewRun } from '../types';
|
||||
|
||||
type PartialFinding = Omit<Finding, 'id' | 'runId' | 'published'>;
|
||||
|
||||
/**
 * Builds a pull-request review payload for tests.
 *
 * Starts from a fixed set of default field values (PR #1 of
 * `test-owner/test-repo`, base `aaa`, head `bbb`) and lets the caller
 * replace or add any field through `overrides`.
 *
 * @param overrides - Fields that take precedence over the defaults.
 * @returns The defaults merged with `overrides`.
 */
function makePRPayload(
  overrides: Partial<PullRequestReviewPayload> = {}
): PullRequestReviewPayload {
  const defaults: PullRequestReviewPayload = {
    idempotencyKey: 'test/repo#1:aaa...bbb',
    eventType: 'pull_request',
    owner: 'test-owner',
    repo: 'test-repo',
    cloneUrl: 'https://gitea.example.com/test-owner/test-repo.git',
    prNumber: 1,
    baseSha: 'aaa',
    headSha: 'bbb',
  };

  // Later spread wins, so any caller-supplied field replaces its default.
  return { ...defaults, ...overrides };
}
|
||||
|
||||
/**
 * Generates `count` synthetic correctness findings that share one severity.
 *
 * Each finding is distinguished only by its zero-based index, which is
 * embedded in the fingerprint, path, line (`10 + index`), title, detail,
 * evidence and suggestion. Confidence is derived from the severity:
 * `high` → 0.95, `medium` → 0.85, anything else → 0.7.
 *
 * @param count - Number of findings to generate.
 * @param severity - Severity assigned to every finding (defaults to `'high'`).
 * @returns The generated findings, in index order.
 */
function makeAgentFindings(
  count: number,
  severity: 'high' | 'medium' | 'low' = 'high'
): PartialFinding[] {
  let confidence: number;
  switch (severity) {
    case 'high':
      confidence = 0.95;
      break;
    case 'medium':
      confidence = 0.85;
      break;
    default:
      confidence = 0.7;
  }

  const buildFinding = (_: unknown, index: number) => ({
    fingerprint: `fp-${severity}-${index}`,
    category: 'correctness' as const,
    severity,
    confidence,
    path: `src/file${index}.ts`,
    line: 10 + index,
    title: `${severity} issue ${index}`,
    detail: `Detail for ${severity} issue ${index}`,
    evidence: `Evidence ${index}`,
    suggestion: `Fix suggestion ${index}`,
  });

  return Array.from({ length: count }, buildFinding);
}
|
||||
|
||||
/**
 * Builds a review context for tests describing a one-line change to
 * `src/core.ts`.
 *
 * The default context carries fixed workspace/mirror paths under `/tmp`,
 * a minimal diff, one changed-file entry, one parsed-diff entry and the
 * matching file contents. Any field can be replaced through `overrides`.
 *
 * @param overrides - Fields that take precedence over the defaults.
 * @returns The defaults merged with `overrides`.
 */
function makeReviewContext(overrides: Partial<ReviewContext> = {}): ReviewContext {
  const corePath = 'src/core.ts';
  const coreSource = 'export const a = 1;';

  const defaults: ReviewContext = {
    workspacePath: '/tmp/workspace',
    mirrorPath: '/tmp/mirror',
    diff: 'diff --git a/src/core.ts b/src/core.ts\n+export const a = 1;',
    changedFiles: [{ path: corePath, status: 'M', additions: 1, deletions: 0 }],
    parsedDiff: [
      {
        path: corePath,
        changes: [{ lineNumber: 1, oldLineNumber: 1, content: coreSource, type: 'add' }],
      },
    ],
    fileContents: { [corePath]: coreSource },
  };

  // Later spread wins, so any caller-supplied field replaces its default.
  return { ...defaults, ...overrides };
}
|
||||
|
||||
/**
 * Creates mocked stand-ins for the orchestrator's repository manager and
 * diff extractor.
 *
 * Every method is wrapped in `mock(...)`:
 * - `prepareWorkspace` resolves to fixed `/tmp` mirror and workspace paths;
 * - `resolveReviewedRef` resolves to `null`;
 * - `saveReviewedRef` and `cleanupWorkspace` resolve to `undefined`;
 * - `getSandbox` returns a new object whose `execute` resolves to empty
 *   stdout/stderr and exit code 0;
 * - `buildContext` resolves to the supplied `context`.
 *
 * @param context - Review context handed back by `diffExtractor.buildContext`.
 * @returns An object holding the `localRepoManager` and `diffExtractor` stubs.
 */
function createOrchestratorDeps(context: ReviewContext) {
  // A fresh sandbox object is produced on every getSandbox() call.
  const makeSandbox = () => ({
    execute: async () => ({ stdout: '', stderr: '', exitCode: 0 }),
  });

  return {
    localRepoManager: {
      prepareWorkspace: mock(async () => ({
        mirrorPath: '/tmp/mirror',
        workspacePath: '/tmp/workspace',
      })),
      resolveReviewedRef: mock(async () => null),
      saveReviewedRef: mock(async () => undefined),
      cleanupWorkspace: mock(async () => undefined),
    },
    diffExtractor: {
      getSandbox: mock(makeSandbox),
      buildContext: mock(async () => context),
    },
  };
}
|
||||
|
||||
/**
|
||||
* Integration tests: Store → JudgeAgent → PublishPolicy → Store pipeline
|
||||
*
|
||||
* These tests simulate the orchestrator's data flow without needing
|
||||
* live OpenAI or Gitea services. They verify that the pipeline from
|
||||
* enqueueing a run through judging findings to applying publish policy
|
||||
* works correctly end-to-end.
|
||||
*/
|
||||
describe('Integration: Store → Judge → Policy pipeline', () => {
|
||||
let tempDir: string;
|
||||
let store: FileReviewStore;
|
||||
|
||||
beforeEach(async () => {
|
||||
tempDir = await mkdtemp(path.join(tmpdir(), 'integration-test-'));
|
||||
store = new FileReviewStore(tempDir);
|
||||
await store.init();
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await rm(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
test('full pipeline: enqueue → agent findings → judge dedup → policy → store findings → publish mark', async () => {
|
||||
const payload = makePRPayload();
|
||||
const { run, reused } = await store.createOrReuseRun(payload);
|
||||
expect(reused).toBe(false);
|
||||
expect(run.status).toBe('queued');
|
||||
|
||||
const acquired = await store.acquireNextQueuedRun();
|
||||
expect(acquired).not.toBeNull();
|
||||
expect(acquired!.status).toBe('in_progress');
|
||||
|
||||
await store.addStep({
|
||||
runId: run.id,
|
||||
stepName: 'run_specialists',
|
||||
status: 'started',
|
||||
startedAt: new Date().toISOString(),
|
||||
});
|
||||
|
||||
const correctnessFindings = makeAgentFindings(2, 'high');
|
||||
const securityFindings = makeAgentFindings(1, 'medium');
|
||||
const lowFindings = makeAgentFindings(1, 'low');
|
||||
|
||||
const duplicateFinding: PartialFinding = {
|
||||
...correctnessFindings[0],
|
||||
confidence: 0.7,
|
||||
detail: 'Duplicate with lower confidence',
|
||||
};
|
||||
|
||||
const allAgentFindings = [
|
||||
...correctnessFindings,
|
||||
...securityFindings,
|
||||
...lowFindings,
|
||||
duplicateFinding,
|
||||
];
|
||||
|
||||
const judge = new JudgeAgent();
|
||||
const decision = judge.judge(allAgentFindings);
|
||||
|
||||
expect(decision.findings.length).toBe(4);
|
||||
const dedupedFp0 = decision.findings.find((f) => f.fingerprint === 'fp-high-0');
|
||||
expect(dedupedFp0!.confidence).toBe(0.95);
|
||||
|
||||
const policyResult = applyPublishPolicy(decision.findings, 0.8, false);
|
||||
|
||||
expect(policyResult.publishable.length).toBe(3);
|
||||
expect(policyResult.gated.length).toBe(0);
|
||||
expect(policyResult.dropped.length).toBe(1);
|
||||
expect(policyResult.dropped[0].severity).toBe('low');
|
||||
|
||||
const findingsToStore = [...policyResult.publishable, ...policyResult.gated];
|
||||
const persistedFindings: Finding[] = findingsToStore.map((f, i) => ({
|
||||
...f,
|
||||
id: `finding-${i}`,
|
||||
runId: run.id,
|
||||
published: false,
|
||||
}));
|
||||
await store.addFindings(run.id, persistedFindings);
|
||||
|
||||
for (const finding of policyResult.publishable) {
|
||||
const wasNew = await store.markFindingPublished(run.id, finding.fingerprint);
|
||||
expect(wasNew).toBe(true);
|
||||
}
|
||||
|
||||
for (const finding of policyResult.publishable) {
|
||||
const wasNew = await store.markFindingPublished(run.id, finding.fingerprint);
|
||||
expect(wasNew).toBe(false);
|
||||
}
|
||||
|
||||
await store.addCommentRecord({
|
||||
runId: run.id,
|
||||
status: 'published',
|
||||
body: `## AI Agent代码审查结果\n\n${decision.summaryMarkdown}`,
|
||||
});
|
||||
|
||||
for (const finding of policyResult.publishable) {
|
||||
await store.addCommentRecord({
|
||||
runId: run.id,
|
||||
status: 'published',
|
||||
path: finding.path,
|
||||
line: finding.line,
|
||||
body: `**[${finding.severity.toUpperCase()}]** ${finding.title}`,
|
||||
});
|
||||
}
|
||||
|
||||
await store.markRunSucceeded(run.id);
|
||||
|
||||
const details = await store.getRunDetails(run.id);
|
||||
expect(details).not.toBeNull();
|
||||
expect(details!.run.status).toBe('succeeded');
|
||||
expect(details!.findings.length).toBe(3);
|
||||
expect(details!.findings.every((f) => f.published)).toBe(true);
|
||||
expect(details!.comments.length).toBe(4);
|
||||
expect(details!.comments.filter((c) => !c.path).length).toBe(1);
|
||||
expect(details!.comments.filter((c) => c.path).length).toBe(3);
|
||||
});
|
||||
|
||||
test('pipeline with humanGate: low-confidence findings go to gated, not dropped', async () => {
|
||||
const payload = makePRPayload({ idempotencyKey: 'gate-test' });
|
||||
const { run } = await store.createOrReuseRun(payload);
|
||||
await store.acquireNextQueuedRun();
|
||||
|
||||
const findings: PartialFinding[] = [
|
||||
...makeAgentFindings(1, 'high'),
|
||||
{
|
||||
fingerprint: 'fp-low-conf',
|
||||
category: 'security',
|
||||
severity: 'high',
|
||||
confidence: 0.5,
|
||||
path: 'src/auth.ts',
|
||||
line: 20,
|
||||
title: 'Potential auth bypass',
|
||||
detail: 'Detail',
|
||||
evidence: 'Evidence',
|
||||
suggestion: 'Fix',
|
||||
},
|
||||
];
|
||||
|
||||
const judge = new JudgeAgent();
|
||||
const decision = judge.judge(findings);
|
||||
const policyResult = applyPublishPolicy(decision.findings, 0.8, true);
|
||||
|
||||
expect(policyResult.publishable.length).toBe(1);
|
||||
expect(policyResult.gated.length).toBe(1);
|
||||
expect(policyResult.dropped.length).toBe(0);
|
||||
expect(policyResult.gated[0].fingerprint).toBe('fp-low-conf');
|
||||
|
||||
const allToStore = [...policyResult.publishable, ...policyResult.gated];
|
||||
const persisted: Finding[] = allToStore.map((f, i) => ({
|
||||
...f,
|
||||
id: `f-${i}`,
|
||||
runId: run.id,
|
||||
published: false,
|
||||
}));
|
||||
await store.addFindings(run.id, persisted);
|
||||
|
||||
for (const f of policyResult.publishable) {
|
||||
await store.markFindingPublished(run.id, f.fingerprint);
|
||||
}
|
||||
|
||||
for (const f of policyResult.gated) {
|
||||
await store.addCommentRecord({
|
||||
runId: run.id,
|
||||
status: 'pending',
|
||||
path: f.path,
|
||||
line: f.line,
|
||||
body: `PENDING: ${f.title}`,
|
||||
fingerprint: f.fingerprint,
|
||||
});
|
||||
}
|
||||
|
||||
const details = await store.getRunDetails(run.id);
|
||||
const pendingComments = details!.comments.filter((c) => c.status === 'pending');
|
||||
expect(pendingComments.length).toBe(1);
|
||||
expect(pendingComments[0].fingerprint).toBe('fp-low-conf');
|
||||
|
||||
const unpublished = details!.findings.filter((f) => !f.published);
|
||||
expect(unpublished.length).toBe(1);
|
||||
expect(unpublished[0].fingerprint).toBe('fp-low-conf');
|
||||
});
|
||||
|
||||
test('idempotency: duplicate webhook enqueue returns same run', async () => {
|
||||
const payload = makePRPayload();
|
||||
|
||||
const { run: first, reused: r1 } = await store.createOrReuseRun(payload);
|
||||
expect(r1).toBe(false);
|
||||
|
||||
const { run: second, reused: r2 } = await store.createOrReuseRun(payload);
|
||||
expect(r2).toBe(true);
|
||||
expect(second.id).toBe(first.id);
|
||||
|
||||
const { run: third, reused: r3 } = await store.createOrReuseRun(payload);
|
||||
expect(r3).toBe(true);
|
||||
expect(third.id).toBe(first.id);
|
||||
});
|
||||
|
||||
test('retry flow: failed run creates new run on next enqueue, old steps/findings preserved', async () => {
|
||||
const payload = makePRPayload({ maxAttempts: 1 });
|
||||
const { run: firstRun } = await store.createOrReuseRun(payload);
|
||||
|
||||
await store.acquireNextQueuedRun();
|
||||
await store.addStep({
|
||||
runId: firstRun.id,
|
||||
stepName: 'prepare_workspace',
|
||||
status: 'failed',
|
||||
startedAt: new Date().toISOString(),
|
||||
error: 'git clone failed',
|
||||
});
|
||||
await store.markRunFailed(firstRun.id, 'git clone failed');
|
||||
|
||||
const firstDetails = await store.getRunDetails(firstRun.id);
|
||||
expect(firstDetails!.run.status).toBe('failed');
|
||||
expect(firstDetails!.steps.length).toBe(1);
|
||||
|
||||
const { run: retryRun, reused } = await store.createOrReuseRun(payload);
|
||||
expect(reused).toBe(false);
|
||||
expect(retryRun.id).not.toBe(firstRun.id);
|
||||
|
||||
const retryAcquired = await store.acquireNextQueuedRun();
|
||||
expect(retryAcquired!.id).toBe(retryRun.id);
|
||||
});
|
||||
|
||||
test('recovery after crash: in_progress runs are recovered to queued', async () => {
|
||||
const p1 = makePRPayload({ idempotencyKey: 'crash-1' });
|
||||
const p2 = makePRPayload({ idempotencyKey: 'crash-2' });
|
||||
|
||||
const { run: run1 } = await store.createOrReuseRun(p1);
|
||||
const { run: run2 } = await store.createOrReuseRun(p2);
|
||||
|
||||
await store.acquireNextQueuedRun();
|
||||
await store.acquireNextQueuedRun();
|
||||
|
||||
await store.markRunSucceeded(run1.id);
|
||||
|
||||
const store2 = new FileReviewStore(tempDir);
|
||||
await store2.init();
|
||||
const recovered = await store2.recoverInterruptedRuns();
|
||||
expect(recovered).toBe(1);
|
||||
|
||||
const next = await store2.acquireNextQueuedRun();
|
||||
expect(next).not.toBeNull();
|
||||
expect(next!.id).toBe(run2.id);
|
||||
});
|
||||
|
||||
test('concurrent enqueue: multiple payloads with different keys all get unique runs', async () => {
|
||||
const payloads = Array.from({ length: 5 }, (_, i) =>
|
||||
makePRPayload({ idempotencyKey: `concurrent-${i}`, prNumber: i + 1 })
|
||||
);
|
||||
|
||||
const results = await Promise.all(payloads.map((p) => store.createOrReuseRun(p)));
|
||||
|
||||
const ids = new Set(results.map((r) => r.run.id));
|
||||
expect(ids.size).toBe(5);
|
||||
expect(results.every((r) => !r.reused)).toBe(true);
|
||||
|
||||
const runs = await store.listRuns(10);
|
||||
expect(runs.length).toBe(5);
|
||||
});
|
||||
|
||||
test('end-to-end: no findings → summary only, no line comments', async () => {
|
||||
const payload = makePRPayload({ idempotencyKey: 'no-findings' });
|
||||
const { run } = await store.createOrReuseRun(payload);
|
||||
await store.acquireNextQueuedRun();
|
||||
|
||||
const judge = new JudgeAgent();
|
||||
const decision = judge.judge([]);
|
||||
|
||||
expect(decision.findings.length).toBe(0);
|
||||
expect(decision.summaryMarkdown).toContain('未发现');
|
||||
|
||||
const policyResult = applyPublishPolicy(decision.findings, 0.8, false);
|
||||
expect(policyResult.publishable.length).toBe(0);
|
||||
expect(policyResult.gated.length).toBe(0);
|
||||
expect(policyResult.dropped.length).toBe(0);
|
||||
|
||||
await store.addCommentRecord({
|
||||
runId: run.id,
|
||||
status: 'published',
|
||||
body: decision.summaryMarkdown,
|
||||
});
|
||||
|
||||
await store.markRunSucceeded(run.id);
|
||||
|
||||
const details = await store.getRunDetails(run.id);
|
||||
expect(details!.run.status).toBe('succeeded');
|
||||
expect(details!.findings.length).toBe(0);
|
||||
expect(details!.comments.length).toBe(1);
|
||||
expect(details!.comments[0].body).toContain('未发现');
|
||||
});
|
||||
|
||||
test('store persistence: data survives across store instances', async () => {
|
||||
const payload = makePRPayload();
|
||||
const { run } = await store.createOrReuseRun(payload);
|
||||
await store.acquireNextQueuedRun();
|
||||
|
||||
const findings: Finding[] = [
|
||||
{
|
||||
id: 'persist-f1',
|
||||
runId: run.id,
|
||||
fingerprint: 'persist-fp-1',
|
||||
category: 'security',
|
||||
severity: 'high',
|
||||
confidence: 0.95,
|
||||
path: 'src/auth.ts',
|
||||
line: 42,
|
||||
title: 'SQL injection',
|
||||
detail: 'Detail',
|
||||
evidence: 'Evidence',
|
||||
suggestion: 'Use parameterized queries',
|
||||
published: false,
|
||||
},
|
||||
];
|
||||
await store.addFindings(run.id, findings);
|
||||
await store.markFindingPublished(run.id, 'persist-fp-1');
|
||||
await store.markRunSucceeded(run.id);
|
||||
|
||||
const freshStore = new FileReviewStore(tempDir);
|
||||
await freshStore.init();
|
||||
|
||||
const details = await freshStore.getRunDetails(run.id);
|
||||
expect(details).not.toBeNull();
|
||||
expect(details!.run.status).toBe('succeeded');
|
||||
expect(details!.findings.length).toBe(1);
|
||||
expect(details!.findings[0].published).toBe(true);
|
||||
expect(details!.findings[0].fingerprint).toBe('persist-fp-1');
|
||||
});
|
||||
});
|
||||
|
||||
describe('Integration: orchestrator staged routing pipeline', () => {
|
||||
let tempDir: string;
|
||||
let store: FileReviewStore;
|
||||
|
||||
beforeEach(async () => {
|
||||
mock.restore();
|
||||
tempDir = await mkdtemp(path.join(tmpdir(), 'orchestrator-integration-'));
|
||||
store = new FileReviewStore(tempDir);
|
||||
await store.init();
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
mock.restore();
|
||||
await rm(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
test('skip mode bypasses specialists end-to-end', async () => {
|
||||
const payload = makePRPayload({ idempotencyKey: 'stage-skip' });
|
||||
const { run } = await store.createOrReuseRun(payload);
|
||||
const acquired = await store.acquireNextQueuedRun();
|
||||
expect(acquired).not.toBeNull();
|
||||
|
||||
const context = makeReviewContext({
|
||||
changedFiles: [{ path: 'README.md', status: 'M', additions: 2, deletions: 0 }],
|
||||
parsedDiff: [
|
||||
{
|
||||
path: 'README.md',
|
||||
changes: [{ lineNumber: 10, oldLineNumber: 10, content: 'new docs', type: 'add' }],
|
||||
},
|
||||
],
|
||||
fileContents: { 'README.md': 'new docs' },
|
||||
diff: 'diff --git a/README.md b/README.md\n+new docs',
|
||||
});
|
||||
const { localRepoManager, diffExtractor } = createOrchestratorDeps(context);
|
||||
|
||||
const orchestrator = new ReviewOrchestrator(
|
||||
store,
|
||||
localRepoManager as unknown as LocalRepoManager,
|
||||
diffExtractor as unknown as DiffExtractor
|
||||
);
|
||||
|
||||
const internal = orchestrator as unknown as {
|
||||
triageAgent: { analyze: (ctx: ReviewContext) => Promise<TriageResult> };
|
||||
correctnessAgent: {
|
||||
reviewWithOptions: (
|
||||
runArg: ReviewRun,
|
||||
ctx: ReviewContext,
|
||||
options?: unknown
|
||||
) => Promise<unknown>;
|
||||
};
|
||||
publishSummary: (runArg: ReviewRun, summary: string, gatedCount: number) => Promise<void>;
|
||||
publishLineComments: (
|
||||
runArg: ReviewRun,
|
||||
comments: Array<{ path: string; line: number; comment: string }>
|
||||
) => Promise<boolean>;
|
||||
};
|
||||
|
||||
internal.triageAgent = {
|
||||
analyze: mock(
|
||||
async (): Promise<TriageResult> => ({
|
||||
complexity: 'trivial',
|
||||
reviewSize: 'small',
|
||||
mode: 'skip',
|
||||
tasks: [],
|
||||
riskTags: [],
|
||||
rationale: 'docs-only',
|
||||
})
|
||||
),
|
||||
};
|
||||
|
||||
const correctnessSpy = mock(async () => ({ agentName: 'Correctness Agent', findings: [] }));
|
||||
internal.correctnessAgent.reviewWithOptions = correctnessSpy;
|
||||
internal.publishSummary = mock(async () => undefined);
|
||||
internal.publishLineComments = mock(async () => false);
|
||||
|
||||
await orchestrator.execute(acquired!);
|
||||
|
||||
expect(correctnessSpy).not.toHaveBeenCalled();
|
||||
|
||||
const details = await store.getRunDetails(run.id);
|
||||
expect(details).not.toBeNull();
|
||||
expect(details!.findings).toHaveLength(0);
|
||||
});
|
||||
|
||||
test('full task mode passes scoped options and publishes finding', async () => {
|
||||
const payload = makePRPayload({ idempotencyKey: 'stage-full' });
|
||||
const { run } = await store.createOrReuseRun(payload);
|
||||
const acquired = await store.acquireNextQueuedRun();
|
||||
expect(acquired).not.toBeNull();
|
||||
|
||||
const context = makeReviewContext();
|
||||
const { localRepoManager, diffExtractor } = createOrchestratorDeps(context);
|
||||
|
||||
const orchestrator = new ReviewOrchestrator(
|
||||
store,
|
||||
localRepoManager as unknown as LocalRepoManager,
|
||||
diffExtractor as unknown as DiffExtractor
|
||||
);
|
||||
|
||||
const internal = orchestrator as unknown as {
|
||||
triageAgent: { analyze: (ctx: ReviewContext) => Promise<TriageResult> };
|
||||
correctnessAgent: {
|
||||
reviewWithOptions: (
|
||||
runArg: ReviewRun,
|
||||
ctx: ReviewContext,
|
||||
options?: {
|
||||
scopePaths?: string[];
|
||||
allowTools?: boolean;
|
||||
maxIterations?: number;
|
||||
mode?: 'skip' | 'light' | 'full';
|
||||
maxContextTokens?: number;
|
||||
}
|
||||
) => Promise<{
|
||||
agentName: string;
|
||||
findings: Array<Omit<Finding, 'id' | 'runId' | 'published'>>;
|
||||
}>;
|
||||
};
|
||||
publishSummary: (runArg: ReviewRun, summary: string, gatedCount: number) => Promise<void>;
|
||||
publishLineComments: (
|
||||
runArg: ReviewRun,
|
||||
comments: Array<{ path: string; line: number; comment: string }>
|
||||
) => Promise<boolean>;
|
||||
};
|
||||
|
||||
internal.triageAgent = {
|
||||
analyze: mock(
|
||||
async (): Promise<TriageResult> => ({
|
||||
complexity: 'standard',
|
||||
reviewSize: 'small',
|
||||
mode: 'full',
|
||||
riskTags: ['security-sensitive'],
|
||||
rationale: 'auth file changed',
|
||||
tasks: [
|
||||
{
|
||||
domain: 'correctness',
|
||||
paths: ['src/core.ts'],
|
||||
riskTags: ['security-sensitive'],
|
||||
mode: 'full',
|
||||
tokenBudget: 12000,
|
||||
maxIterations: 2,
|
||||
allowTools: false,
|
||||
allowReflection: false,
|
||||
allowDebate: false,
|
||||
},
|
||||
],
|
||||
})
|
||||
),
|
||||
};
|
||||
|
||||
const correctnessSpy = mock(
|
||||
async (
|
||||
_runArg: ReviewRun,
|
||||
_ctx: ReviewContext,
|
||||
_options?: {
|
||||
scopePaths?: string[];
|
||||
allowTools?: boolean;
|
||||
maxIterations?: number;
|
||||
mode?: 'skip' | 'light' | 'full';
|
||||
maxContextTokens?: number;
|
||||
}
|
||||
) => ({
|
||||
agentName: 'Correctness Agent',
|
||||
findings: [
|
||||
{
|
||||
fingerprint: 'stage-full-fp-1',
|
||||
category: 'correctness' as const,
|
||||
severity: 'high' as const,
|
||||
confidence: 0.95,
|
||||
path: 'src/core.ts',
|
||||
line: 1,
|
||||
title: 'critical issue',
|
||||
detail: 'detail',
|
||||
evidence: 'evidence',
|
||||
suggestion: 'fix',
|
||||
},
|
||||
],
|
||||
})
|
||||
);
|
||||
internal.correctnessAgent.reviewWithOptions = correctnessSpy;
|
||||
internal.publishSummary = mock(async () => undefined);
|
||||
internal.publishLineComments = mock(async () => true);
|
||||
|
||||
await orchestrator.execute(acquired!);
|
||||
|
||||
expect(correctnessSpy).toHaveBeenCalledTimes(1);
|
||||
const callArgs = correctnessSpy.mock.calls[0];
|
||||
const options = callArgs?.[2];
|
||||
expect(options?.scopePaths).toEqual(['src/core.ts']);
|
||||
expect(options?.allowTools).toBe(false);
|
||||
expect(options?.maxIterations).toBe(2);
|
||||
expect(options?.mode).toBe('full');
|
||||
|
||||
const details = await store.getRunDetails(run.id);
|
||||
expect(details).not.toBeNull();
|
||||
expect(details!.findings).toHaveLength(1);
|
||||
expect(details!.findings[0].published).toBe(true);
|
||||
expect(details!.findings[0].path).toBe('src/core.ts');
|
||||
});
|
||||
});
|
||||
@@ -1,137 +0,0 @@
|
||||
import { describe, expect, test } from 'bun:test';
|
||||
import { JudgeAgent } from '../agents/judge-agent';
|
||||
import type { Finding } from '../types';
|
||||
|
||||
type TestFinding = Omit<Finding, 'id' | 'runId' | 'published'>;
|
||||
|
||||
/**
 * Builds a single finding for JudgeAgent tests.
 *
 * Defaults to a medium-severity correctness finding at `src/foo.ts:10`
 * with confidence 0.8 and a randomly suffixed fingerprint (`fp-` followed
 * by up to six base-36 characters). Any field can be replaced through
 * `overrides`.
 *
 * @param overrides - Fields that take precedence over the defaults.
 * @returns The defaults merged with `overrides`.
 */
function makeFinding(overrides: Partial<TestFinding> = {}): TestFinding {
  // Drop the leading "0." of the base-36 rendering and keep at most six characters.
  const randomSuffix = Math.random().toString(36).slice(2, 8);

  const defaults: TestFinding = {
    fingerprint: `fp-${randomSuffix}`,
    category: 'correctness',
    severity: 'medium',
    confidence: 0.8,
    path: 'src/foo.ts',
    line: 10,
    title: 'Test issue',
    detail: 'Detail',
    evidence: 'Evidence',
    suggestion: 'Fix it',
  };

  // Later spread wins, so any caller-supplied field replaces its default.
  return { ...defaults, ...overrides };
}
|
||||
|
||||
// Test suite for JudgeAgent.judge: empty input, de-duplication by
// fingerprint, ordering of the output and the generated summary text.
describe('JudgeAgent', () => {
  // A single agent instance is shared by every test in this suite.
  const judge = new JudgeAgent();

  // ─── Empty input ───
  test('empty findings → summary says 未发现', () => {
    const { findings, summaryMarkdown } = judge.judge([]);

    expect(findings).toHaveLength(0);
    expect(summaryMarkdown).toContain('未发现');
  });

  // ─── Deduplication by fingerprint ───
  test('duplicate fingerprints → keeps highest weighted', () => {
    const sharedFingerprint = 'same-fingerprint';
    const candidates: TestFinding[] = [
      makeFinding({ fingerprint: sharedFingerprint, severity: 'low', confidence: 0.9 }), // expected weight: 1 * 0.9 = 0.9
      makeFinding({ fingerprint: sharedFingerprint, severity: 'high', confidence: 0.5 }), // expected weight: 3 * 0.5 = 1.5 ← winner
      makeFinding({ fingerprint: sharedFingerprint, severity: 'medium', confidence: 0.6 }), // expected weight: 2 * 0.6 = 1.2
    ];

    const { findings: kept } = judge.judge(candidates);

    expect(kept).toHaveLength(1);
    expect(kept[0].severity).toBe('high');
    expect(kept[0].confidence).toBe(0.5);
  });

  test('same fingerprint same weight → first one wins (no override)', () => {
    const sharedFingerprint = 'dup-fp';
    const candidates: TestFinding[] = [
      makeFinding({ fingerprint: sharedFingerprint, severity: 'high', confidence: 0.5, title: 'First' }),
      makeFinding({ fingerprint: sharedFingerprint, severity: 'high', confidence: 0.5, title: 'Second' }),
    ];

    const { findings: kept } = judge.judge(candidates);

    expect(kept).toHaveLength(1);
    // On a tie the earlier entry is expected to survive.
    expect(kept[0].title).toBe('First');
  });

  // ─── Sorting by severity × confidence ───
  test('findings sorted by weight descending', () => {
    const unordered: TestFinding[] = [
      makeFinding({ fingerprint: 'a', severity: 'low', confidence: 0.9 }), // expected weight 0.9
      makeFinding({ fingerprint: 'b', severity: 'high', confidence: 0.8 }), // expected weight 2.4
      makeFinding({ fingerprint: 'c', severity: 'medium', confidence: 0.7 }), // expected weight 1.4
    ];

    const ranked = judge.judge(unordered).findings;

    expect(ranked).toHaveLength(3);
    // Highest expected weight first: b (2.4), c (1.4), a (0.9).
    expect(ranked.map((finding) => finding.fingerprint)).toEqual(['b', 'c', 'a']);
  });

  // ─── Summary text ───
  test('summary counts by severity', () => {
    const mixedSeverities: TestFinding[] = [
      makeFinding({ fingerprint: 'a', severity: 'high', confidence: 0.9 }),
      makeFinding({ fingerprint: 'b', severity: 'high', confidence: 0.85 }),
      makeFinding({ fingerprint: 'c', severity: 'medium', confidence: 0.8 }),
      makeFinding({ fingerprint: 'd', severity: 'low', confidence: 0.7 }),
    ];

    const { summaryMarkdown } = judge.judge(mixedSeverities);

    for (const fragment of ['4 个问题', 'high 2', 'medium 1', 'low 1']) {
      expect(summaryMarkdown).toContain(fragment);
    }
  });

  test('single finding → counts correctly', () => {
    const lone: TestFinding[] = [
      makeFinding({ fingerprint: 'x', severity: 'medium', confidence: 0.8 }),
    ];

    const { summaryMarkdown } = judge.judge(lone);

    for (const fragment of ['1 个问题', 'high 0', 'medium 1', 'low 0']) {
      expect(summaryMarkdown).toContain(fragment);
    }
  });

  // ─── Dedup + sort combined ───
  test('dedup then sort: complex scenario', () => {
    const candidates: TestFinding[] = [
      makeFinding({ fingerprint: 'x', severity: 'low', confidence: 0.3 }), // expected weight 0.3 — will be overridden
      makeFinding({ fingerprint: 'y', severity: 'high', confidence: 0.9 }), // expected weight 2.7 — unique
      makeFinding({ fingerprint: 'x', severity: 'medium', confidence: 0.8 }), // expected weight 1.6 — overrides x
      makeFinding({ fingerprint: 'z', severity: 'high', confidence: 0.5 }), // expected weight 1.5 — unique
    ];

    const ranked = judge.judge(candidates).findings;

    expect(ranked).toHaveLength(3); // x, y, z (deduped)
    // Expected order by weight: y(2.7) > x(1.6) > z(1.5)
    expect(ranked[0].fingerprint).toBe('y');
    expect(ranked[1].fingerprint).toBe('x');
    expect(ranked[1].severity).toBe('medium'); // the overriding version of x
    expect(ranked[2].fingerprint).toBe('z');
  });

  // ─── All same severity ───
  test('all high severity → sorted by confidence descending', () => {
    const allHigh: TestFinding[] = [
      makeFinding({ fingerprint: 'a', severity: 'high', confidence: 0.5 }),
      makeFinding({ fingerprint: 'b', severity: 'high', confidence: 0.9 }),
      makeFinding({ fingerprint: 'c', severity: 'high', confidence: 0.7 }),
    ];

    const ranked = judge.judge(allHigh).findings;

    // Only the first three positions are inspected.
    const leadingFingerprints = ranked.slice(0, 3).map((finding) => finding.fingerprint);
    expect(leadingFingerprints).toEqual(['b', 'c', 'a']);
  });

  // ─── Return type structure ───
  test('result has summaryMarkdown and findings', () => {
    const verdict = judge.judge([]);

    for (const key of ['summaryMarkdown', 'findings']) {
      expect(verdict).toHaveProperty(key);
    }
    expect(typeof verdict.summaryMarkdown).toBe('string');
    expect(Array.isArray(verdict.findings)).toBe(true);
  });
});
|
||||
@@ -1,435 +0,0 @@
|
||||
import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test';
|
||||
import { CodexRunner } from '../codex/codex-runner';
|
||||
import type { DiffExtractor } from '../context/diff-extractor';
|
||||
import type { LocalRepoManager, LocalRepoPaths } from '../context/local-repo-manager';
|
||||
import { ReviewOrchestrator } from '../orchestrator';
|
||||
import type { FileReviewStore } from '../store/file-review-store';
|
||||
import type { Finding, ReviewContext, ReviewRun, ReviewTask } from '../types';
|
||||
|
||||
type Snapshot = { baseSha: string; headSha: string } | null;
|
||||
|
||||
/**
 * Builds a review-run fixture for the incremental-review tests.
 *
 * The defaults describe an in-progress pull_request run for `owner/repo#1`.
 * Any key present in `overrides` replaces the corresponding default.
 *
 * @param overrides - Field values that take precedence over the defaults.
 * @returns A fully populated run object.
 */
function makeRun(overrides: Partial<ReviewRun> = {}): ReviewRun {
  // Read the clock once so that createdAt and updatedAt are always identical
  // by default; two separate Date reads could straddle a millisecond boundary.
  const now = new Date().toISOString();

  return {
    id: 'run-1',
    idempotencyKey: 'owner/repo#1:base...head',
    eventType: 'pull_request',
    status: 'in_progress',
    owner: 'owner',
    repo: 'repo',
    cloneUrl: 'https://example.com/repo.git',
    prNumber: 1,
    baseSha: 'base-sha',
    headSha: 'head-sha',
    attempts: 1,
    maxAttempts: 3,
    createdAt: now,
    updatedAt: now,
    ...overrides,
  };
}
|
||||
|
||||
/**
 * Creates a stand-in for the review store in which every method is a mock.
 *
 * `getRunDetails` resolves to an object with empty `comments` and `findings`
 * arrays, `markFindingPublished` resolves to `true`, and the remaining
 * methods resolve to `undefined`.
 */
function createStoreMock() {
  // Each call yields an independent mock so call counts are tracked per method.
  const resolvesToUndefined = () => mock(async () => undefined);

  return {
    markRunIgnored: resolvesToUndefined(),
    addStep: resolvesToUndefined(),
    getRunDetails: mock(async () => ({ comments: [], findings: [] })),
    addFindings: resolvesToUndefined(),
    markFindingPublished: mock(async () => true),
    addCommentRecord: resolvesToUndefined(),
  };
}
|
||||
|
||||
/**
 * Creates a stand-in for the local repo manager.
 *
 * `prepareWorkspace` resolves to fixed `/tmp` paths, `resolveReviewedRef`
 * resolves to the supplied `snapshot`, and `saveReviewedRef` and
 * `cleanupWorkspace` resolve to `undefined`.
 *
 * @param snapshot - Value handed back by `resolveReviewedRef` (may be null).
 * @returns The mocked manager together with the paths it reports.
 */
function createLocalRepoManagerMock(snapshot: Snapshot) {
  const repoPaths: LocalRepoPaths = {
    mirrorPath: '/tmp/mirror',
    workspacePath: '/tmp/workspace',
  };

  return {
    manager: {
      prepareWorkspace: mock(async () => repoPaths),
      resolveReviewedRef: mock(async () => snapshot),
      saveReviewedRef: mock(async () => undefined),
      cleanupWorkspace: mock(async () => undefined),
    },
    repoPaths,
  };
}
|
||||
|
||||
/**
 * Creates a stand-in for the diff extractor.
 *
 * `buildContext` resolves to a context that carries the given diff text and
 * otherwise empty collections. `getSandbox` returns an object whose
 * `execute` resolves to empty output with exit code 0.
 *
 * @param diff - Diff text placed into the returned context.
 * @returns The mocked extractor together with the context it resolves to.
 */
function createDiffExtractorMock(diff = 'diff --git a/a.ts b/a.ts\n+const x = 1;') {
  const context: ReviewContext = {
    workspacePath: '/tmp/workspace',
    mirrorPath: '/tmp/mirror',
    diff,
    changedFiles: [],
    parsedDiff: [],
    fileContents: {},
  };

  // A fresh sandbox object is produced on every getSandbox call.
  const getSandbox = mock(() => {
    return {
      execute: async () => {
        return { stdout: '', stderr: '', exitCode: 0 };
      },
    };
  });
  const buildContext = mock(async () => context);

  const extractor = { getSandbox, buildContext };
  return { extractor, context };
}
|
||||
|
||||
/**
 * Overwrites collaborators on the orchestrator with mocks for a quick run.
 *
 * After the call:
 * - `triageAgent.analyze` resolves to a trivial/small result with mode
 *   `skip` and no tasks;
 * - `judgeAgent.judge` returns the summary `ok` with no findings;
 * - `publishSummary` resolves to `undefined`;
 * - `publishLineComments` resolves to `false`.
 *
 * The members are reached through a type cast of the orchestrator instance.
 *
 * @param orchestrator - Instance whose collaborators are overwritten in place.
 */
function wireOrchestratorFastPath(orchestrator: ReviewOrchestrator) {
  type JudgeableFinding = Omit<Finding, 'id' | 'runId' | 'published'>;

  type TriageOutcome = {
    complexity: 'trivial' | 'standard' | 'complex';
    reviewSize: 'small' | 'medium' | 'large';
    mode: 'skip' | 'light' | 'full';
    tasks: ReviewTask[];
    riskTags: string[];
    rationale: string;
  };

  type OrchestratorInternals = {
    triageAgent: {
      analyze: (context: ReviewContext) => Promise<TriageOutcome>;
    };
    judgeAgent: {
      judge: (findings: JudgeableFinding[]) => {
        summaryMarkdown: string;
        findings: JudgeableFinding[];
      };
    };
    publishSummary: (run: ReviewRun, summary: string, gatedCount: number) => Promise<void>;
    publishLineComments: (
      run: ReviewRun,
      comments: Array<{ path: string; line: number; comment: string }>
    ) => Promise<boolean>;
  };

  const internals = orchestrator as unknown as OrchestratorInternals;

  // A fresh result object is built for each analyze call.
  const analyze = mock(
    async (): Promise<TriageOutcome> => ({
      complexity: 'trivial',
      reviewSize: 'small',
      mode: 'skip',
      tasks: [],
      riskTags: [],
      rationale: 'test fast-path',
    })
  );
  internals.triageAgent = { analyze };

  const judge = mock(() => ({ summaryMarkdown: 'ok', findings: [] }));
  internals.judgeAgent = { judge };

  internals.publishSummary = mock(async () => undefined);
  internals.publishLineComments = mock(async () => false);
}
|
||||
|
||||
/**
 * Builds a CodexRunner wired to a mocked store and a mocked repo manager.
 *
 * The runner's `generateCodexWorkspaceConfig` and `runCodexProcess` members
 * are overwritten (through a type cast) with mocks that resolve to
 * `undefined`.
 *
 * @param snapshot - Value the mocked manager returns from `resolveReviewedRef`.
 * @returns The runner, its mocked collaborators, the repo paths, and the mock
 *          installed as `runCodexProcess`.
 */
function createCodexRunnerForExecute(snapshot: Snapshot) {
  type RunnerInternals = {
    generateCodexWorkspaceConfig: (workspacePath: string, runId: string) => Promise<void>;
    runCodexProcess: (
      workspacePath: string,
      run: ReviewRun,
      lastReviewedHead?: string
    ) => Promise<void>;
  };

  const store = createStoreMock();
  const { manager, repoPaths } = createLocalRepoManagerMock(snapshot);

  const runner = new CodexRunner(
    store as unknown as FileReviewStore,
    manager as unknown as LocalRepoManager
  );

  const internals = runner as unknown as RunnerInternals;
  internals.generateCodexWorkspaceConfig = mock(async () => undefined);
  internals.runCodexProcess = mock(async () => undefined);

  // Expose the installed process mock so tests can assert on its calls.
  const runCodexProcessMock = internals.runCodexProcess as ReturnType<typeof mock>;

  return { runner, store, manager, repoPaths, runCodexProcessMock };
}
|
||||
|
||||
// Test suite: which "last reviewed head" the orchestrator hands to the diff
// extractor, and when it stores a reviewed-ref snapshot.
describe('ReviewOrchestrator incremental baseline resolution', () => {
  const resetMocks = () => {
    mock.restore();
  };

  beforeEach(resetMocks);
  afterEach(resetMocks);

  /**
   * Builds an orchestrator over mocked collaborators and applies the
   * fast-path wiring. The mocked manager resolves `snapshot` as the
   * reviewed ref.
   */
  function buildFastPathOrchestrator(snapshot: Snapshot) {
    const store = createStoreMock();
    const { manager } = createLocalRepoManagerMock(snapshot);
    const { extractor } = createDiffExtractorMock();

    const orchestrator = new ReviewOrchestrator(
      store as unknown as FileReviewStore,
      manager as unknown as LocalRepoManager,
      extractor as unknown as DiffExtractor
    );
    wireOrchestratorFastPath(orchestrator);

    return { orchestrator, manager, extractor };
  }

  /** Asserts that buildContext was called without a last-reviewed head. */
  function expectFullReviewContext(extractor: { buildContext: unknown }, run: ReviewRun) {
    expect(extractor.buildContext).toHaveBeenCalledWith(
      run,
      '/tmp/mirror',
      '/tmp/workspace',
      undefined
    );
  }

  test('matching baseSha uses snapshot head as lastReviewedHead', async () => {
    const run = makeRun({ baseSha: 'same-base', headSha: 'new-head' });
    const { orchestrator, manager, extractor } = buildFastPathOrchestrator({
      baseSha: 'same-base',
      headSha: 'old-head',
    });

    await orchestrator.execute(run);

    expect(manager.resolveReviewedRef).toHaveBeenCalledTimes(1);
    expect(extractor.buildContext).toHaveBeenCalledTimes(1);
    expect(extractor.buildContext).toHaveBeenCalledWith(
      run,
      '/tmp/mirror',
      '/tmp/workspace',
      'old-head'
    );
  });

  test('different baseSha falls back to full review (no lastReviewedHead)', async () => {
    const run = makeRun({ baseSha: 'current-base' });
    const { orchestrator, extractor } = buildFastPathOrchestrator({
      baseSha: 'saved-base',
      headSha: 'old-head',
    });

    await orchestrator.execute(run);

    expectFullReviewContext(extractor, run);
  });

  test('missing snapshot falls back to full review', async () => {
    const run = makeRun();
    const { orchestrator, extractor } = buildFastPathOrchestrator(null);

    await orchestrator.execute(run);

    expectFullReviewContext(extractor, run);
  });

  test('non pull_request event skips incremental snapshot lookup', async () => {
    const run = makeRun({
      eventType: 'commit_status',
      prNumber: undefined,
      commitSha: 'commit-sha',
      headSha: undefined,
    });
    const { orchestrator, manager, extractor } = buildFastPathOrchestrator({
      baseSha: 'same-base',
      headSha: 'old-head',
    });

    await orchestrator.execute(run);

    expect(manager.resolveReviewedRef).not.toHaveBeenCalled();
    expectFullReviewContext(extractor, run);
  });

  test('successful review saves reviewed ref snapshot', async () => {
    const run = makeRun({ baseSha: 'base-1', headSha: 'head-1', prNumber: 99 });
    const { orchestrator, manager } = buildFastPathOrchestrator(null);

    await orchestrator.execute(run);

    expect(manager.saveReviewedRef).toHaveBeenCalledTimes(1);
    expect(manager.saveReviewedRef).toHaveBeenCalledWith('/tmp/mirror', 99, 'base-1', 'head-1');
  });

  test('failed review does not save reviewed ref snapshot', async () => {
    const run = makeRun({ baseSha: 'base-1', headSha: 'head-1', prNumber: 99 });
    const store = createStoreMock();
    const { manager } = createLocalRepoManagerMock(null);
    const { extractor } = createDiffExtractorMock();

    // Make context building fail; the fast-path wiring is not applied here.
    extractor.buildContext = mock(async () => {
      throw new Error('context failed');
    });

    const orchestrator = new ReviewOrchestrator(
      store as unknown as FileReviewStore,
      manager as unknown as LocalRepoManager,
      extractor as unknown as DiffExtractor
    );

    let failure: Error | undefined;
    try {
      await orchestrator.execute(run);
    } catch (thrown) {
      failure = thrown as Error;
    }

    expect(failure).toBeDefined();
    expect(failure?.message).toContain('context failed');
    expect(manager.saveReviewedRef).not.toHaveBeenCalled();
  });
});
|
||||
|
||||
// Test suite: which "last reviewed head" CodexRunner passes to its process
// step, when it stores a reviewed-ref snapshot, and two private helpers
// (buildReviewPrompt, normalizeApiBaseUrl) reached through a type cast.
describe('CodexRunner incremental baseline resolution and prompt behavior', () => {
  const resetMocks = () => {
    mock.restore();
  };

  beforeEach(resetMocks);
  afterEach(resetMocks);

  test('matching baseSha uses snapshot head for incremental run', async () => {
    const run = makeRun({ baseSha: 'same-base', headSha: 'new-head' });
    const snapshot = { baseSha: 'same-base', headSha: 'old-head' };
    const { runner, runCodexProcessMock } = createCodexRunnerForExecute(snapshot);

    await runner.execute(run);

    expect(runCodexProcessMock).toHaveBeenCalledWith('/tmp/workspace', run, 'old-head');
  });

  test('different baseSha falls back to full run', async () => {
    const run = makeRun({ baseSha: 'current-base', headSha: 'new-head' });
    const snapshot = { baseSha: 'saved-base', headSha: 'old-head' };
    const { runner, runCodexProcessMock } = createCodexRunnerForExecute(snapshot);

    await runner.execute(run);

    expect(runCodexProcessMock).toHaveBeenCalledWith('/tmp/workspace', run, undefined);
  });

  test('missing snapshot falls back to full run', async () => {
    const run = makeRun();
    const { runner, runCodexProcessMock } = createCodexRunnerForExecute(null);

    await runner.execute(run);

    expect(runCodexProcessMock).toHaveBeenCalledWith('/tmp/workspace', run, undefined);
  });

  test('non pull_request event skips incremental snapshot lookup', async () => {
    const run = makeRun({
      eventType: 'commit_status',
      prNumber: undefined,
      commitSha: 'commit-sha',
      headSha: undefined,
    });
    const snapshot = { baseSha: 'saved-base', headSha: 'old-head' };
    const { runner, manager, runCodexProcessMock } = createCodexRunnerForExecute(snapshot);

    await runner.execute(run);

    expect(manager.resolveReviewedRef).not.toHaveBeenCalled();
    expect(runCodexProcessMock).toHaveBeenCalledWith('/tmp/workspace', run, undefined);
  });

  test('successful codex review saves reviewed ref snapshot', async () => {
    const run = makeRun({ baseSha: 'base-1', headSha: 'head-1', prNumber: 22 });
    const { runner, manager } = createCodexRunnerForExecute(null);

    await runner.execute(run);

    expect(manager.saveReviewedRef).toHaveBeenCalledTimes(1);
    expect(manager.saveReviewedRef).toHaveBeenCalledWith('/tmp/mirror', 22, 'base-1', 'head-1');
  });

  test('failed codex review does not save reviewed ref snapshot', async () => {
    type ProcessInternals = {
      runCodexProcess: (
        workspacePath: string,
        runArg: ReviewRun,
        lastReviewedHead?: string
      ) => Promise<void>;
    };

    const run = makeRun({ baseSha: 'base-1', headSha: 'head-1', prNumber: 22 });
    const { runner, manager } = createCodexRunnerForExecute(null);

    // Replace the process step with one that always rejects.
    (runner as unknown as ProcessInternals).runCodexProcess = mock(async () => {
      throw new Error('codex failed');
    });

    let failure: Error | undefined;
    try {
      await runner.execute(run);
    } catch (thrown) {
      failure = thrown as Error;
    }

    expect(failure).toBeDefined();
    expect(failure?.message).toContain('codex failed');
    expect(manager.saveReviewedRef).not.toHaveBeenCalled();
  });

  test('buildReviewPrompt includes incremental instructions when lastReviewedHead is set', () => {
    type PromptInternals = {
      buildReviewPrompt: (runArg: ReviewRun, lastReviewedHead?: string) => string;
    };

    const run = makeRun({ baseSha: 'base-a', headSha: 'head-b', prNumber: 7 });
    const { runner } = createCodexRunnerForExecute(null);
    const internals = runner as unknown as PromptInternals;

    const prompt = internals.buildReviewPrompt(run, 'reviewed-head-123');

    const expectedFragments = [
      '增量审查模式:仅审查上次审查后的新变更',
      '上次审查 SHA:reviewed-head-123',
      'git diff reviewed-head-123..head-b',
    ];
    for (const fragment of expectedFragments) {
      expect(prompt).toContain(fragment);
    }
  });

  test('normalizeApiBaseUrl appends /v1 when missing', () => {
    type UrlInternals = {
      normalizeApiBaseUrl: (rawUrl: string) => string;
    };

    const { runner } = createCodexRunnerForExecute(null);
    const internals = runner as unknown as UrlInternals;

    // Bare host, already-suffixed URL and trailing-slash URL all map to /v1.
    const rawUrls = [
      'https://api.example.com',
      'https://api.example.com/v1',
      'https://api.example.com/',
    ];
    for (const rawUrl of rawUrls) {
      expect(internals.normalizeApiBaseUrl(rawUrl)).toBe('https://api.example.com/v1');
    }
  });
});
|
||||
@@ -1,244 +0,0 @@
|
||||
import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test';
|
||||
import type { DiffExtractor } from '../context/diff-extractor';
|
||||
import type { LocalRepoManager, LocalRepoPaths } from '../context/local-repo-manager';
|
||||
import type { FileReviewStore } from '../store/file-review-store';
|
||||
import type { Finding, ReviewContext, ReviewRun, ReviewTask } from '../types';
|
||||
|
||||
/**
 * Builds a review-run fixture for the project-prompt wiring tests.
 *
 * The defaults describe an in-progress pull_request run for `owner/repo#8`.
 * Any key present in `overrides` replaces the corresponding default.
 *
 * @param overrides - Field values that take precedence over the defaults.
 * @returns A fully populated run object.
 */
function makeRun(overrides: Partial<ReviewRun> = {}): ReviewRun {
  // Read the clock once so that createdAt and updatedAt are always identical
  // by default; two separate Date reads could straddle a millisecond boundary.
  const now = new Date().toISOString();

  return {
    id: 'run-project-prompt',
    idempotencyKey: 'owner/repo#8:base...head',
    eventType: 'pull_request',
    status: 'in_progress',
    owner: 'owner',
    repo: 'repo',
    cloneUrl: 'https://example.com/repo.git',
    prNumber: 8,
    baseSha: 'base-sha',
    headSha: 'head-sha',
    attempts: 1,
    maxAttempts: 3,
    createdAt: now,
    updatedAt: now,
    ...overrides,
  };
}
|
||||
|
||||
/**
 * Creates a stand-in for the review store in which every method is a mock.
 *
 * `getRunDetails` resolves to an object with empty `comments` and `findings`
 * arrays, `markFindingPublished` resolves to `true`, and the remaining
 * methods resolve to `undefined`.
 */
function createStoreMock() {
  const markRunIgnored = mock(async () => undefined);
  const addStep = mock(async () => undefined);
  const getRunDetails = mock(async () => ({ comments: [], findings: [] }));
  const addFindings = mock(async () => undefined);
  const markFindingPublished = mock(async () => true);
  const addCommentRecord = mock(async () => undefined);

  return {
    markRunIgnored,
    addStep,
    getRunDetails,
    addFindings,
    markFindingPublished,
    addCommentRecord,
  };
}
|
||||
|
||||
/**
 * Creates a stand-in for the local repo manager.
 *
 * `prepareWorkspace` resolves to fixed `/tmp` paths, `resolveReviewedRef`
 * resolves to `null`, and `saveReviewedRef` and `cleanupWorkspace` resolve
 * to `undefined`.
 *
 * @returns The mocked manager together with the paths it reports.
 */
function createLocalRepoManagerMock() {
  const repoPaths: LocalRepoPaths = {
    mirrorPath: '/tmp/mirror',
    workspacePath: '/tmp/workspace',
  };

  const manager = {
    prepareWorkspace: mock(async () => repoPaths),
    resolveReviewedRef: mock(async () => null),
    saveReviewedRef: mock(async () => undefined),
    cleanupWorkspace: mock(async () => undefined),
  };

  return { manager, repoPaths };
}
|
||||
|
||||
/**
 * Creates a stand-in for the diff extractor.
 *
 * `buildContext` resolves to a context with a one-line diff and a single
 * modified file, `src/app.ts`. `getSandbox` returns an object whose
 * `execute` resolves to empty output with exit code 0.
 *
 * @returns The context and the mocked extractor that resolves to it.
 */
function createDiffExtractorMock() {
  const modifiedFile = {
    path: 'src/app.ts',
    status: 'M',
    additions: 3,
    deletions: 1,
  };

  const context: ReviewContext = {
    workspacePath: '/tmp/workspace',
    mirrorPath: '/tmp/mirror',
    diff: 'diff --git a/src/app.ts b/src/app.ts\n+const x = 1;',
    changedFiles: [modifiedFile],
    parsedDiff: [],
    fileContents: {},
  };

  // A fresh sandbox object is produced on every getSandbox call.
  const getSandbox = mock(() => {
    return {
      execute: async () => {
        return { stdout: '', stderr: '', exitCode: 0 };
      },
    };
  });
  const buildContext = mock(async () => context);

  return {
    context,
    extractor: { getSandbox, buildContext },
  };
}
|
||||
|
||||
// Test suite: the project-level prompt returned by the (module-mocked)
// resolveProjectReviewPrompt reaches the triage agent, the specialist agent
// options, and the Codex review prompt.
describe('project prompt wiring', () => {
  type ReviewFindings = Array<Omit<Finding, 'id' | 'runId' | 'published'>>;

  type PromptOptions = { projectPrompt?: string };

  type TriageResultLike = {
    complexity: 'trivial' | 'standard' | 'complex';
    reviewSize: 'small' | 'medium' | 'large';
    mode: 'skip' | 'light' | 'full';
    tasks: ReviewTask[];
    riskTags: string[];
    rationale: string;
  };

  type SpecialistAgentLike = {
    reviewWithOptions: (
      run: ReviewRun,
      context: ReviewContext,
      options: PromptOptions
    ) => Promise<{ findings: ReviewFindings }>;
    reviewWithReflection: (
      run: ReviewRun,
      context: ReviewContext,
      maxRounds?: number,
      options?: PromptOptions
    ) => Promise<{ findings: ReviewFindings }>;
  };

  type InternalOrchestrator = {
    triageAgent: {
      analyze: (context: ReviewContext, options?: PromptOptions) => Promise<TriageResultLike>;
    };
    agentMap: Record<string, SpecialistAgentLike>;
    judgeAgent: {
      judge: (findings: ReviewFindings) => { summaryMarkdown: string; findings: ReviewFindings };
    };
    publishSummary: (run: ReviewRun, summary: string, gatedCount: number) => Promise<void>;
    publishLineComments: (
      run: ReviewRun,
      comments: Array<{ path: string; line: number; comment: string }>
    ) => Promise<boolean>;
  };

  const resetMocks = () => {
    mock.restore();
  };

  beforeEach(resetMocks);
  afterEach(resetMocks);

  test('orchestrator forwards resolved project prompt to triage and specialist execution options', async () => {
    const projectPrompt = `repo-policy-${'P'.repeat(360)}`;

    // The module mock is registered before the orchestrator module is imported.
    mock.module('../project-review-prompt', () => ({
      resolveProjectReviewPrompt: () => projectPrompt,
    }));

    const { ReviewOrchestrator } = await import('../orchestrator');

    const store = createStoreMock();
    const { manager } = createLocalRepoManagerMock();
    const { extractor } = createDiffExtractorMock();

    const orchestrator = new ReviewOrchestrator(
      store as unknown as FileReviewStore,
      manager as unknown as LocalRepoManager,
      extractor as unknown as DiffExtractor
    );
    const internals = orchestrator as unknown as InternalOrchestrator;

    // A single light correctness task with tools, reflection and debate disabled.
    const correctnessTask: ReviewTask = {
      domain: 'correctness',
      paths: ['src/app.ts'],
      riskTags: [],
      mode: 'light',
      tokenBudget: 1200,
      maxIterations: 1,
      allowTools: false,
      allowReflection: false,
      allowDebate: false,
    };

    const triageAnalyzeMock = mock(
      async (): Promise<TriageResultLike> => ({
        complexity: 'standard',
        reviewSize: 'small',
        mode: 'light',
        tasks: [correctnessTask],
        riskTags: [],
        rationale: 'project prompt wiring test',
      })
    );
    const reviewWithOptionsMock = mock(async () => {
      return { findings: [] as ReviewFindings };
    });
    const reviewWithReflectionMock = mock(async () => {
      return { findings: [] as ReviewFindings };
    });
    const judgeMock = mock(() => {
      return { summaryMarkdown: 'ok', findings: [] as ReviewFindings };
    });

    internals.triageAgent = { analyze: triageAnalyzeMock };
    internals.agentMap = {
      correctness: {
        reviewWithOptions: reviewWithOptionsMock,
        reviewWithReflection: reviewWithReflectionMock,
      },
    };
    internals.judgeAgent = { judge: judgeMock };
    internals.publishSummary = mock(async () => undefined);
    internals.publishLineComments = mock(async () => false);

    const run = makeRun();
    await orchestrator.execute(run);

    expect(triageAnalyzeMock).toHaveBeenCalledWith(expect.anything(), { projectPrompt });
    expect(reviewWithOptionsMock).toHaveBeenCalledWith(
      run,
      expect.anything(),
      expect.objectContaining({ projectPrompt })
    );
  });

  test('codex prompt builder includes resolved project-level prompt section', async () => {
    type PromptInternals = {
      buildReviewPrompt: (run: ReviewRun, lastReviewedHead?: string) => string;
    };

    const projectPrompt = `codex-policy-${'X'.repeat(320)}`;

    // The module mock is registered before the runner module is imported.
    mock.module('../project-review-prompt', () => ({
      resolveProjectReviewPrompt: () => projectPrompt,
    }));

    const { CodexRunner } = await import('../codex/codex-runner');

    const store = createStoreMock();
    const { manager } = createLocalRepoManagerMock();
    const runner = new CodexRunner(
      store as unknown as FileReviewStore,
      manager as unknown as LocalRepoManager
    );

    const prompt = (runner as unknown as PromptInternals).buildReviewPrompt(makeRun(), undefined);

    expect(prompt).toContain('## 项目级审查要求');
    expect(prompt).toContain(projectPrompt);
  });
});
|
||||
@@ -1,197 +0,0 @@
|
||||
import { describe, expect, test } from 'bun:test';
|
||||
import { applyPublishPolicy } from '../policy/publish-policy';
|
||||
import type { Finding } from '../types';
|
||||
|
||||
type TestFinding = Omit<Finding, 'id' | 'runId' | 'published'>;
|
||||
|
||||
/**
 * Builds a finding fixture for the publish-policy tests.
 *
 * Every field receives a default value (medium severity, confidence 0.9);
 * any key present in `overrides` replaces the corresponding default. When no
 * fingerprint is supplied, a random `fp-` prefixed one is generated per call.
 *
 * @param overrides - Field values that take precedence over the defaults.
 * @returns A finding without the `id`, `runId` and `published` fields.
 */
function makeFinding(overrides: Partial<TestFinding> = {}): TestFinding {
  const randomSuffix = Math.random().toString(36).slice(2, 8);

  const defaults: TestFinding = {
    fingerprint: `fp-${randomSuffix}`,
    category: 'correctness',
    severity: 'medium',
    confidence: 0.9,
    path: 'src/foo.ts',
    line: 10,
    title: 'Test finding',
    detail: 'Detail',
    evidence: 'Evidence',
    suggestion: 'Fix it',
  };

  // Overrides are spread last so they win over the defaults.
  return { ...defaults, ...overrides };
}
|
||||
|
||||
describe('applyPublishPolicy', () => {
|
||||
const MIN_CONFIDENCE = 0.8;
|
||||
|
||||
// ─── Empty input ───
|
||||
test('empty findings → all arrays empty', () => {
|
||||
const result = applyPublishPolicy([], MIN_CONFIDENCE, false);
|
||||
expect(result.publishable).toEqual([]);
|
||||
expect(result.gated).toEqual([]);
|
||||
expect(result.dropped).toEqual([]);
|
||||
});
|
||||
|
||||
// ─── High confidence + medium/high severity → publishable ───
|
||||
test('high severity + high confidence → publishable (humanGate off)', () => {
|
||||
const f = makeFinding({ severity: 'high', confidence: 0.95 });
|
||||
const result = applyPublishPolicy([f], MIN_CONFIDENCE, false);
|
||||
expect(result.publishable).toHaveLength(1);
|
||||
expect(result.gated).toHaveLength(0);
|
||||
expect(result.dropped).toHaveLength(0);
|
||||
});
|
||||
|
||||
test('medium severity + high confidence → publishable (humanGate off)', () => {
|
||||
const f = makeFinding({ severity: 'medium', confidence: 0.85 });
|
||||
const result = applyPublishPolicy([f], MIN_CONFIDENCE, false);
|
||||
expect(result.publishable).toHaveLength(1);
|
||||
expect(result.gated).toHaveLength(0);
|
||||
expect(result.dropped).toHaveLength(0);
|
||||
});
|
||||
|
||||
test('high severity + exactly at threshold → publishable', () => {
|
||||
const f = makeFinding({ severity: 'high', confidence: 0.8 });
|
||||
const result = applyPublishPolicy([f], MIN_CONFIDENCE, false);
|
||||
expect(result.publishable).toHaveLength(1);
|
||||
});
|
||||
|
||||
// ─── Low severity → never publishable (even with high confidence) ───
|
||||
test('low severity + high confidence → dropped (humanGate off)', () => {
|
||||
const f = makeFinding({ severity: 'low', confidence: 0.95 });
|
||||
const result = applyPublishPolicy([f], MIN_CONFIDENCE, false);
|
||||
expect(result.publishable).toHaveLength(0);
|
||||
expect(result.gated).toHaveLength(0);
|
||||
expect(result.dropped).toHaveLength(1);
|
||||
});
|
||||
|
||||
test('low severity + high confidence → gated (humanGate on)', () => {
|
||||
const f = makeFinding({ severity: 'low', confidence: 0.95 });
|
||||
const result = applyPublishPolicy([f], MIN_CONFIDENCE, true);
|
||||
expect(result.publishable).toHaveLength(0);
|
||||
expect(result.gated).toHaveLength(1);
|
||||
expect(result.dropped).toHaveLength(0);
|
||||
});
|
||||
|
||||
// ─── Low confidence → not publishable ───
|
||||
test('high severity + low confidence → dropped (humanGate off)', () => {
|
||||
const f = makeFinding({ severity: 'high', confidence: 0.5 });
|
||||
const result = applyPublishPolicy([f], MIN_CONFIDENCE, false);
|
||||
expect(result.publishable).toHaveLength(0);
|
||||
expect(result.dropped).toHaveLength(1);
|
||||
});
|
||||
|
||||
test('high severity + low confidence → gated (humanGate on)', () => {
|
||||
const f = makeFinding({ severity: 'high', confidence: 0.5 });
|
||||
const result = applyPublishPolicy([f], MIN_CONFIDENCE, true);
|
||||
expect(result.publishable).toHaveLength(0);
|
||||
expect(result.gated).toHaveLength(1);
|
||||
expect(result.dropped).toHaveLength(0);
|
||||
});
|
||||
|
||||
test('medium severity + below threshold → dropped (humanGate off)', () => {
|
||||
const f = makeFinding({ severity: 'medium', confidence: 0.7 });
|
||||
const result = applyPublishPolicy([f], MIN_CONFIDENCE, false);
|
||||
expect(result.publishable).toHaveLength(0);
|
||||
expect(result.dropped).toHaveLength(1);
|
||||
});
|
||||
|
||||
test('medium severity + below threshold → gated (humanGate on)', () => {
|
||||
const f = makeFinding({ severity: 'medium', confidence: 0.7 });
|
||||
const result = applyPublishPolicy([f], MIN_CONFIDENCE, true);
|
||||
expect(result.publishable).toHaveLength(0);
|
||||
expect(result.gated).toHaveLength(1);
|
||||
});
|
||||
|
||||
// ─── Human gate ON: non-publishable → always gated, never dropped ───
|
||||
test('humanGate on: low confidence low severity → gated', () => {
|
||||
const f = makeFinding({ severity: 'low', confidence: 0.3 });
|
||||
const result = applyPublishPolicy([f], MIN_CONFIDENCE, true);
|
||||
expect(result.publishable).toHaveLength(0);
|
||||
expect(result.gated).toHaveLength(1);
|
||||
expect(result.dropped).toHaveLength(0);
|
||||
});
|
||||
|
||||
// ─── Mixed findings ───
|
||||
test('mixed findings split correctly', () => {
|
||||
const findings: TestFinding[] = [
|
||||
makeFinding({ severity: 'high', confidence: 0.95 }), // → publishable
|
||||
makeFinding({ severity: 'medium', confidence: 0.85 }), // → publishable
|
||||
makeFinding({ severity: 'low', confidence: 0.9 }), // → dropped (low severity, humanGate off)
|
||||
makeFinding({ severity: 'high', confidence: 0.5 }), // → dropped (low confidence)
|
||||
makeFinding({ severity: 'medium', confidence: 0.6 }), // → dropped (low confidence)
|
||||
];
|
||||
const result = applyPublishPolicy(findings, MIN_CONFIDENCE, false);
|
||||
expect(result.publishable).toHaveLength(2);
|
||||
expect(result.gated).toHaveLength(0);
|
||||
expect(result.dropped).toHaveLength(3);
|
||||
});
|
||||
|
||||
test('mixed findings with humanGate on', () => {
|
||||
const findings: TestFinding[] = [
|
||||
makeFinding({ severity: 'high', confidence: 0.95 }), // → publishable
|
||||
makeFinding({ severity: 'low', confidence: 0.9 }), // → gated
|
||||
makeFinding({ severity: 'high', confidence: 0.5 }), // → gated
|
||||
];
|
||||
const result = applyPublishPolicy(findings, MIN_CONFIDENCE, true);
|
||||
expect(result.publishable).toHaveLength(1);
|
||||
expect(result.gated).toHaveLength(2);
|
||||
expect(result.dropped).toHaveLength(0);
|
||||
});
|
||||
|
||||
// ─── Boundary: confidence exactly at threshold ───
|
||||
test('confidence exactly at threshold + medium severity → publishable', () => {
|
||||
const f = makeFinding({ severity: 'medium', confidence: MIN_CONFIDENCE });
|
||||
const result = applyPublishPolicy([f], MIN_CONFIDENCE, false);
|
||||
expect(result.publishable).toHaveLength(1);
|
||||
});
|
||||
|
||||
test('confidence just below threshold + medium severity → dropped', () => {
|
||||
const f = makeFinding({ severity: 'medium', confidence: MIN_CONFIDENCE - 0.01 });
|
||||
const result = applyPublishPolicy([f], MIN_CONFIDENCE, false);
|
||||
expect(result.dropped).toHaveLength(1);
|
||||
});
|
||||
|
||||
// ─── All same fingerprint (policy doesn't dedup, that's judge's job) ───
|
||||
test('all findings same fingerprint → all processed independently', () => {
|
||||
const fp = 'shared-fingerprint';
|
||||
const findings: TestFinding[] = [
|
||||
makeFinding({ fingerprint: fp, severity: 'high', confidence: 0.9 }),
|
||||
makeFinding({ fingerprint: fp, severity: 'medium', confidence: 0.85 }),
|
||||
makeFinding({ fingerprint: fp, severity: 'low', confidence: 0.95 }),
|
||||
];
|
||||
const result = applyPublishPolicy(findings, MIN_CONFIDENCE, false);
|
||||
// Policy doesn't care about fingerprint - each finding evaluated independently
|
||||
expect(result.publishable).toHaveLength(2); // high+medium
|
||||
expect(result.dropped).toHaveLength(1); // low severity
|
||||
});
|
||||
|
||||
// ─── Different minConfidence thresholds ───
|
||||
test('very low threshold → more findings publishable', () => {
|
||||
const f = makeFinding({ severity: 'medium', confidence: 0.3 });
|
||||
const result = applyPublishPolicy([f], 0.1, false);
|
||||
expect(result.publishable).toHaveLength(1);
|
||||
});
|
||||
|
||||
test('very high threshold → more findings dropped', () => {
|
||||
const f = makeFinding({ severity: 'high', confidence: 0.95 });
|
||||
const result = applyPublishPolicy([f], 0.99, false);
|
||||
expect(result.dropped).toHaveLength(1);
|
||||
});
|
||||
|
||||
// ─── Return value structure ───
|
||||
test('returned findings preserve all original fields', () => {
|
||||
const f = makeFinding({
|
||||
severity: 'high',
|
||||
confidence: 0.95,
|
||||
path: 'src/important.ts',
|
||||
line: 42,
|
||||
title: 'Critical bug',
|
||||
detail: 'Detailed explanation',
|
||||
evidence: 'Code snippet',
|
||||
suggestion: 'Fix suggestion',
|
||||
category: 'security',
|
||||
fingerprint: 'unique-fp-123',
|
||||
});
|
||||
const result = applyPublishPolicy([f], MIN_CONFIDENCE, false);
|
||||
expect(result.publishable[0]).toEqual(f);
|
||||
});
|
||||
});
|
||||
@@ -1,476 +0,0 @@
|
||||
import { describe, expect, mock, test } from 'bun:test';
|
||||
import { z } from 'zod';
|
||||
import type {
|
||||
LLMChatRequest,
|
||||
LLMChatResponse,
|
||||
LLMMessage,
|
||||
LLMToolDefinition,
|
||||
ModelRole,
|
||||
} from '../../llm/types';
|
||||
import { SpecialistAgent } from '../agents/specialist-agent';
|
||||
import { ToolRegistry } from '../tools/registry';
|
||||
import type { Tool } from '../tools/types';
|
||||
import type { FindingCategory, ReviewContext, ReviewRun } from '../types';
|
||||
|
||||
/**
 * Builds a `ReviewRun` fixture for the SpecialistAgent tests.
 *
 * Every field gets a fixed default. `createdAt` and `updatedAt` are taken from
 * a single clock reading so the two timestamps are always identical.
 *
 * @param overrides - Fields that replace the defaults; spread last, so they win.
 * @returns A fresh `ReviewRun` object.
 */
function makeRun(overrides: Partial<ReviewRun> = {}): ReviewRun {
  // One timestamp for both fields: two separate `new Date()` calls can straddle
  // a millisecond boundary and produce a run that looks "updated after creation".
  const now = new Date().toISOString();

  return {
    id: 'run-test-001',
    idempotencyKey: 'idem-test',
    eventType: 'pull_request',
    status: 'in_progress',
    owner: 'test-owner',
    repo: 'test-repo',
    cloneUrl: 'https://example.com/repo.git',
    prNumber: 1,
    baseSha: 'aaa',
    headSha: 'bbb',
    attempts: 0,
    maxAttempts: 2,
    createdAt: now,
    updatedAt: now,
    ...overrides,
  };
}
|
||||
|
||||
/**
 * Builds a `ReviewContext` fixture describing a one-line addition to
 * `src/foo.ts` (raw diff, changed-file list, parsed diff and file contents).
 *
 * @param overrides - Fields that replace the defaults; spread last, so they win.
 * @returns A fresh `ReviewContext` object.
 */
function makeContext(overrides: Partial<ReviewContext> = {}): ReviewContext {
  return {
    workspacePath: '/tmp/test-workspace',
    mirrorPath: '/tmp/test-mirror',
    diff: '--- a/src/foo.ts\n+++ b/src/foo.ts\n@@ -1,3 +1,4 @@\n+const x = null;\n export function foo() {}',
    changedFiles: [{ path: 'src/foo.ts', status: 'M', additions: 1, deletions: 0 }],
    parsedDiff: [
      {
        path: 'src/foo.ts',
        changes: [{ lineNumber: 1, content: 'const x = null;', type: 'add' }],
      },
    ],
    fileContents: { 'src/foo.ts': 'const x = null;\nexport function foo() {}' },
    ...overrides,
  };
}
|
||||
|
||||
/**
 * Builds a minimal `Tool` whose `execute` always resolves to an empty result
 * list. Tests spread the returned object to swap in their own `execute`.
 *
 * @param name - Tool name to register under; defaults to `'search_code'`.
 * @returns A tool that takes a `{ query: string }` argument object.
 */
function makeDummyTool(name = 'search_code'): Tool {
  return {
    name,
    description: 'Search code in the workspace',
    parameters: z.object({ query: z.string() }),
    execute: async () => ({ results: [] }),
  };
}
|
||||
|
||||
/** Request fields the mock gateway records for each `chatForRole` call. */
type ChatRequest = {
  messages: LLMMessage[];
  temperature?: number;
  responseFormat?: 'text' | 'json';
  tools?: LLMToolDefinition[];
  providerOptions?: Record<string, unknown>;
};

/** One recorded gateway call: the model role plus the flattened request fields. */
type ChatCall = { role: ModelRole } & ChatRequest;
|
||||
|
||||
/**
 * Creates a scripted stand-in for the LLM gateway.
 *
 * Each `chatForRole` call is recorded and answered by the next response factory
 * in `responses`. Once the script is exhausted, the last factory is replayed
 * for every further call.
 *
 * @param responses - Response factories, consumed in call order. A factory may
 *   throw to simulate a provider failure.
 * @returns `gateway` (exposes only `chatForRole`) and `getCalls`, which returns
 *   the recorded calls in order.
 * @throws Error (as a rejected promise from `chatForRole`) when the gateway is
 *   called although `responses` is empty.
 */
function createMockGateway(responses: Array<() => LLMChatResponse>) {
  let callIndex = 0;
  const calls: ChatCall[] = [];

  return {
    gateway: {
      chatForRole: async (role: ModelRole, request: Omit<LLMChatRequest, 'model'>) => {
        calls.push({ role, ...request });
        // Past the end of the script, keep replaying the final response.
        const responseFn = responses[callIndex] ?? responses[responses.length - 1];
        callIndex++;
        if (!responseFn) {
          // Fail with a message that names the cause instead of the opaque
          // "responseFn is not a function" TypeError.
          throw new Error(
            'createMockGateway: chatForRole was called but no responses were scripted'
          );
        }
        return responseFn();
      },
    },
    getCalls: () => calls,
  };
}
|
||||
|
||||
/**
 * Builds an `LLMChatResponse` in which the model requests tool calls.
 *
 * @param toolCalls - Requested calls; each `args` value is serialised with
 *   `JSON.stringify` into the call's `arguments` string.
 * @returns A response with `content: null`, `finishReason: 'tool_calls'` and
 *   zeroed token usage.
 */
function toolCallResponse(
  toolCalls: Array<{ id: string; name: string; args: unknown }>
): LLMChatResponse {
  return {
    content: null,
    toolCalls: toolCalls.map((tc) => ({
      id: tc.id,
      name: tc.name,
      arguments: JSON.stringify(tc.args),
    })),
    finishReason: 'tool_calls',
    usage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
  };
}
|
||||
|
||||
/**
 * Builds a final (non-tool) `LLMChatResponse` whose content is `data`
 * serialised as JSON.
 *
 * @param data - Any JSON-serialisable value.
 * @returns A response with `finishReason: 'stop'`, no tool calls and zeroed
 *   token usage.
 */
function jsonResponse(data: unknown): LLMChatResponse {
  return {
    content: JSON.stringify(data),
    toolCalls: [],
    finishReason: 'stop',
    usage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
  };
}
|
||||
|
||||
/**
 * Builds an `LLMChatResponse` that carries neither content nor tool calls.
 *
 * @returns A response with `content: null`, an empty `toolCalls` list,
 *   `finishReason: 'stop'` and zeroed token usage.
 */
function emptyResponse(): LLMChatResponse {
  return {
    content: null,
    toolCalls: [],
    finishReason: 'stop',
    usage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
  };
}
|
||||
|
||||
/**
 * Tests for `SpecialistAgent` driven through a scripted mock gateway:
 * single-call JSON mode (no tool registry), the tool-calling loop, finding
 * de-duplication by fingerprint, and handling of gateway/tool failures.
 */
describe('SpecialistAgent ReAct loop', () => {
  const category: FindingCategory = 'correctness';

  // The mock gateway only implements `chatForRole`; this is the single place
  // where it is cast to whatever gateway type the constructor declares.
  const createAgent = (gateway: unknown, registry?: ToolRegistry) =>
    new SpecialistAgent(
      gateway as ConstructorParameters<typeof SpecialistAgent>[0],
      category,
      'TestAgent',
      'bugs',
      registry
    );

  // Registry holding exactly one tool (the dummy `search_code` tool by default).
  const createRegistry = (tool: Tool = makeDummyTool()) => {
    const registry = new ToolRegistry();
    registry.register(tool);
    return registry;
  };

  // Looks up the tool-result message for a given tool call. Kept loosely typed:
  // the assertions only rely on the runtime fields `role` and `toolCallId`.
  const findToolResult = (messages: LLMMessage[], toolCallId: string) =>
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- runtime-shape lookup in a test
    messages.find((m: any) => m.role === 'tool' && m.toolCallId === toolCallId);

  test('empty diff returns empty findings without calling OpenAI', async () => {
    const { gateway, getCalls } = createMockGateway([]);
    const agent = createAgent(gateway);
    const result = await agent.review(makeRun(), makeContext({ diff: ' ' }));

    expect(result.findings).toHaveLength(0);
    expect(result.agentName).toBe('TestAgent');
    // The test title promises no LLM traffic, so check it explicitly: an agent
    // that called the gateway and swallowed the failure would otherwise pass.
    expect(getCalls()).toHaveLength(0);
  });

  test('no toolRegistry → uses single-call json mode', async () => {
    const finding = {
      severity: 'high',
      confidence: 0.9,
      path: 'src/foo.ts',
      line: 1,
      title: 'Null assignment',
      detail: 'x is null',
      evidence: 'const x = null',
      suggestion: 'Use undefined',
    };

    const { gateway, getCalls } = createMockGateway([() => jsonResponse({ findings: [finding] })]);

    const agent = createAgent(gateway);
    const result = await agent.review(makeRun(), makeContext());

    expect(result.findings).toHaveLength(1);
    expect(result.findings[0].severity).toBe('high');
    expect(result.findings[0].category).toBe('correctness');
    expect(result.findings[0].fingerprint).toBeTruthy();

    const calls = getCalls();
    expect(calls).toHaveLength(1);
    expect(calls[0].responseFormat).toBe('json');
  });

  test('ReAct: tool call → tool result → final JSON findings', async () => {
    const executeFn = mock(async () => ({ results: ['some code match'] }));
    const registry = createRegistry({ ...makeDummyTool(), execute: executeFn });

    const finding = {
      severity: 'medium',
      confidence: 0.85,
      path: 'src/foo.ts',
      line: 1,
      title: 'Potential null',
      detail: 'Null assigned',
      evidence: 'const x = null',
      suggestion: 'Check usage',
    };

    const { gateway, getCalls } = createMockGateway([
      () => toolCallResponse([{ id: 'call_1', name: 'search_code', args: { query: 'null' } }]),
      () => jsonResponse({ findings: [finding], need_more_investigation: false }),
    ]);

    const agent = createAgent(gateway, registry);
    const result = await agent.review(makeRun(), makeContext());

    expect(executeFn).toHaveBeenCalledTimes(1);
    expect(result.findings).toHaveLength(1);
    expect(result.findings[0].category).toBe('correctness');

    const calls = getCalls();
    expect(calls).toHaveLength(2);
  });

  test('ReAct: default staged mode uses 2 iterations and forces final json', async () => {
    const registry = createRegistry();

    const { gateway, getCalls } = createMockGateway([
      () => toolCallResponse([{ id: 'call_1', name: 'search_code', args: { query: 'x' } }]),
      () => jsonResponse({ findings: [], need_more_investigation: false }),
    ]);

    const agent = createAgent(gateway, registry);
    await agent.review(makeRun(), makeContext());

    const calls = getCalls();
    expect(calls).toHaveLength(2);
    expect(calls[0].providerOptions).toEqual({ tool_choice: 'auto' });
    expect(calls[0].responseFormat).toBeUndefined();
    expect(calls[1].providerOptions).toEqual({ tool_choice: 'none' });
    expect(calls[1].responseFormat).toBe('json');
  });

  test('ReAct: dead-loop prevention — need_more_investigation=true but no tool call injects user prompt', async () => {
    const registry = createRegistry();

    const { gateway, getCalls } = createMockGateway([
      () => jsonResponse({ findings: [], need_more_investigation: true }),
      () => jsonResponse({ findings: [], need_more_investigation: false }),
    ]);

    const agent = createAgent(gateway, registry);
    await agent.review(makeRun(), makeContext());

    const calls = getCalls();
    expect(calls.length).toBeGreaterThanOrEqual(2);

    const secondCallMessages = calls[1].messages;
    const lastUserMsg = secondCallMessages.filter((m) => m.role === 'user').pop();
    expect(lastUserMsg).toBeDefined();
    if (!lastUserMsg) throw new Error('Expected user message in second call');
    expect(lastUserMsg.content).toContain('使用工具');
  });

  test('ReAct: fingerprint dedup across iterations — later finding with same fp overwrites', async () => {
    const registry = createRegistry();

    const findingV1 = {
      severity: 'low' as const,
      confidence: 0.6,
      path: 'src/foo.ts',
      line: 1,
      title: 'Null issue',
      detail: 'First version',
      evidence: 'const x = null',
      suggestion: 'Fix v1',
      fingerprint: 'shared-fp-123',
    };

    const findingV2 = {
      ...findingV1,
      severity: 'high' as const,
      confidence: 0.95,
      detail: 'Second version - more confident',
      suggestion: 'Fix v2',
    };

    const { gateway } = createMockGateway([
      () => jsonResponse({ findings: [findingV1], need_more_investigation: true }),
      () => jsonResponse({ findings: [findingV2], need_more_investigation: false }),
    ]);

    const agent = createAgent(gateway, registry);
    const result = await agent.review(makeRun(), makeContext());

    expect(result.findings).toHaveLength(1);
    expect(result.findings[0].severity).toBe('high');
    expect(result.findings[0].confidence).toBe(0.95);
    expect(result.findings[0].detail).toBe('Second version - more confident');
  });

  test('ReAct: multiple unique fingerprints accumulate', async () => {
    const registry = createRegistry();

    const finding1 = {
      severity: 'high' as const,
      confidence: 0.9,
      path: 'src/foo.ts',
      line: 1,
      title: 'Bug A',
      detail: 'Detail A',
      evidence: 'Evidence A',
      suggestion: 'Fix A',
      fingerprint: 'fp-aaa',
    };
    const finding2 = {
      severity: 'medium' as const,
      confidence: 0.8,
      path: 'src/bar.ts',
      line: 5,
      title: 'Bug B',
      detail: 'Detail B',
      evidence: 'Evidence B',
      suggestion: 'Fix B',
      fingerprint: 'fp-bbb',
    };

    const { gateway } = createMockGateway([
      () => jsonResponse({ findings: [finding1], need_more_investigation: true }),
      () => jsonResponse({ findings: [finding2], need_more_investigation: false }),
    ]);

    const agent = createAgent(gateway, registry);
    const result = await agent.review(makeRun(), makeContext());

    expect(result.findings).toHaveLength(2);
    const fps = result.findings.map((f) => f.fingerprint);
    expect(fps).toContain('fp-aaa');
    expect(fps).toContain('fp-bbb');
  });

  test('ReAct: OpenAI error returns empty findings gracefully', async () => {
    const registry = createRegistry();

    const { gateway } = createMockGateway([
      () => {
        throw new Error('API rate limited');
      },
    ]);

    const agent = createAgent(gateway, registry);
    const result = await agent.review(makeRun(), makeContext());

    expect(result.findings).toHaveLength(0);
    expect(result.agentName).toBe('TestAgent');
  });

  test('ReAct: unknown tool call returns error result to model', async () => {
    const registry = createRegistry(makeDummyTool('search_code'));

    const { gateway, getCalls } = createMockGateway([
      () => toolCallResponse([{ id: 'call_1', name: 'nonexistent_tool', args: {} }]),
      () => jsonResponse({ findings: [], need_more_investigation: false }),
    ]);

    const agent = createAgent(gateway, registry);
    await agent.review(makeRun(), makeContext());

    const calls = getCalls();
    expect(calls).toHaveLength(2);
    const toolResultMsg = findToolResult(calls[1].messages, 'call_1');
    expect(toolResultMsg).toBeTruthy();
    if (!toolResultMsg) throw new Error('Expected tool result message');
    const parsed = JSON.parse(toolResultMsg.content);
    expect(parsed.error).toContain('未找到');
  });

  test('ReAct: tool execution error is captured and returned to model', async () => {
    const registry = createRegistry({
      ...makeDummyTool(),
      execute: async () => {
        throw new Error('Sandbox timeout');
      },
    });

    const { gateway, getCalls } = createMockGateway([
      () => toolCallResponse([{ id: 'call_1', name: 'search_code', args: { query: 'x' } }]),
      () => jsonResponse({ findings: [], need_more_investigation: false }),
    ]);

    const agent = createAgent(gateway, registry);
    await agent.review(makeRun(), makeContext());

    const calls = getCalls();
    const toolResultMsg = findToolResult(calls[1].messages, 'call_1');
    expect(toolResultMsg).toBeTruthy();
    if (!toolResultMsg) throw new Error('Expected tool result message');
    const parsed = JSON.parse(toolResultMsg.content);
    expect(parsed.error).toContain('Sandbox timeout');
  });

  test('ReAct: empty choice content ends loop', async () => {
    const registry = createRegistry();

    const { gateway } = createMockGateway([() => emptyResponse()]);

    const agent = createAgent(gateway, registry);
    const result = await agent.review(makeRun(), makeContext());

    expect(result.findings).toHaveLength(0);
  });

  test('ReAct: malformed JSON response ends loop gracefully', async () => {
    const registry = createRegistry();

    const { gateway } = createMockGateway([
      () => ({
        content: 'not valid json {{{',
        toolCalls: [],
        finishReason: 'stop',
        usage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
      }),
    ]);

    const agent = createAgent(gateway, registry);
    const result = await agent.review(makeRun(), makeContext());

    expect(result.findings).toHaveLength(0);
  });

  test('staged context includes deleted lines metadata for review', async () => {
    const { gateway, getCalls } = createMockGateway([
      () =>
        jsonResponse({
          findings: [],
          need_more_investigation: false,
        }),
    ]);

    const context = makeContext({
      parsedDiff: [
        {
          path: 'src/foo.ts',
          changes: [
            { lineNumber: 12, oldLineNumber: 11, content: 'if (auth) {', type: 'context' },
            { lineNumber: 12, oldLineNumber: 12, content: 'if (isAdmin(user)) {', type: 'delete' },
            { lineNumber: 13, oldLineNumber: 13, content: 'return true;', type: 'add' },
          ],
        },
      ],
    });

    const agent = createAgent(gateway);
    await agent.reviewWithOptions(makeRun(), context, {
      mode: 'full',
      allowTools: false,
      scopePaths: ['src/foo.ts'],
      maxContextTokens: 6000,
    });

    const calls = getCalls();
    expect(calls).toHaveLength(1);
    const userMessage = calls[0].messages.find((message) => message.role === 'user');
    expect(userMessage).toBeDefined();
    if (!userMessage) throw new Error('Expected user message in request');

    expect(userMessage.content).toContain('"type": "delete"');
    expect(userMessage.content).toContain('"oldLineNumber": 12');
  });

  test('ReAct: auto-generates fingerprint when finding has none', async () => {
    const registry = createRegistry();

    const finding = {
      severity: 'high' as const,
      confidence: 0.9,
      path: 'src/foo.ts',
      line: 1,
      title: 'Missing null check',
      detail: 'Detail',
      evidence: 'Evidence',
      suggestion: 'Add check',
    };

    const { gateway } = createMockGateway([
      () => jsonResponse({ findings: [finding], need_more_investigation: false }),
    ]);

    const agent = createAgent(gateway, registry);
    const result = await agent.review(makeRun(), makeContext());

    expect(result.findings).toHaveLength(1);
    expect(result.findings[0].fingerprint).toBeTruthy();
    expect(result.findings[0].fingerprint.length).toBeGreaterThan(0);
  });
});
|
||||
@@ -1,8 +1,8 @@
|
||||
import { describe, expect, test } from 'bun:test';
|
||||
import type { LLMGateway } from '../../llm/gateway';
|
||||
import type { LLMChatResponse, ModelRole } from '../../llm/types';
|
||||
import type { LLMChatRequest, LLMChatResponse, ModelRole } from '../../llm/types';
|
||||
import { TriageAgent } from '../agents/triage-agent';
|
||||
import type { ChangedFile, FindingCategory, ReviewContext } from '../types';
|
||||
import type { ChangedFile, ReviewContext } from '../types';
|
||||
|
||||
function makeChangedFile(overrides: Partial<ChangedFile> = {}): ChangedFile {
|
||||
return {
|
||||
@@ -37,58 +37,68 @@ function makeChatResponse(content: string | null): LLMChatResponse {
|
||||
|
||||
/** One recorded `chatForRole` invocation: the model role and the request passed with it. */
type ChatCall = {
  role: ModelRole;
  request: Omit<LLMChatRequest, 'model'>;
};
|
||||
|
||||
/**
 * Creates a recording stand-in for the LLM gateway used by the TriageAgent tests.
 *
 * Every `chatForRole` call is appended to an internal list and then delegated
 * to `implementation`, whose promise is returned unchanged.
 *
 * @param implementation - Produces the response (or rejection) for each call.
 * @returns `gateway` (exposes only `chatForRole`) and `getCalls`, which returns
 *   the recorded calls in order.
 */
function createMockGateway(
  implementation: (
    role: ModelRole,
    request: Omit<LLMChatRequest, 'model'>
  ) => Promise<LLMChatResponse>
) {
  const calls: ChatCall[] = [];
  const gateway: Pick<LLMGateway, 'chatForRole'> = {
    chatForRole: async (role: ModelRole, request: Omit<LLMChatRequest, 'model'>) => {
      // Record first so the call is visible even when the implementation rejects.
      calls.push({ role, request });
      return implementation(role, request);
    },
  };

  return {
    gateway,
    getCalls: () => calls,
  };
}
|
||||
|
||||
describe('TriageAgent task-based routing', () => {
|
||||
test('heuristic: empty changedFiles -> skip mode with no tasks', async () => {
|
||||
describe('TriageAgent hint-based planning', () => {
|
||||
test('heuristic: empty changedFiles -> skip mode with hints only', async () => {
|
||||
const { gateway, getCalls } = createMockGateway(async () =>
|
||||
makeChatResponse(
|
||||
JSON.stringify({
|
||||
complexity: 'complex',
|
||||
review_size: 'large',
|
||||
mode: 'full',
|
||||
relevant_domains: ['correctness', 'security', 'reliability', 'maintainability'],
|
||||
suspected_entrypoints: ['src/ignored.ts'],
|
||||
})
|
||||
)
|
||||
);
|
||||
const agent = new TriageAgent(gateway as any);
|
||||
const agent = new TriageAgent(gateway as unknown as LLMGateway);
|
||||
|
||||
const result = await agent.analyze(makeContext({ changedFiles: [] }));
|
||||
|
||||
expect(result.mode).toBe('skip');
|
||||
expect(result.tasks).toHaveLength(0);
|
||||
expect('tasks' in result).toBe(false);
|
||||
expect(result.suspectedEntrypoints).toEqual([]);
|
||||
expect(result.budgetHints).toEqual({
|
||||
maxTurns: 0,
|
||||
maxToolCalls: 0,
|
||||
maxElapsedMs: 0,
|
||||
tokenBudget: 0,
|
||||
});
|
||||
expect(result.changedFileSummary.totalFiles).toBe(0);
|
||||
expect(getCalls()).toHaveLength(0);
|
||||
});
|
||||
|
||||
test('heuristic: docs/assets only -> skip mode with no tasks', async () => {
|
||||
test('heuristic: docs/assets only -> skip mode with hints only', async () => {
|
||||
const { gateway, getCalls } = createMockGateway(async () =>
|
||||
makeChatResponse(
|
||||
JSON.stringify({
|
||||
complexity: 'complex',
|
||||
review_size: 'large',
|
||||
mode: 'full',
|
||||
relevant_domains: ['correctness', 'security', 'reliability', 'maintainability'],
|
||||
suspected_entrypoints: ['src/ignored.ts'],
|
||||
})
|
||||
)
|
||||
);
|
||||
const agent = new TriageAgent(gateway as any);
|
||||
const agent = new TriageAgent(gateway as unknown as LLMGateway);
|
||||
|
||||
const result = await agent.analyze(
|
||||
makeContext({
|
||||
@@ -101,13 +111,15 @@ describe('TriageAgent task-based routing', () => {
|
||||
);
|
||||
|
||||
expect(result.mode).toBe('skip');
|
||||
expect(result.tasks).toHaveLength(0);
|
||||
expect('tasks' in result).toBe(false);
|
||||
expect(result.suspectedEntrypoints).toEqual([]);
|
||||
expect(result.changedFileSummary.files).toContain('M README.md (+1 -1)');
|
||||
expect(getCalls()).toHaveLength(0);
|
||||
});
|
||||
|
||||
test('heuristic: tiny single-file code change -> light correctness task', async () => {
|
||||
test('heuristic: tiny single-file code change -> light hints only', async () => {
|
||||
const { gateway, getCalls } = createMockGateway(async () => makeChatResponse(null));
|
||||
const agent = new TriageAgent(gateway as any);
|
||||
const agent = new TriageAgent(gateway as unknown as LLMGateway);
|
||||
|
||||
const result = await agent.analyze(
|
||||
makeContext({
|
||||
@@ -116,16 +128,24 @@ describe('TriageAgent task-based routing', () => {
|
||||
);
|
||||
|
||||
expect(result.mode).toBe('light');
|
||||
expect(result.tasks).toHaveLength(1);
|
||||
expect(result.tasks[0].domain).toBe('correctness');
|
||||
expect(result.tasks[0].allowTools).toBe(false);
|
||||
expect(result.tasks[0].maxIterations).toBe(1);
|
||||
expect('tasks' in result).toBe(false);
|
||||
expect(result.suspectedEntrypoints).toEqual(['src/app.ts']);
|
||||
expect(result.budgetHints).toMatchObject({
|
||||
maxTurns: 4,
|
||||
maxToolCalls: 4,
|
||||
maxElapsedMs: 60_000,
|
||||
});
|
||||
expect(result.changedFileSummary).toMatchObject({
|
||||
totalFiles: 1,
|
||||
totalAdditions: 1,
|
||||
totalDeletions: 2,
|
||||
});
|
||||
expect(getCalls()).toHaveLength(0);
|
||||
});
|
||||
|
||||
test('heuristic: security-sensitive small change -> full correctness+security tasks', async () => {
|
||||
test('heuristic: security-sensitive small change -> full hints only', async () => {
|
||||
const { gateway, getCalls } = createMockGateway(async () => makeChatResponse(null));
|
||||
const agent = new TriageAgent(gateway as any);
|
||||
const agent = new TriageAgent(gateway as unknown as LLMGateway);
|
||||
|
||||
const result = await agent.analyze(
|
||||
makeContext({
|
||||
@@ -137,15 +157,20 @@ describe('TriageAgent task-based routing', () => {
|
||||
);
|
||||
|
||||
expect(result.mode).toBe('full');
|
||||
const domains = result.tasks.map((task) => task.domain);
|
||||
expect(domains).toContain('correctness');
|
||||
expect(domains).toContain('security');
|
||||
expect('tasks' in result).toBe(false);
|
||||
expect(result.riskTags).toContain('security-sensitive');
|
||||
expect(result.suspectedEntrypoints).toEqual(['src/auth/service.ts', 'src/user/profile.ts']);
|
||||
expect(result.budgetHints).toMatchObject({
|
||||
maxTurns: 10,
|
||||
maxToolCalls: 12,
|
||||
maxElapsedMs: 180_000,
|
||||
});
|
||||
expect(getCalls()).toHaveLength(0);
|
||||
});
|
||||
|
||||
test('heuristic: large PR by file count -> full mode with all domains', async () => {
|
||||
test('heuristic: large PR by file count -> large full budget hints', async () => {
|
||||
const { gateway, getCalls } = createMockGateway(async () => makeChatResponse(null));
|
||||
const agent = new TriageAgent(gateway as any);
|
||||
const agent = new TriageAgent(gateway as unknown as LLMGateway);
|
||||
|
||||
const changedFiles = Array.from({ length: 21 }, (_, index) =>
|
||||
makeChangedFile({ path: `src/file-${index}.ts`, additions: 2, deletions: 1 })
|
||||
@@ -155,31 +180,30 @@ describe('TriageAgent task-based routing', () => {
|
||||
|
||||
expect(result.mode).toBe('full');
|
||||
expect(result.reviewSize).toBe('large');
|
||||
expect(result.complexity).toBe('complex');
|
||||
const expectedDomains: FindingCategory[] = [
|
||||
'correctness',
|
||||
'maintainability',
|
||||
'reliability',
|
||||
'security',
|
||||
];
|
||||
expect(result.tasks.map((task) => task.domain).sort()).toEqual(expectedDomains.sort());
|
||||
expect('tasks' in result).toBe(false);
|
||||
expect(result.budgetHints).toMatchObject({
|
||||
maxTurns: 12,
|
||||
maxToolCalls: 16,
|
||||
maxElapsedMs: 240_000,
|
||||
});
|
||||
expect(result.suspectedEntrypoints).toHaveLength(12);
|
||||
expect(result.changedFileSummary.files).toHaveLength(12);
|
||||
expect(getCalls()).toHaveLength(0);
|
||||
});
|
||||
|
||||
test('LLM fallback: inconclusive change uses planner and normalizes tasks', async () => {
|
||||
test('LLM fallback: inconclusive change uses planner and normalizes hints', async () => {
|
||||
const { gateway, getCalls } = createMockGateway(async () =>
|
||||
makeChatResponse(
|
||||
JSON.stringify({
|
||||
complexity: 'standard',
|
||||
review_size: 'medium',
|
||||
mode: 'light',
|
||||
relevant_domains: ['security', 'maintainability'],
|
||||
risk_tags: ['security-sensitive'],
|
||||
suspected_entrypoints: ['src/service/order.ts', 'src/controller/order.ts'],
|
||||
rationale: '跨文件业务逻辑调整',
|
||||
})
|
||||
)
|
||||
);
|
||||
const agent = new TriageAgent(gateway as any);
|
||||
const agent = new TriageAgent(gateway as unknown as LLMGateway);
|
||||
|
||||
const result = await agent.analyze(
|
||||
makeContext({
|
||||
@@ -197,11 +221,20 @@ describe('TriageAgent task-based routing', () => {
|
||||
expect(calls[0].role).toBe('planner');
|
||||
expect(calls[0].request.temperature).toBe(0);
|
||||
expect(calls[0].request.responseFormat).toBe('json');
|
||||
const plannerMessages = calls[0].request.messages as Array<{ role: string; content: string }>;
|
||||
const plannerUserMessage = plannerMessages.find((message) => message.role === 'user');
|
||||
expect(plannerUserMessage?.content).not.toContain('relevant_domains');
|
||||
expect(plannerUserMessage?.content).not.toContain('"tasks"');
|
||||
expect(plannerUserMessage?.content).not.toContain('可选领域');
|
||||
|
||||
expect(result.reviewSize).toBe('medium');
|
||||
expect(result.mode).toBe('light');
|
||||
expect(result.tasks.map((task) => task.domain)).toContain('correctness');
|
||||
expect(result.tasks.map((task) => task.domain)).toContain('security');
|
||||
expect('tasks' in result).toBe(false);
|
||||
expect(result.suspectedEntrypoints).toEqual([
|
||||
'src/service/order.ts',
|
||||
'src/controller/order.ts',
|
||||
]);
|
||||
expect(result.riskTags).toEqual(['security-sensitive']);
|
||||
expect(result.rationale).toBe('跨文件业务逻辑调整');
|
||||
});
|
||||
|
||||
@@ -210,11 +243,10 @@ describe('TriageAgent task-based routing', () => {
|
||||
const { gateway, getCalls } = createMockGateway(async () =>
|
||||
makeChatResponse(
|
||||
JSON.stringify({
|
||||
complexity: 'standard',
|
||||
review_size: 'medium',
|
||||
mode: 'light',
|
||||
relevant_domains: ['correctness'],
|
||||
risk_tags: ['maintainability-hotspot'],
|
||||
risk_tags: ['quality-sensitive'],
|
||||
suspected_entrypoints: ['src/service/order.ts'],
|
||||
rationale: '需要模型判断',
|
||||
})
|
||||
)
|
||||
@@ -243,11 +275,11 @@ describe('TriageAgent task-based routing', () => {
|
||||
expect(plannerSystemMessage?.content).toContain(longProjectPrompt);
|
||||
});
|
||||
|
||||
test('LLM fallback: planner throws -> default full review with all domains', async () => {
|
||||
test('LLM fallback: planner throws -> default full review hints', async () => {
|
||||
const { gateway, getCalls } = createMockGateway(async () => {
|
||||
throw new Error('planner unavailable');
|
||||
});
|
||||
const agent = new TriageAgent(gateway as any);
|
||||
const agent = new TriageAgent(gateway as unknown as LLMGateway);
|
||||
|
||||
const result = await agent.analyze(
|
||||
makeContext({
|
||||
@@ -263,13 +295,13 @@ describe('TriageAgent task-based routing', () => {
|
||||
|
||||
expect(getCalls()).toHaveLength(1);
|
||||
expect(result.mode).toBe('full');
|
||||
const expectedDomains: FindingCategory[] = [
|
||||
'correctness',
|
||||
'maintainability',
|
||||
'reliability',
|
||||
'security',
|
||||
];
|
||||
expect(result.tasks.map((task) => task.domain).sort()).toEqual(expectedDomains.sort());
|
||||
expect('tasks' in result).toBe(false);
|
||||
expect(result.suspectedEntrypoints).toContain('src/service/foo.ts');
|
||||
expect(result.budgetHints).toMatchObject({
|
||||
maxTurns: 10,
|
||||
maxToolCalls: 12,
|
||||
maxElapsedMs: 180_000,
|
||||
});
|
||||
expect(result.rationale).toContain('LLM');
|
||||
});
|
||||
});
|
||||
|
||||
415
src/review/agents/autonomous-review-agent.ts
Normal file
415
src/review/agents/autonomous-review-agent.ts
Normal file
@@ -0,0 +1,415 @@
|
||||
import { createHash } from 'node:crypto';
|
||||
import { getKernelAgentContext } from '../../agent-kernel/agents/kernel-agent-context';
|
||||
import type { KernelHookRegistry } from '../../agent-kernel/hooks/kernel-hook-registry';
|
||||
import config from '../../config';
|
||||
import type { LLMGateway } from '../../llm/gateway';
|
||||
import type { LLMMessage, LLMToolCall } from '../../llm/types';
|
||||
import { mergeReviewPrompts, withGlobalPrompt } from '../../utils/global-prompt';
|
||||
import { logger } from '../../utils/logger';
|
||||
import { tokenCounter } from '../context/token-counter';
|
||||
import { findingResponseSchema } from '../schema/finding-schema';
|
||||
import { ToolRegistry } from '../tools/registry';
|
||||
import { runToolOrchestration } from '../tools/tool-orchestration';
|
||||
import type { ToolExecutionContext, ToolResult } from '../tools/types';
|
||||
import {
|
||||
AgentResult,
|
||||
Finding,
|
||||
FindingCategory,
|
||||
REVIEW_DEFAULT_BUDGETS,
|
||||
ReviewContext,
|
||||
ReviewRun,
|
||||
ReviewTask,
|
||||
} from '../types';
|
||||
|
||||
/**
 * Phases an autonomous review passes through. The agent appends one entry to
 * `diagnostics.stateSequence` every time it moves to a new phase.
 */
export type AutonomousReviewState =
  'investigating' | 'tool_calling' | 'synthesizing' | 'finalizing' | 'completed' | 'failed';
|
||||
|
||||
/**
 * Telemetry gathered during one autonomous review and returned alongside the findings.
 */
interface AutonomousReviewDiagnostics {
  /** Copy of the task's `suspectedEntrypoints`. */
  scopedPaths?: Array<string>;
  /** Token count of the compact context embedded in the user prompt. */
  compactContextTokens?: number;
  /** Number of investigation requests sent to the model. */
  iterations: number;
  /** Every state the run entered, in order. */
  stateSequence: Array<AutonomousReviewState>;
  /** Why the investigation loop ended (e.g. `maxTurns`, `modelFinalized`). */
  stopReason?: string;
  /** Names of the tool calls that were accepted for execution. */
  toolCallNames: Array<string>;
  /** Total number of accepted tool calls. */
  toolCallCount: number;
  /** Number of findings that survived schema validation. */
  parsedFindingCount?: number;
  /** Truncated preview of the most recent final/repair answer. */
  finalResponsePreview?: string;
  /** Messages of every parse/validation error hit while finalizing. */
  parseErrors: Array<string>;
  /** How many model replies had neither tool calls nor text. */
  emptyResponseCount: number;
  /** Running tool-failure counter; cleared when a whole batch succeeds. */
  consecutiveToolFailures: number;
}
|
||||
|
||||
/** Per-call overrides accepted by `AutonomousReviewAgent.reviewWithOptions`. */
interface AutonomousReviewOptions {
  /** Project prompt for this call; takes precedence over the one given to the constructor. */
  projectPrompt?: string;
  /** Optional summary rendered into the user prompt as `compressedSummary`. */
  contextSummary?: string;
}
|
||||
|
||||
/** Effective limits for one review run after task overrides and defaults are merged. */
interface ResolvedBudget {
  /** Maximum number of investigation requests. */
  maxTurns: number;
  /** Maximum number of tool calls across the whole run. */
  maxToolCalls: number;
  /** Wall-clock limit in milliseconds. */
  maxElapsedMs: number;
}
|
||||
|
||||
/**
 * Derives a stable identifier for a finding.
 *
 * The four parts are joined with `:` and hashed with SHA-256; the first
 * 24 hex characters of the digest are returned.
 */
function buildFingerprint(category: string, path: string, line: number, title: string): string {
  const identity = [category, path, line, title].join(':');
  const hasher = createHash('sha256');
  hasher.update(identity);
  return hasher.digest('hex').substring(0, 24);
}
|
||||
|
||||
/**
 * Produces a log-friendly preview of a model response.
 *
 * @param content Raw response text; may be missing.
 * @returns `undefined` for missing or empty text, the text itself when it is at
 *   most 2000 UTF-16 code units long, otherwise its first 2000 code units
 *   followed by an ellipsis character.
 */
function previewContent(content: string | null | undefined): string | undefined {
  const limit = 2000;
  if (content === null || content === undefined || content === '') {
    return undefined;
  }
  if (content.length <= limit) {
    return content;
  }
  return content.substring(0, limit) + '…';
}
|
||||
|
||||
/**
 * Computes the limits for a review task.
 *
 * Defaults come from `REVIEW_DEFAULT_BUDGETS`: `largeFull` for a full review of
 * a large change, `full` for any other full review, `light` otherwise. Values
 * set on the task win over the defaults. Turns and elapsed time are floored at
 * 1, tool calls at 0.
 */
function resolveBudget(task: ReviewTask): ResolvedBudget {
  const pickDefaults = () => {
    if (task.mode !== 'full') {
      return REVIEW_DEFAULT_BUDGETS.light;
    }
    if (task.reviewSize === 'large') {
      return REVIEW_DEFAULT_BUDGETS.largeFull;
    }
    return REVIEW_DEFAULT_BUDGETS.full;
  };
  const fallback = pickDefaults();

  const maxTurns = Math.max(1, task.maxTurns ?? fallback.maxTurns);
  const maxToolCalls = Math.max(0, task.maxToolCalls ?? fallback.maxToolCalls);
  const maxElapsedMs = Math.max(1, task.maxElapsedMs ?? fallback.maxElapsedMs);
  return { maxTurns, maxToolCalls, maxElapsedMs };
}
|
||||
|
||||
/**
 * Serializes the review context (changed files, parsed diff, file contents) as
 * pretty-printed JSON and clips it with `tokenCounter.clip` when it exceeds the
 * task's token budget.
 */
function toCompactContext(context: ReviewContext, task: ReviewTask): string {
  const snapshot = {
    changedFiles: context.changedFiles,
    diffSnippets: context.parsedDiff,
    fileContents: context.fileContents,
  };
  const serialized = JSON.stringify(snapshot, null, 2);

  if (tokenCounter.count(serialized) > task.tokenBudget) {
    return tokenCounter.clip(serialized, task.tokenBudget);
  }
  return serialized;
}
|
||||
|
||||
/**
 * Builds the system prompt for the autonomous reviewer.
 *
 * Lists every registered tool as `- name: description` (or a "no tools" notice),
 * appends the fixed working rules, and passes the result through
 * `withGlobalPrompt` together with the merged global/project prompts.
 */
function buildSystemPrompt(toolRegistry?: ToolRegistry, projectPrompt?: string): string {
  let toolList = '无可用工具。';
  if (toolRegistry?.getAll().length) {
    const entries: string[] = [];
    for (const tool of toolRegistry.getAll()) {
      entries.push(`- ${tool.name}: ${tool.description}`);
    }
    toolList = entries.join('\n');
  }

  const basePrompt = `你是 Claude Code/Codex 风格的自主代码审查代理。你负责完整审查本次变更,不按 correctness/security/quality 拆分任务。

可用工具:
${toolList}

工作规则:
1. 先调查,再结论。你可以自主选择 search_code、read_file 或其他工具,不要等待外部程序替你选文件。
2. 不要按文件孤立审查;需要跨文件确认 API 持久化、状态流、权限、错误分支、边界条件和相似实现。
3. 仅报告有明确证据、会导致真实功能/安全/可靠性问题的 finding。
4. 当需要更多信息时直接调用工具;当调查完成时输出最终 JSON:{"findings":[...]}。无问题返回 {"findings":[]}。
5. 每个 finding 必须包含 severity、confidence、path、line、title、detail、evidence、suggestion,可选 category 为 correctness/security/quality。
6. suspectedEntrypoints 只是调查优先级提示,不是范围过滤;请仍然审查所有变更文件、diff 片段和可用文件内容。`;

  const extraPrompt = mergeReviewPrompts(config.review.globalPrompt, projectPrompt);
  return withGlobalPrompt(basePrompt, extraPrompt);
}
|
||||
|
||||
/**
 * Builds the user prompt: task metadata, the numbered list of changed files,
 * the investigation instruction, and the compact context.
 *
 * @param contextSummary Optional summary; when absent its line is left empty.
 */
function buildUserPrompt(
  context: ReviewContext,
  task: ReviewTask,
  compactContext: string,
  contextSummary?: string
): string {
  const fileLines: string[] = [];
  for (const [position, file] of context.changedFiles.entries()) {
    fileLines.push(
      `${position + 1}. ${file.path} (+${file.additions}/-${file.deletions}, ${file.status})`
    );
  }
  const changedFiles = fileLines.join('\n');

  const riskTags = task.riskTags.length ? task.riskTags.join(', ') : 'none';
  const entrypoints = task.suspectedEntrypoints?.length
    ? task.suspectedEntrypoints.join(', ')
    : 'none';
  const summaryLine = contextSummary ? `- compressedSummary: ${contextSummary}` : '';

  return [
    '审查任务:',
    `- mode: ${task.mode}`,
    `- reviewSize: ${task.reviewSize ?? 'unknown'}`,
    `- riskTags: ${riskTags}`,
    `- suspectedEntrypoints: ${entrypoints}`,
    `- tokenBudget: ${task.tokenBudget}`,
    summaryLine,
    '',
    '变更文件清单:',
    `${changedFiles || '无变更文件'}`,
    '',
    '请自主调查这些变更,模型自己决定是否调用工具以及调用哪些工具。不要只凭文件名或 diff 猜测;完成调查后输出最终 JSON。',
    '',
    '压缩上下文:',
    `${compactContext}`,
  ].join('\n');
}
|
||||
|
||||
/**
 * Single-agent reviewer that investigates a change in a tool-calling loop and
 * then emits schema-validated findings.
 *
 * Each turn the model either requests tools (which are executed and fed back),
 * returns text (treated as the final answer), or returns nothing (counted as an
 * empty response). The loop ends when the model finalizes or when a budget /
 * health limit is hit, in which case the model is asked once more to
 * synthesize a final JSON answer from the evidence gathered so far.
 */
export class AutonomousReviewAgent {
  /**
   * @param gateway LLM gateway; all requests use the `specialist` role.
   * @param toolRegistry Optional tool registry; without it no tools are offered.
   * @param hookRegistry Optional hook registry forwarded to tool orchestration.
   * @param agentName Name reported in results, logs and tool context.
   * @param projectPrompt Default project prompt, overridable per call.
   */
  constructor(
    private readonly gateway: LLMGateway,
    private readonly toolRegistry?: ToolRegistry,
    private readonly hookRegistry?: KernelHookRegistry,
    private readonly agentName = 'AutonomousReviewAgent',
    private readonly projectPrompt?: string
  ) {}

  /** Runs a review with default options; equivalent to `reviewWithOptions(run, context, task)`. */
  async review(run: ReviewRun, context: ReviewContext, task: ReviewTask): Promise<AgentResult> {
    return this.reviewWithOptions(run, context, task);
  }

  /**
   * Runs the investigation loop and returns the validated findings.
   *
   * @returns An empty result without diagnostics when the diff is blank or the
   *   task mode is `skip`. Otherwise the findings plus diagnostics. Any error
   *   (LLM failure, unrecoverable parse failure, ...) is logged and turned into
   *   an empty finding list with the diagnostics collected so far.
   */
  async reviewWithOptions(
    run: ReviewRun,
    context: ReviewContext,
    task: ReviewTask,
    options: AutonomousReviewOptions = {}
  ): Promise<AgentResult> {
    if (!context.diff.trim() || task.mode === 'skip') {
      return { agentName: this.agentName, findings: [] };
    }

    const budget = resolveBudget(task);
    const startTime = Date.now();
    const compactContext = toCompactContext(context, task);
    const diagnostics: AutonomousReviewDiagnostics = {
      scopedPaths: task.suspectedEntrypoints,
      compactContextTokens: tokenCounter.count(compactContext),
      iterations: 0,
      stateSequence: [],
      toolCallNames: [],
      toolCallCount: 0,
      parseErrors: [],
      emptyResponseCount: 0,
      consecutiveToolFailures: 0,
    };
    const messages: LLMMessage[] = [
      {
        role: 'system',
        content: buildSystemPrompt(this.toolRegistry, options.projectPrompt ?? this.projectPrompt),
      },
      {
        role: 'user',
        content: buildUserPrompt(context, task, compactContext, options.contextSummary),
      },
    ];

    let finalAnswer: string | undefined;
    const transition = (next: AutonomousReviewState) => {
      diagnostics.stateSequence.push(next);
    };

    try {
      while (true) {
        // Limits are evaluated before every turn; the first one hit wins.
        const stopReason = this.detectStopReason(diagnostics, budget, startTime);
        if (stopReason !== undefined) {
          diagnostics.stopReason = stopReason;
          transition('synthesizing');
          finalAnswer = await this.synthesizeFinalAnswer(messages, diagnostics);
          transition('finalizing');
          break;
        }

        transition('investigating');
        diagnostics.iterations += 1;
        const response = await this.gateway.chatForRole('specialist', {
          messages,
          temperature: 0,
          tools: this.toolRegistry?.getAll().length
            ? this.toolRegistry.toToolDefinitions()
            : undefined,
        });

        if (response.toolCalls.length > 0) {
          // Drop any calls beyond the remaining tool budget; only accepted
          // calls are recorded in the history and executed.
          const allowedToolCalls = response.toolCalls.slice(
            0,
            Math.max(0, budget.maxToolCalls - diagnostics.toolCallCount)
          );
          diagnostics.toolCallNames.push(...allowedToolCalls.map((toolCall) => toolCall.name));
          diagnostics.toolCallCount += allowedToolCalls.length;
          messages.push({
            role: 'assistant',
            content: response.content || '',
            toolCalls: allowedToolCalls,
          });

          transition('tool_calling');
          const toolResults = await this.executeTools(allowedToolCalls, {
            workspacePath: context.workspacePath,
            mirrorPath: context.mirrorPath,
            runId: run.id,
          });
          // The counter grows by the number of failed calls in this batch and
          // is cleared only when every call in the batch succeeded.
          const failures = toolResults.filter((toolResult) => !toolResult.success).length;
          diagnostics.consecutiveToolFailures =
            failures > 0 ? diagnostics.consecutiveToolFailures + failures : 0;

          for (const toolResult of toolResults) {
            messages.push({
              role: 'tool',
              toolCallId: toolResult.toolCallId,
              content: JSON.stringify(toolResult.result || { error: toolResult.error }),
            });
          }
          continue;
        }

        if (!response.content?.trim()) {
          diagnostics.emptyResponseCount += 1;
          // NOTE(review): the empty turn is kept in the history as-is; confirm
          // the gateway tolerates empty assistant messages for all providers.
          messages.push({
            role: 'assistant',
            content: '',
          });
          continue;
        }

        // Text without tool calls is the model's own final answer.
        diagnostics.stopReason = 'modelFinalized';
        finalAnswer = response.content;
        diagnostics.finalResponsePreview = previewContent(finalAnswer);
        transition('finalizing');
        break;
      }

      const findings = await this.finalizeFindings(
        messages,
        finalAnswer ?? '{"findings":[]}',
        diagnostics
      );
      diagnostics.parsedFindingCount = findings.length;
      transition('completed');
      return {
        agentName: this.agentName,
        findings,
        diagnostics,
      };
    } catch (error) {
      transition('failed');
      logger.error(`${this.agentName} 执行失败`, {
        runId: run.id,
        error: error instanceof Error ? error.message : String(error),
      });
      return { agentName: this.agentName, findings: [], diagnostics };
    }
  }

  /**
   * Returns the reason the investigation loop must stop, or `undefined` when
   * another turn is allowed. Checks run in a fixed order: elapsed time, turn
   * count, tool-call count, two empty responses, three accumulated tool failures.
   */
  private detectStopReason(
    diagnostics: AutonomousReviewDiagnostics,
    budget: ResolvedBudget,
    startTime: number
  ): string | undefined {
    if (Date.now() - startTime >= budget.maxElapsedMs) return 'maxElapsedMs';
    if (diagnostics.iterations >= budget.maxTurns) return 'maxTurns';
    if (diagnostics.toolCallCount >= budget.maxToolCalls) return 'maxToolCalls';
    if (diagnostics.emptyResponseCount >= 2) return 'emptyResponses';
    if (diagnostics.consecutiveToolFailures >= 3) return 'toolFailures';
    return undefined;
  }

  /**
   * Asks the model, without tools and in JSON mode, to produce the final
   * answer from the conversation so far.
   *
   * The instruction is appended to `messages`; the answer itself is NOT. The
   * caller hands it to `finalizeFindings`, which adds a rejected answer to the
   * history before requesting a repair. Appending it here as well would
   * duplicate the assistant turn in that repair request.
   *
   * @returns The model's answer, or an empty findings document when it is blank.
   */
  private async synthesizeFinalAnswer(
    messages: LLMMessage[],
    diagnostics: AutonomousReviewDiagnostics
  ): Promise<string> {
    messages.push({
      role: 'user',
      content:
        '你已达到停止条件。请基于现有调查证据输出最终 JSON:{"findings":[...]}。不要调用工具,不要输出额外文字。',
    });
    const response = await this.gateway.chatForRole('specialist', {
      messages,
      temperature: 0,
      responseFormat: 'json',
    });
    const content = response.content || '{"findings":[]}';
    diagnostics.finalResponsePreview = previewContent(content);
    return content;
  }

  /**
   * Parses and validates the final answer, asking the model to repair it up to
   * two times when it is not valid JSON or fails `findingResponseSchema`.
   *
   * Findings without a category default to `correctness`; findings without a
   * fingerprint get one derived from category, path, line and title.
   *
   * @throws The last parse/validation error when the third attempt still fails.
   */
  private async finalizeFindings(
    messages: LLMMessage[],
    content: string,
    diagnostics: AutonomousReviewDiagnostics
  ): Promise<Omit<Finding, 'id' | 'runId' | 'published'>[]> {
    let current = content;
    for (let attempt = 0; attempt <= 2; attempt++) {
      try {
        const parsed = findingResponseSchema.parse(JSON.parse(current));
        return parsed.findings.map((item) => {
          const category: FindingCategory = item.category ?? 'correctness';
          return {
            ...item,
            category,
            fingerprint:
              item.fingerprint || buildFingerprint(category, item.path, item.line, item.title),
          };
        });
      } catch (parseError) {
        const message = parseError instanceof Error ? parseError.message : String(parseError);
        diagnostics.parseErrors.push(message);
        if (attempt === 2) {
          throw parseError;
        }
        // Show the model its rejected answer, then ask for a corrected one.
        messages.push({ role: 'assistant', content: current });
        messages.push({
          role: 'user',
          content:
            '上一次最终结果无法通过 findingResponseSchema。请修复为严格 JSON:{"findings":[{"severity":"high|medium|low","confidence":0.0,"path":"...","line":1,"title":"...","detail":"...","evidence":"...","suggestion":"..."}]}。不要输出额外文字。',
        });
        const repair = await this.gateway.chatForRole('specialist', {
          messages,
          temperature: 0,
          responseFormat: 'json',
        });
        current = repair.content || '{"findings":[]}';
        diagnostics.finalResponsePreview = previewContent(current);
      }
    }

    // Unreachable: the loop either returns or throws on its last attempt.
    return [];
  }

  /**
   * Executes the given tool calls through `runToolOrchestration`, tagging the
   * execution context with this agent's name, the current kernel agent id (if
   * any) and the `react` source.
   *
   * @returns The orchestration results, or an empty list when there is no tool
   *   registry or nothing to execute.
   */
  private async executeTools(
    toolCalls: LLMToolCall[],
    context: ToolExecutionContext
  ): Promise<ToolResult[]> {
    if (!this.toolRegistry || toolCalls.length === 0) {
      return [];
    }

    const agentContext = getKernelAgentContext();
    const orchestration = await runToolOrchestration({
      registry: this.toolRegistry,
      toolCalls,
      context: {
        ...context,
        agentName: this.agentName,
        agentId: agentContext?.agentId,
        source: 'react',
      },
      hookRegistry: this.hookRegistry,
    });

    return orchestration.results;
  }
}
|
||||
@@ -1,17 +0,0 @@
|
||||
import type { LLMGateway } from '../../llm/gateway';
|
||||
import type { LearningSystem } from '../learning/learning-system';
|
||||
import { ToolRegistry } from '../tools/registry';
|
||||
import { SpecialistAgent } from './specialist-agent';
|
||||
|
||||
/**
 * Specialist preconfigured for the `correctness` category: business-logic
 * correctness, boundary conditions, null handling and obvious bugs.
 */
export class CorrectnessAgent extends SpecialistAgent {
  constructor(gateway: LLMGateway, toolRegistry?: ToolRegistry, learningSystem?: LearningSystem) {
    // Arguments: gateway, category, display name, focus prompt, tools, learning system.
    super(gateway, 'correctness', 'Correctness Agent', '业务逻辑正确性、边界条件、空值处理和明显bug', toolRegistry, learningSystem);
  }
}
|
||||
@@ -1,347 +0,0 @@
|
||||
import config from '../../config';
|
||||
import type { LLMGateway } from '../../llm/gateway';
|
||||
import type { LLMMessage } from '../../llm/types';
|
||||
import { mergeReviewPrompts, withCoreGlobalPrompt } from '../../utils/global-prompt';
|
||||
import { logger } from '../../utils/logger';
|
||||
import { Finding, FindingSeverity } from '../types';
|
||||
import { SpecialistAgent } from './specialist-agent';
|
||||
|
||||
/** One agent's verdict on a finding during a debate. */
interface AgentOpinion {
  /** Name of the agent that issued the opinion. */
  agentName: string;
  /** Confidence reported by the agent (the prompt asks for 0.0-1.0). */
  confidence: number;
  /** Severity the agent believes is appropriate. */
  severity: FindingSeverity;
  /** Free-text justification. */
  reasoning: string;
  /** `true` when the agent considers the finding real, `false` for a false positive. */
  isValid: boolean;
}
|
||||
|
||||
/**
 * Lets several specialist agents debate a single finding and folds their
 * opinions into a revised finding (confidence, severity and detail).
 */
export class DebateOrchestrator {
  /** Severities in tie-break order (earlier wins when weights are equal). */
  private static readonly SEVERITIES: readonly FindingSeverity[] = ['high', 'medium', 'low'];

  private gateway: LLMGateway;

  constructor(gateway: LLMGateway) {
    this.gateway = gateway;
  }

  /**
   * Runs the debate: collects an initial opinion from every agent, then lets
   * each agent revise its opinion for up to `maxRounds` rounds, stopping early
   * once consensus is reached.
   *
   * @param finding Finding under discussion.
   * @param agents Participating agents; fewer than two skips the debate.
   * @param maxRounds Maximum number of revision rounds.
   * @param projectPrompt Optional project prompt merged into every system prompt.
   * @returns The original finding when the debate is skipped, otherwise the
   *   finding adjusted by the consensus.
   */
  async conductDebate(
    finding: Omit<Finding, 'id' | 'runId' | 'published'>,
    agents: SpecialistAgent[],
    maxRounds = 2,
    projectPrompt?: string
  ): Promise<Omit<Finding, 'id' | 'runId' | 'published'>> {
    if (agents.length < 2) {
      logger.debug('Debate需要至少2个agents,跳过');
      return finding;
    }

    logger.info('启动Debate', {
      finding: finding.title,
      agentsCount: agents.length,
      maxRounds,
    });

    const opinions = new Map<string, AgentOpinion>();

    // Collect the initial opinions.
    for (const agent of agents) {
      const opinion = await this.getAgentOpinion(agent, finding, projectPrompt);
      opinions.set(this.nameOf(agent), opinion);
    }

    // Debate rounds.
    for (let round = 0; round < maxRounds; round++) {
      logger.debug(`Debate Round ${round + 1}/${maxRounds}`, {
        finding: finding.title,
      });

      for (const agent of agents) {
        const agentName = this.nameOf(agent);
        const otherOpinions = Array.from(opinions.entries()).filter(([name]) => name !== agentName);

        const revisedOpinion = await this.reviseOpinion(
          agent,
          finding,
          otherOpinions,
          opinions,
          projectPrompt
        );

        opinions.set(agentName, revisedOpinion);
      }

      // Stop as soon as the agents agree.
      if (this.hasConsensus(opinions)) {
        logger.info(`Debate在第${round + 1}轮达成共识`, {
          finding: finding.title,
        });
        break;
      }
    }

    return this.formConsensus(finding, opinions);
  }

  /**
   * Reads the agent's name. Element access is used instead of an `any` cast
   * because the member is not part of the agent's public surface.
   */
  private nameOf(agent: SpecialistAgent): string {
    return agent['agentName'];
  }

  /**
   * Validates a parsed LLM reply and converts it into an `AgentOpinion`.
   *
   * Missing or malformed fields fall back to confidence 0.5, severity
   * `medium`, empty reasoning and `isValid = true`. A numeric confidence is
   * clamped to [0, 1] (an explicit 0 is preserved).
   *
   * @throws Error when the reply is not a JSON object, so callers use their
   *   failure fallback instead of silently accepting defaults.
   */
  private normalizeOpinion(agentName: string, raw: unknown): AgentOpinion {
    if (typeof raw !== 'object' || raw === null) {
      throw new Error('Agent opinion is not a JSON object');
    }
    const { confidence, severity, reasoning, is_valid: validity } = raw as Record<string, unknown>;

    return {
      agentName,
      confidence:
        typeof confidence === 'number' && Number.isFinite(confidence)
          ? Math.min(1, Math.max(0, confidence))
          : 0.5,
      severity: DebateOrchestrator.SEVERITIES.find((known) => known === severity) ?? 'medium',
      reasoning: typeof reasoning === 'string' ? reasoning : '',
      isValid: typeof validity === 'boolean' ? validity : true,
    };
  }

  /**
   * Sends one opinion request (system + user message, JSON mode) to the
   * `specialist` role and returns the normalized opinion.
   *
   * @throws Error with `emptyResponseError` when the reply has no content, and
   *   whatever `JSON.parse` / `normalizeOpinion` throw for malformed replies.
   */
  private async requestOpinion(request: {
    agentName: string;
    systemInstruction: string;
    userPrompt: string;
    temperature: number;
    emptyResponseError: string;
    projectPrompt?: string;
  }): Promise<AgentOpinion> {
    const messages: LLMMessage[] = [
      {
        role: 'system',
        content: withCoreGlobalPrompt(
          request.systemInstruction,
          mergeReviewPrompts(config.review.globalPrompt, request.projectPrompt)
        ),
      },
      { role: 'user', content: request.userPrompt },
    ];

    const response = await this.gateway.chatForRole('specialist', {
      messages,
      temperature: request.temperature,
      responseFormat: 'json',
    });

    const content = response.content;
    if (!content) {
      throw new Error(request.emptyResponseError);
    }

    return this.normalizeOpinion(request.agentName, JSON.parse(content));
  }

  /**
   * Asks one agent for its independent assessment of the finding.
   *
   * On any failure the error is logged and the finding's own confidence and
   * severity are returned as a valid opinion.
   */
  private async getAgentOpinion(
    agent: SpecialistAgent,
    finding: Omit<Finding, 'id' | 'runId' | 'published'>,
    projectPrompt?: string
  ): Promise<AgentOpinion> {
    const agentName = this.nameOf(agent);
    const prompt = `你是${agentName}。评估以下代码问题的严重性、置信度和有效性。

问题:
- Title: ${finding.title}
- Detail: ${finding.detail}
- Evidence: ${finding.evidence}
- Current Severity: ${finding.severity}
- Current Confidence: ${finding.confidence}

从你的专业角度判断:
1. 这个问题是否真实存在(不是误报)?
2. 严重性评估是否准确?
3. 你的置信度是多少?
4. 你的判断理由?

返回JSON:
{
"is_valid": true/false,
"confidence": 0.0-1.0,
"severity": "high" | "medium" | "low",
"reasoning": "你的判断理由(详细说明)"
}`;

    try {
      return await this.requestOpinion({
        agentName,
        systemInstruction: `你是${agentName},从你的专业角度独立评估代码问题。`,
        userPrompt: prompt,
        temperature: 0.2,
        emptyResponseError: 'Agent opinion返回空',
        projectPrompt,
      });
    } catch (error) {
      logger.error(`获取${agentName}意见失败`, {
        error: error instanceof Error ? error.message : String(error),
      });

      return {
        agentName,
        confidence: finding.confidence,
        severity: finding.severity,
        reasoning: '获取意见失败,使用默认值',
        isValid: true,
      };
    }
  }

  /**
   * Asks one agent to reconsider its opinion in light of its peers' opinions.
   *
   * On any failure the error is logged and the agent's current opinion from
   * `opinions` is kept (or a neutral default if it has none).
   */
  private async reviseOpinion(
    agent: SpecialistAgent,
    finding: Omit<Finding, 'id' | 'runId' | 'published'>,
    otherOpinions: [string, AgentOpinion][],
    opinions: Map<string, AgentOpinion>,
    projectPrompt?: string
  ): Promise<AgentOpinion> {
    const agentName = this.nameOf(agent);
    const prompt = `你是${agentName}。重新评估以下问题,考虑其他专家的意见。

问题:
- Title: ${finding.title}
- Evidence: ${finding.evidence}

其他专家意见:
${otherOpinions
  .map(
    ([name, op]) =>
      `- ${name}: ${op.isValid ? '有效' : '误报'}, ${op.severity} (置信度 ${op.confidence.toFixed(2)})\n 理由: ${
        op.reasoning
      }`
  )
  .join('\n')}

基于同行的意见,你是否改变观点?

返回JSON:
{
"is_valid": true/false,
"confidence": 0.0-1.0,
"severity": "high" | "medium" | "low",
"reasoning": "修正后的理由或坚持原判断的原因"
}`;

    try {
      return await this.requestOpinion({
        agentName,
        systemInstruction: `你是${agentName},根据同行意见重新评估,但也要坚持你的专业判断。`,
        userPrompt: prompt,
        temperature: 0.3, // leave the model some room to change its mind
        emptyResponseError: 'Revised opinion返回空',
        projectPrompt,
      });
    } catch (error) {
      logger.error(`${agentName}修订意见失败`, {
        error: error instanceof Error ? error.message : String(error),
      });

      // Keep the opinion the agent already holds.
      const currentOpinion = opinions.get(agentName);
      return (
        currentOpinion || {
          agentName,
          confidence: 0.5,
          severity: 'medium',
          reasoning: '修订失败',
          isValid: true,
        }
      );
    }
  }

  /**
   * Consensus exists when at least 80% of the agents agree on validity (either
   * way), or at least 70% agree on the same severity. No opinions counts as
   * consensus.
   */
  private hasConsensus(opinions: Map<string, AgentOpinion>): boolean {
    const votes = Array.from(opinions.values());

    if (votes.length === 0) return true;

    // Validity consensus.
    const validCount = votes.filter((v) => v.isValid).length;
    const validRatio = validCount / votes.length;

    if (validRatio >= 0.8 || validRatio <= 0.2) {
      return true;
    }

    // Severity consensus.
    const severityCounts: Record<FindingSeverity, number> = {
      high: 0,
      medium: 0,
      low: 0,
    };

    votes.forEach((v) => {
      severityCounts[v.severity]++;
    });

    const maxCount = Math.max(...Object.values(severityCounts));
    const consensusRatio = maxCount / votes.length;

    return consensusRatio >= 0.7;
  }

  /**
   * Merges the opinions into the finding.
   *
   * - No opinions: the finding is returned unchanged.
   * - No strict majority considers it valid: confidence is capped at 0.4 and a
   *   note is appended to the detail.
   * - Otherwise: confidence becomes the mean confidence of the "valid" voters,
   *   severity is chosen by confidence-weighted vote among them, and their
   *   reasoning is appended to the detail. When every weight is zero the
   *   finding keeps its original severity.
   */
  private formConsensus(
    finding: Omit<Finding, 'id' | 'runId' | 'published'>,
    opinions: Map<string, AgentOpinion>
  ): Omit<Finding, 'id' | 'runId' | 'published'> {
    const votes = Array.from(opinions.values());

    if (votes.length === 0) {
      return finding;
    }

    // Majority vote on validity.
    const validVotes = votes.filter((v) => v.isValid);
    const validCount = validVotes.length;
    const isValid = validCount > votes.length / 2;

    if (!isValid) {
      logger.info('Debate判定为无效finding', {
        finding: finding.title,
        validVotes: validCount,
        totalVotes: votes.length,
      });

      return {
        ...finding,
        confidence: Math.min(finding.confidence, 0.4),
        detail: `${finding.detail}\n\n**Debate结果**: 多数专家认为此问题可能是误报(${validCount}/${votes.length}认为有效)`,
      };
    }

    // Mean confidence of the voters who consider the finding valid
    // (non-empty here: a strict majority voted "valid").
    const avgConfidence = validVotes.reduce((sum, v) => sum + v.confidence, 0) / validVotes.length;

    // Confidence-weighted severity vote.
    const severityVotes: Record<FindingSeverity, number> = {
      high: 0,
      medium: 0,
      low: 0,
    };

    validVotes.forEach((vote) => {
      severityVotes[vote.severity] += vote.confidence;
    });

    // Highest positive weight wins, ties go to the more severe level; with no
    // positive weight at all the original severity is kept.
    let agreedSeverity: FindingSeverity = finding.severity;
    let topWeight = 0;
    for (const severity of DebateOrchestrator.SEVERITIES) {
      if (severityVotes[severity] > topWeight) {
        topWeight = severityVotes[severity];
        agreedSeverity = severity;
      }
    }

    const synthesizedDetail = `${finding.detail}\n\n**专家Debate意见汇总:**\n${validVotes
      .map(
        (v) => `- ${v.agentName} (${v.severity}, 置信度${v.confidence.toFixed(2)}): ${v.reasoning}`
      )
      .join('\n')}`;

    logger.info('Debate达成共识', {
      finding: finding.title,
      originalSeverity: finding.severity,
      agreedSeverity,
      originalConfidence: finding.confidence,
      avgConfidence,
      validVotes: validVotes.length,
    });

    return {
      ...finding,
      confidence: avgConfidence,
      severity: agreedSeverity,
      detail: synthesizedDetail,
    };
  }
}
|
||||
@@ -1,54 +0,0 @@
|
||||
import { Finding, ReviewDecision } from '../types';
|
||||
|
||||
/** Numeric rank per severity; multiplied by confidence to score a finding. */
const severityWeight: Record<Finding['severity'], number> = { low: 1, medium: 2, high: 3 };
|
||||
|
||||
/**
 * Builds the Markdown summary for a review.
 *
 * @returns A fixed "nothing found" message for an empty list; otherwise a
 *   paragraph with the total and per-severity counts followed by a paragraph
 *   advising which severities to handle first.
 */
function summarizeFindings(findings: Omit<Finding, 'id' | 'runId' | 'published'>[]): string {
  const total = findings.length;
  if (total === 0) {
    return '本次变更未发现需要立即处理的高置信问题。建议人工快速复核关键业务路径。';
  }

  let high = 0;
  let medium = 0;
  let low = 0;
  for (const { severity } of findings) {
    if (severity === 'high') {
      high += 1;
    } else if (severity === 'medium') {
      medium += 1;
    } else if (severity === 'low') {
      low += 1;
    }
  }

  const countsLine = `本次 AI Agent 审查共识别 ${total} 个问题,其中 high ${high} 个、medium ${medium} 个、low ${low} 个。`;
  const adviceLine = '以下评论按风险优先级自动发布,建议优先处理 high 与 medium 项。';
  return `${countsLine}\n\n${adviceLine}`;
}
|
||||
|
||||
/**
 * Deduplicates and ranks findings, then produces the final review decision.
 */
export class JudgeAgent {
  /**
   * Keeps one finding per fingerprint (the one with the highest
   * severity-weight × confidence score; the earlier one on ties), sorts the
   * survivors by that score in descending order, and attaches a summary.
   */
  judge(results: Omit<Finding, 'id' | 'runId' | 'published'>[]): ReviewDecision {
    type Candidate = Omit<Finding, 'id' | 'runId' | 'published'>;
    const scoreOf = (candidate: Candidate): number =>
      severityWeight[candidate.severity] * candidate.confidence;

    const strongest = new Map<string, Candidate>();
    for (const candidate of results) {
      const incumbent = strongest.get(candidate.fingerprint);
      // Falsy check mirrors the map lookup miss; replace only on a strictly higher score.
      if (!incumbent || scoreOf(candidate) > scoreOf(incumbent)) {
        strongest.set(candidate.fingerprint, candidate);
      }
    }

    const findings = Array.from(strongest.values());
    findings.sort((left, right) => scoreOf(right) - scoreOf(left));

    return { summaryMarkdown: summarizeFindings(findings), findings };
  }
}
|
||||
@@ -1,17 +0,0 @@
|
||||
import type { LLMGateway } from '../../llm/gateway';
|
||||
import type { LearningSystem } from '../learning/learning-system';
|
||||
import { ToolRegistry } from '../tools/registry';
|
||||
import { SpecialistAgent } from './specialist-agent';
|
||||
|
||||
/**
 * Specialist preconfigured for the `maintainability` category:
 * maintainability, complexity, interface-breakage risk and poor testability.
 */
export class MaintainabilityAgent extends SpecialistAgent {
  constructor(gateway: LLMGateway, toolRegistry?: ToolRegistry, learningSystem?: LearningSystem) {
    // Arguments: gateway, category, display name, focus prompt, tools, learning system.
    super(gateway, 'maintainability', 'Maintainability Agent', '可维护性、复杂度、接口破坏风险和可测试性不足', toolRegistry, learningSystem);
  }
}
|
||||
@@ -1,194 +0,0 @@
|
||||
import { createHash } from 'node:crypto';
|
||||
import config from '../../config';
|
||||
import type { LLMGateway } from '../../llm/gateway';
|
||||
import type { LLMMessage } from '../../llm/types';
|
||||
import { mergeReviewPrompts, withGlobalPrompt } from '../../utils/global-prompt';
|
||||
import { logger } from '../../utils/logger';
|
||||
import { tokenCounter } from '../context/token-counter';
|
||||
import { LearningSystem } from '../learning/learning-system';
|
||||
import { findingResponseSchema } from '../schema/finding-schema';
|
||||
import { ToolRegistry } from '../tools/registry';
|
||||
import { AgentResult, Finding, FindingCategory, ReviewContext, ReviewRun } from '../types';
|
||||
import { CriticAgent, CritiqueResult } from './critic-agent';
|
||||
import { SpecialistAgent, type SpecialistReviewOptions } from './specialist-agent';
|
||||
|
||||
/**
 * Computes a finding fingerprint: the first 24 hex characters of the SHA-256
 * digest of `category:path:line:title`.
 */
function buildFingerprint(category: string, path: string, line: number, title: string): string {
  const key = [category, path, line, title].join(':');
  const digest = createHash('sha256').update(key).digest('hex');
  return digest.substring(0, 24);
}
|
||||
|
||||
/**
 * Specialist agent that wraps the base review in a draft → critique → refine loop.
 *
 * The first draft comes from `SpecialistAgent.reviewWithOptions`. Each round the
 * draft is scored by a `CriticAgent`; unless it is the last round, the draft is
 * then rewritten by the LLM according to the critique and re-scored in the next
 * round. The best-scoring draft is returned.
 */
export class ReflexionAgent extends SpecialistAgent {
  private readonly criticAgent: CriticAgent;

  /**
   * @param gateway LLM gateway used by this agent and by its critic.
   * @param category Category stamped on every finding this agent returns.
   * @param agentName Display name used in prompts and log messages.
   * @param focusPrompt Focus description forwarded to the base agent.
   * @param toolRegistry Optional tool registry forwarded to the base agent.
   * @param learningSystem Optional learning system forwarded to the base agent.
   */
  constructor(
    gateway: LLMGateway,
    category: FindingCategory,
    agentName: string,
    focusPrompt: string,
    toolRegistry?: ToolRegistry,
    learningSystem?: LearningSystem
  ) {
    super(gateway, category, agentName, focusPrompt, toolRegistry, learningSystem);
    this.criticAgent = new CriticAgent(gateway);
  }

  /**
   * Runs the review with up to `maxReflectionRounds` critique rounds.
   *
   * @param run Review run; its `id` is attached to log entries.
   * @param context Review context passed to the base review, the critic and the refiner.
   * @param maxReflectionRounds Number of critique rounds (default 2). Values below 1
   *   are treated as 1 so that the base review always runs.
   * @param options Options forwarded to the base review; `projectPrompt` is also
   *   given to the critic and the refiner.
   * @returns The agent name together with the best-scoring draft.
   */
  async reviewWithReflection(
    run: ReviewRun,
    context: ReviewContext,
    maxReflectionRounds = 2,
    options?: SpecialistReviewOptions
  ): Promise<AgentResult> {
    // Always run at least one round; a non-positive value would otherwise skip
    // the review entirely and silently return no findings.
    const rounds = Math.max(1, maxReflectionRounds);
    const projectPrompt = options?.projectPrompt;

    let bestFindings: Omit<Finding, 'id' | 'runId' | 'published'>[] = [];
    // Start below any real score so that the first draft is always kept, even
    // when the critic rates it 0.
    let bestQualityScore = Number.NEGATIVE_INFINITY;
    let currentFindings: Omit<Finding, 'id' | 'runId' | 'published'>[] = [];

    for (let round = 0; round < rounds; round++) {
      logger.info(`${this.agentName} Reflection Round ${round + 1}/${rounds}`, {
        runId: run.id,
      });

      // Round 0 produces the initial draft; later rounds re-use the refined findings.
      const draft = await this.generateDraft(run, context, currentFindings, round, options);

      // Self-critique.
      const critique = await this.criticAgent.critique(draft, context, projectPrompt);

      logger.info(`${this.agentName} Critique结果`, {
        runId: run.id,
        round: round + 1,
        qualityScore: critique.qualityScore,
        issuesFound: critique.issues.length,
        missedIssues: critique.missedIssues.length,
      });

      // Good enough: keep this draft and stop early.
      if (critique.qualityScore >= 0.9 && critique.issues.length === 0) {
        bestFindings = draft;
        bestQualityScore = critique.qualityScore;
        logger.info(`${this.agentName} 质量满足要求,提前结束Reflection`, {
          runId: run.id,
          finalScore: critique.qualityScore,
        });
        break;
      }

      // Remember the best draft seen so far.
      if (critique.qualityScore > bestQualityScore) {
        bestQualityScore = critique.qualityScore;
        bestFindings = draft;
      }

      // Refine only when another round will follow to evaluate the result.
      if (round < rounds - 1) {
        currentFindings = await this.refine(draft, critique, context, run, projectPrompt);
      }
    }

    return {
      agentName: this.agentName,
      findings: bestFindings,
    };
  }

  /**
   * Returns the draft for the given round: the base review's findings in round 0,
   * otherwise the findings produced by the previous refinement.
   */
  private async generateDraft(
    run: ReviewRun,
    context: ReviewContext,
    previousFindings: Omit<Finding, 'id' | 'runId' | 'published'>[],
    round: number,
    options?: SpecialistReviewOptions
  ): Promise<Omit<Finding, 'id' | 'runId' | 'published'>[]> {
    if (round === 0) {
      const result = await super.reviewWithOptions(run, context, options);
      return result.findings;
    }

    return previousFindings;
  }

  /**
   * Asks the LLM to rewrite `draft` according to `critique`.
   *
   * The response is validated with `findingResponseSchema`; every returned finding
   * gets this agent's category and, when missing, a generated fingerprint. On an
   * empty response or any error the original `draft` is returned unchanged.
   */
  private async refine(
    draft: Omit<Finding, 'id' | 'runId' | 'published'>[],
    critique: CritiqueResult,
    context: ReviewContext,
    run: ReviewRun,
    projectPrompt?: string
  ): Promise<Omit<Finding, 'id' | 'runId' | 'published'>[]> {
    const issueLines = critique.issues
      .map(
        (issue) =>
          `- Finding #${issue.findingIndex}: ${issue.problem}\n 建议: ${issue.suggestion}`
      )
      .join('\n');
    const missedLines = critique.missedIssues.map((missed) => `- ${missed}`).join('\n');

    const prompt = `你是${this.agentName}。根据以下批评意见,改进审查结果。

原始findings(${draft.length}个):
${JSON.stringify(draft, null, 2)}

Critic Agent的批评意见:
质量评分: ${critique.qualityScore}
发现的问题(${critique.issues.length}个):
${issueLines}

可能遗漏的问题(${critique.missedIssues.length}个):
${missedLines}

总体评估: ${critique.overallAssessment}

代码上下文:
${tokenCounter.clip(context.diff, 1000)}

任务:
1. 修正有问题的findings(根据批评意见)
2. 补充遗漏的问题(如果确实存在)
3. 移除误报
4. 提升evidence的充分性和具体性

返回改进后的findings JSON数组,格式:
{
"findings": [...]
}`;

    try {
      const messages: LLMMessage[] = [
        {
          role: 'system',
          content: withGlobalPrompt(
            `你是${this.agentName},根据批评反馈改进审查结果。`,
            mergeReviewPrompts(config.review.globalPrompt, projectPrompt)
          ),
        },
        { role: 'user', content: prompt },
      ];

      const response = await this.gateway.chatForRole('specialist', {
        messages,
        temperature: 0.1,
        responseFormat: 'json',
      });

      const content = response.content;
      if (!content) {
        logger.warn(`${this.agentName} Refine返回空结果,使用原findings`);
        return draft;
      }

      const parsed = JSON.parse(content);

      // Validate against the schema so malformed findings never reach publishing.
      const validated = findingResponseSchema.parse({ findings: parsed.findings || draft });

      // Normalise category and fingerprint.
      return validated.findings.map((finding) => ({
        ...finding,
        category: this.category,
        fingerprint:
          finding.fingerprint ||
          buildFingerprint(this.category, finding.path, finding.line, finding.title),
      }));
    } catch (error) {
      logger.error(`${this.agentName} Refine失败`, {
        runId: run.id,
        error: error instanceof Error ? error.message : String(error),
      });
      return draft; // Fall back to the unrefined findings.
    }
  }
}
|
||||
@@ -1,17 +0,0 @@
|
||||
import type { LLMGateway } from '../../llm/gateway';
|
||||
import type { LearningSystem } from '../learning/learning-system';
|
||||
import { ToolRegistry } from '../tools/registry';
|
||||
import { SpecialistAgent } from './specialist-agent';
|
||||
|
||||
/**
 * Specialist agent preconfigured for the `reliability` finding category.
 *
 * All review behaviour is inherited from `SpecialistAgent`; this subclass only
 * fixes the category, the display name and the focus prompt.
 */
export class ReliabilityAgent extends SpecialistAgent {
  /**
   * @param gateway LLM gateway handed to the base agent.
   * @param toolRegistry Optional tool registry handed to the base agent.
   * @param learningSystem Optional learning system handed to the base agent.
   */
  constructor(gateway: LLMGateway, toolRegistry?: ToolRegistry, learningSystem?: LearningSystem) {
    const agentName = 'Reliability Agent';
    // Focus prompt (Chinese): error handling, retry strategy, idempotency,
    // concurrency consistency and resource release.
    const focusPrompt = '错误处理、重试策略、幂等性、并发一致性和资源释放';

    super(gateway, 'reliability', agentName, focusPrompt, toolRegistry, learningSystem);
  }
}
|
||||
@@ -1,17 +0,0 @@
|
||||
import type { LLMGateway } from '../../llm/gateway';
|
||||
import type { LearningSystem } from '../learning/learning-system';
|
||||
import { ToolRegistry } from '../tools/registry';
|
||||
import { SpecialistAgent } from './specialist-agent';
|
||||
|
||||
/**
 * Specialist agent preconfigured for the `security` finding category.
 *
 * All review behaviour is inherited from `SpecialistAgent`; this subclass only
 * fixes the category, the display name and the focus prompt.
 */
export class SecurityAgent extends SpecialistAgent {
  /**
   * @param gateway LLM gateway handed to the base agent.
   * @param toolRegistry Optional tool registry handed to the base agent.
   * @param learningSystem Optional learning system handed to the base agent.
   */
  constructor(gateway: LLMGateway, toolRegistry?: ToolRegistry, learningSystem?: LearningSystem) {
    const agentName = 'Security Agent';
    // Focus prompt (Chinese): injection vulnerabilities, authorization bypass,
    // sensitive information disclosure, deserialization and missing input validation.
    const focusPrompt = '注入漏洞、权限绕过、敏感信息泄露、反序列化和输入校验缺失';

    super(gateway, 'security', agentName, focusPrompt, toolRegistry, learningSystem);
  }
}
|
||||
@@ -1,484 +0,0 @@
|
||||
import { createHash } from 'node:crypto';
|
||||
import config from '../../config';
|
||||
import type { LLMGateway } from '../../llm/gateway';
|
||||
import type { LLMMessage, LLMToolCall } from '../../llm/types';
|
||||
import { mergeReviewPrompts, withGlobalPrompt } from '../../utils/global-prompt';
|
||||
import { logger } from '../../utils/logger';
|
||||
import { tokenCounter } from '../context/token-counter';
|
||||
import type { LearningSystem } from '../learning/learning-system';
|
||||
import { findingResponseSchema } from '../schema/finding-schema';
|
||||
import { ToolRegistry } from '../tools/registry';
|
||||
import type { ToolExecutionContext, ToolResult } from '../tools/types';
|
||||
import {
|
||||
AgentResult,
|
||||
Finding,
|
||||
FindingCategory,
|
||||
ReviewContext,
|
||||
ReviewMode,
|
||||
ReviewRun,
|
||||
} from '../types';
|
||||
|
||||
/**
 * Computes the fingerprint used to identify a finding.
 *
 * The fingerprint is the first 24 hex characters of the SHA-256 digest of
 * `category:path:line:title`.
 *
 * @param category Finding category.
 * @param path File path the finding refers to.
 * @param line Line number the finding refers to.
 * @param title Finding title.
 * @returns A 24-character lowercase hexadecimal string.
 */
function buildFingerprint(category: string, path: string, line: number, title: string): string {
  const FINGERPRINT_LENGTH = 24;
  const material = `${category}:${path}:${line}:${title}`;
  const digest = createHash('sha256').update(material).digest('hex');

  return digest.slice(0, FINGERPRINT_LENGTH);
}
|
||||
|
||||
/** Options accepted by `toCompactContext`. */
interface CompactContextOptions {
  /** Token budget for the serialized context; `toCompactContext` uses 25 000 when omitted. */
  maxContextTokens?: number;

  /** When provided, only files whose path appears in this list are included. */
  scopePaths?: string[];
}
|
||||
|
||||
/** Per-call options for `SpecialistAgent.reviewWithOptions`. */
export interface SpecialistReviewOptions {
  /** Review mode; `'skip'` returns no findings, `'light'` lowers the default ReAct iteration count to 1. */
  mode?: ReviewMode;

  /** Set to `false` to force the single-pass review even when tools are registered. */
  allowTools?: boolean;

  /** Upper bound on ReAct iterations; clamped to at least 1 by the agent. */
  maxIterations?: number;

  /** Restricts the serialized review context to these file paths. */
  scopePaths?: string[];

  /** Token budget passed to the context serializer. */
  maxContextTokens?: number;

  /** Project-level prompt merged with the global review prompt for the system message. */
  projectPrompt?: string;
}
|
||||
|
||||
/**
 * Serializes a review context into a pretty-printed JSON string that fits a
 * token budget.
 *
 * The JSON has three keys: `changedFiles` (path, status, additions, deletions),
 * `diffSnippets` (per-file change lists) and `fileContents`. When the result is
 * over budget it is shrunk in this order:
 *   1. the per-file change cap is lowered from 200 towards 20 (×0.7 per step);
 *   2. the number of `fileContents` entries is halved until it reaches 0;
 *   3. as a last resort the string is clipped with `tokenCounter.clip`, which
 *      is logged as a warning.
 *
 * @param context Review context to serialize.
 * @param options Optional path scope and token budget (default 25 000).
 * @returns The serialized (and possibly clipped) context.
 */
function toCompactContext(context: ReviewContext, options?: CompactContextOptions): string {
  const tokenBudget = options?.maxContextTokens ?? 25_000;
  const allowedPaths = options?.scopePaths ? new Set(options.scopePaths) : null;

  // Narrow every collection to the requested paths, if a scope was given.
  let changedInScope = context.changedFiles;
  let diffInScope = context.parsedDiff;
  let contentsInScope = context.fileContents;
  if (allowedPaths) {
    changedInScope = context.changedFiles.filter((file) => allowedPaths.has(file.path));
    diffInScope = context.parsedDiff.filter((file) => allowedPaths.has(file.path));
    contentsInScope = Object.fromEntries(
      Object.entries(context.fileContents).filter(([filePath]) => allowedPaths.has(filePath))
    );
  }

  // File metadata is small and always kept in full.
  const fileSummaries = changedInScope.map(({ path, status, additions, deletions }) => ({
    path,
    status,
    additions,
    deletions,
  }));

  /** Renders the context with the given caps applied. */
  const render = (changesCap: number, contentCap: number): string => {
    const diffSnippets = diffInScope.map((file) => ({
      path: file.path,
      changes: file.changes.slice(0, changesCap),
    }));

    const fileContents: Record<string, string> = {};
    for (const key of Object.keys(contentsInScope).slice(0, contentCap)) {
      fileContents[key] = contentsInScope[key];
    }

    return JSON.stringify({ changedFiles: fileSummaries, diffSnippets, fileContents }, null, 2);
  };

  let changesCap = 200;
  let contentCap = Object.keys(contentsInScope).length;
  let rendered = render(changesCap, contentCap);

  // Shrink step by step while over budget: diff changes first, then file contents.
  while (tokenCounter.count(rendered) > tokenBudget) {
    if (changesCap > 20) {
      changesCap = Math.max(20, Math.floor(changesCap * 0.7));
    } else if (contentCap > 0) {
      contentCap = Math.max(0, Math.floor(contentCap * 0.5));
    } else {
      break; // Nothing left to reduce.
    }

    rendered = render(changesCap, contentCap);
  }

  // Still too large: hard-clip the serialized text.
  if (tokenCounter.count(rendered) > tokenBudget) {
    logger.warn('Context size still exceeds token limit after reduction, truncating', {
      estimatedTokens: tokenCounter.count(rendered),
      limit: tokenBudget,
    });
    rendered = tokenCounter.clip(rendered, tokenBudget);
  }

  return rendered;
}
|
||||
|
||||
/**
 * Base class for category-focused code review agents.
 *
 * A review runs either as a single LLM call (`reviewSinglePass`) or, when tools
 * are registered and allowed, as a bounded ReAct loop (`reviewWithReAct`) in
 * which the model may call tools before returning findings. Every finding
 * returned is stamped with this agent's category and a fingerprint.
 */
export class SpecialistAgent {
  /**
   * @param gateway LLM gateway used for all model calls.
   * @param category Category stamped on every finding.
   * @param agentName Display name used in prompts and log messages.
   * @param focusPrompt Description of what the agent should focus on.
   * @param toolRegistry Optional tools the model may call in ReAct mode.
   * @param learningSystem Optional source of few-shot example messages.
   */
  constructor(
    protected readonly gateway: LLMGateway,
    protected readonly category: FindingCategory,
    protected readonly agentName: string,
    protected readonly focusPrompt: string,
    protected readonly toolRegistry?: ToolRegistry,
    protected readonly learningSystem?: LearningSystem
  ) {}

  /** Reviews `context` with default options. */
  async review(run: ReviewRun, context: ReviewContext): Promise<AgentResult> {
    return this.reviewWithOptions(run, context);
  }

  /**
   * Reviews `context`, choosing the execution strategy from `options`.
   *
   * Returns no findings when the diff is blank or `options.mode` is `'skip'`.
   * Uses the single-pass review when no tools are registered or
   * `options.allowTools` is `false`; otherwise runs the ReAct loop.
   */
  async reviewWithOptions(
    run: ReviewRun,
    context: ReviewContext,
    options?: SpecialistReviewOptions
  ): Promise<AgentResult> {
    if (!context.diff.trim()) {
      return { agentName: this.agentName, findings: [] };
    }

    if (options?.mode === 'skip') {
      return { agentName: this.agentName, findings: [] };
    }

    if (
      !this.toolRegistry ||
      this.toolRegistry.getAll().length === 0 ||
      options?.allowTools === false
    ) {
      return this.reviewSinglePass(run, context, options);
    }

    // ReAct loop mode.
    return this.reviewWithReAct(run, context, options);
  }

  /**
   * Performs the review with one LLM call that must answer in JSON.
   *
   * The response is validated with `findingResponseSchema`. An empty response
   * or any error (request, JSON parsing, validation) yields no findings; errors
   * are logged.
   */
  private async reviewSinglePass(
    run: ReviewRun,
    context: ReviewContext,
    options?: SpecialistReviewOptions
  ): Promise<AgentResult> {
    const prompt = `你是${this.agentName},只关注${this.focusPrompt}。
输出必须是JSON对象格式:
{"findings": [{"severity": "high"|"medium"|"low", "confidence": 0-1, "path": "文件路径", "line": 正整数, "title": "标题", "detail": "详情", "evidence": "证据", "suggestion": "建议"}]}
每个 finding 的所有字段都是必填的。仅报告有明确证据的问题;无问题时返回空数组。

审查上下文如下:
${toCompactContext(context, {
  scopePaths: options?.scopePaths,
  maxContextTokens: options?.maxContextTokens,
})}`;

    try {
      const messages: LLMMessage[] = [
        {
          role: 'system',
          content: withGlobalPrompt(
            '你是严格的代码审查专家。返回结构化JSON,不输出额外文字。confidence取值范围0到1。line必须是正整数且引用新增行。',
            mergeReviewPrompts(config.review.globalPrompt, options?.projectPrompt)
          ),
        },
        { role: 'user', content: prompt },
      ];

      const response = await this.gateway.chatForRole('specialist', {
        messages,
        temperature: 0,
        responseFormat: 'json',
      });

      const content = response.content;
      if (!content) {
        return { agentName: this.agentName, findings: [] };
      }

      const parsed = findingResponseSchema.parse(JSON.parse(content));
      const findings = parsed.findings.map((item) => ({
        ...item,
        category: this.category,
        fingerprint:
          item.fingerprint || buildFingerprint(this.category, item.path, item.line, item.title),
      }));

      return {
        agentName: this.agentName,
        findings,
      };
    } catch (error) {
      logger.error(`${this.agentName} 执行失败`, {
        runId: run.id,
        error: error instanceof Error ? error.message : String(error),
      });
      return { agentName: this.agentName, findings: [] };
    }
  }

  /**
   * Performs the review as a bounded ReAct loop.
   *
   * Each iteration sends the conversation to the model. Tool calls are executed
   * and their results appended; a content response is parsed as JSON, its
   * findings validated and collected by fingerprint (later duplicates overwrite
   * earlier ones). The loop ends when the model stops asking for more
   * investigation, returns no content, or the iteration limit is reached. The
   * limit is `options.maxIterations`, defaulting to 1 in `'light'` mode and 2
   * otherwise, and never less than 1.
   *
   * If an error aborts the loop, the findings validated so far are still
   * returned instead of being discarded.
   */
  private async reviewWithReAct(
    run: ReviewRun,
    context: ReviewContext,
    options?: SpecialistReviewOptions
  ): Promise<AgentResult> {
    const registry = this.toolRegistry;
    if (!registry) {
      // Defensive guard: reviewWithOptions only routes here when a registry exists.
      return this.reviewSinglePass(run, context, options);
    }

    const maxIterations = Math.max(
      1,
      options?.maxIterations ?? (options?.mode === 'light' ? 1 : 2)
    );
    const findingsMap = new Map<string, Omit<Finding, 'id' | 'runId' | 'published'>>();
    const compactContext = toCompactContext(context, {
      scopePaths: options?.scopePaths,
      maxContextTokens: options?.maxContextTokens,
    });
    const toolCatalog = registry
      .getAll()
      .map((t) => `- ${t.name}: ${t.description}`)
      .join('\n');

    const messages: LLMMessage[] = [
      {
        role: 'system',
        content: withGlobalPrompt(
          `你是${this.agentName},专注于${this.focusPrompt}。

你可以使用以下工具进行深入调查:
${toolCatalog}

工作流程:
1. 分析给定的代码变更
2. 如需更多信息,使用工具调查(如搜索相似代码、分析函数调用)
3. 基于证据报告问题

当你需要使用工具时,直接调用工具即可。
当你完成所有调查并准备输出最终结果时,以纯JSON格式返回(不要包含任何额外文字):
{
"findings": [
{
"severity": "high" | "medium" | "low",
"confidence": 0.0 到 1.0 之间的数字,
"path": "文件路径",
"line": 正整数,引用新增行的行号,
"title": "问题简短标题",
"detail": "问题详细描述",
"evidence": "相关代码片段或证据",
"suggestion": "修复建议"
}
],
"need_more_investigation": false
}
每个 finding 对象的所有字段都是必填的。无问题时返回空数组 {"findings": [], "need_more_investigation": false}。`,
          mergeReviewPrompts(config.review.globalPrompt, options?.projectPrompt)
        ),
      },
    ];

    // Add few-shot examples when a learning system is available (best effort).
    if (this.learningSystem) {
      try {
        const fewShotExamples = await this.learningSystem.generateFewShotExamples(
          this.category,
          run.owner,
          run.repo
        );

        // Keep only plain text messages with a role the chat API accepts here.
        const usableExamples = fewShotExamples
          .map((msg) => {
            if (
              (msg.role === 'system' || msg.role === 'user' || msg.role === 'assistant') &&
              typeof msg.content === 'string'
            ) {
              return { role: msg.role, content: msg.content } as const;
            }
            return null;
          })
          .filter(
            (msg): msg is { role: 'system' | 'user' | 'assistant'; content: string } =>
              msg !== null
          );

        if (usableExamples.length > 0) {
          messages.push(...usableExamples);
          // Report the number of messages actually added, not the unfiltered count.
          logger.debug(`${this.agentName} 添加了 ${usableExamples.length} 条Few-shot示例`, {
            runId: run.id,
          });
        }
      } catch (error) {
        logger.warn(`${this.agentName} Few-shot示例生成失败`, {
          runId: run.id,
          error: error instanceof Error ? error.message : String(error),
        });
      }
    }

    // The actual review task.
    messages.push({
      role: 'user',
      content: `审查以下代码变更:\n${compactContext}`,
    });

    try {
      for (let iteration = 0; iteration < maxIterations; iteration++) {
        logger.info(`${this.agentName} ReAct迭代 ${iteration + 1}/${maxIterations}`, {
          runId: run.id,
        });

        // Force JSON output (and disable tool calls) only on the last iteration,
        // so that the JSON response format does not suppress tool usage earlier.
        const isLastIteration = iteration === maxIterations - 1;
        const response = await this.gateway.chatForRole('specialist', {
          messages,
          temperature: 0,
          tools: registry.toToolDefinitions(),
          providerOptions: { tool_choice: isLastIteration ? 'none' : 'auto' },
          responseFormat: isLastIteration ? 'json' : undefined,
        });

        // The model asked for tools: run them and feed the results back.
        if (response.toolCalls.length > 0) {
          messages.push({
            role: 'assistant',
            content: response.content || '',
            toolCalls: response.toolCalls,
          });

          const toolResults = await this.executeTools(response.toolCalls, {
            workspacePath: context.workspacePath,
            mirrorPath: context.mirrorPath,
            runId: run.id,
          });

          for (const toolResult of toolResults) {
            // Choose by the success flag so that falsy results (0, '', false) are
            // reported faithfully; `?? null` keeps the content a string even when
            // a tool returns undefined.
            const payload = toolResult.success
              ? toolResult.result ?? null
              : { error: toolResult.error };
            messages.push({
              role: 'tool',
              toolCallId: toolResult.toolCallId,
              content: JSON.stringify(payload),
            });
          }

          continue; // Next iteration.
        }

        // No content and no tool calls: nothing more to do.
        if (!response.content) {
          break;
        }

        try {
          const parsed = JSON.parse(response.content);

          if (parsed.findings && parsed.findings.length > 0) {
            // Validate against the schema so malformed findings never reach publishing.
            const validated = findingResponseSchema.parse({ findings: parsed.findings });
            for (const item of validated.findings) {
              const fp =
                item.fingerprint ||
                buildFingerprint(this.category, item.path, item.line, item.title);
              // De-duplicate by fingerprint: a later iteration overwrites an earlier one.
              findingsMap.set(fp, {
                ...item,
                category: this.category,
                fingerprint: fp,
              });
            }
          }

          if (!parsed.need_more_investigation) {
            break;
          }

          // The model wants to continue but called no tool: nudge it with a user
          // message to avoid it repeating itself.
          messages.push({
            role: 'assistant',
            content: response.content,
          });
          messages.push({
            role: 'user',
            content:
              '请使用工具进行更深入的调查。如果你已经获得了足够的信息,请将 need_more_investigation 设为 false 并输出最终结果。',
          });
        } catch (parseError) {
          // The response was not usable JSON (or failed validation). Keep it in
          // the conversation and ask the model to answer in JSON.
          logger.warn(`${this.agentName} 响应非 JSON 格式,尝试引导模型返回 JSON`, {
            runId: run.id,
            error: parseError instanceof Error ? parseError.message : String(parseError),
          });
          messages.push({
            role: 'assistant',
            content: response.content,
          });
          messages.push({
            role: 'user',
            content:
              '你的上一次响应不是有效的 JSON。请以纯 JSON 格式返回结果:{"findings": [...], "need_more_investigation": false}。不要包含任何额外文字。',
          });
        }
      }

      return { agentName: this.agentName, findings: Array.from(findingsMap.values()) };
    } catch (error) {
      logger.error(`${this.agentName} ReAct执行失败`, {
        runId: run.id,
        error: error instanceof Error ? error.message : String(error),
        partialFindings: findingsMap.size,
      });
      // Keep whatever was already validated in earlier iterations.
      return { agentName: this.agentName, findings: Array.from(findingsMap.values()) };
    }
  }

  /**
   * Executes the requested tool calls one after another.
   *
   * Never throws: an unknown tool, unparsable arguments or a failing tool each
   * produce a `success: false` result carrying the error message.
   *
   * @param toolCalls Tool calls requested by the model.
   * @param context Execution context passed to every tool.
   * @returns One result per tool call, in the same order.
   */
  private async executeTools(
    toolCalls: LLMToolCall[],
    context: ToolExecutionContext
  ): Promise<ToolResult[]> {
    const results: ToolResult[] = [];

    for (const toolCall of toolCalls) {
      const tool = this.toolRegistry?.get(toolCall.name);

      if (!tool) {
        results.push({
          toolCallId: toolCall.id,
          success: false,
          error: `工具 ${toolCall.name} 未找到`,
        });
        continue;
      }

      try {
        const params = JSON.parse(toolCall.arguments);
        const result = await tool.execute(params, context);

        logger.info(`工具调用成功: ${toolCall.name}`, {
          runId: context.runId,
          params,
        });

        results.push({
          toolCallId: toolCall.id,
          success: true,
          result,
        });
      } catch (error) {
        logger.error(`工具调用失败: ${toolCall.name}`, {
          runId: context.runId,
          error: error instanceof Error ? error.message : String(error),
        });

        results.push({
          toolCallId: toolCall.id,
          success: false,
          error: error instanceof Error ? error.message : String(error),
        });
      }
    }

    return results;
  }
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user