28 Commits

Author SHA1 Message Date
jeffusion
2fac1f6942 fix(review): treat triage entrypoints as hints 2026-05-20 01:55:23 +08:00
jeffusion
45fcf2eaa1 refactor(review): remove domain specialist review path 2026-05-20 01:41:23 +08:00
jeffusion
d48eee3474 test(review): verify autonomous review behavior 2026-05-20 01:17:21 +08:00
jeffusion
c0de9238b5 refactor(review): normalize autonomous findings deterministically 2026-05-20 00:46:50 +08:00
jeffusion
aa8d4ab072 refactor(kernel): dispatch single autonomous review subagent 2026-05-20 00:35:22 +08:00
jeffusion
1831704644 refactor(review): make triage produce review hints 2026-05-20 00:11:18 +08:00
jeffusion
f0e45a5ae5 feat(review): add autonomous review agent loop 2026-05-19 23:51:40 +08:00
jeffusion
0ad83a4082 refactor(review): define autonomous review contract 2026-05-19 23:34:56 +08:00
jeffusion
eeb209dbaf chore(runtime): apply resilience config at startup
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-05-12 14:48:23 +08:00
jeffusion
e1d8c1b7d2 chore(review): remove triage cleanup residue
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-05-12 14:48:23 +08:00
jeffusion
6d62b9f87c test(review): preserve kernel resume invariants
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-05-12 14:48:22 +08:00
jeffusion
bcc9e7b8eb test(review): assert concise review comments
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-05-12 14:48:22 +08:00
jeffusion
12e1f4717b fix(review): dedupe repeated specialist findings
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-05-12 14:47:35 +08:00
jeffusion
6ca9edecfd fix(review): require code reading before specialist findings
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-05-12 14:47:35 +08:00
jeffusion
c4cbced8af fix(llm): pass tool choice to OpenAI-compatible providers
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-05-12 14:47:35 +08:00
jeffusion
e0ab3019db refactor(admin): simplify model route assignment UI
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-05-12 14:46:58 +08:00
jeffusion
cd2bdf4131 feat(db): migrate model roles to planner and specialist
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-05-12 14:46:58 +08:00
jeffusion
b304814e42 refactor(llm): drop judge and embedding roles
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-05-12 14:46:58 +08:00
jeffusion
1ff629cffb refactor(review): remove reflexion agent wiring
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-05-12 14:46:28 +08:00
jeffusion
8ccc7452e5 refactor(review): remove legacy publishing policy path
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-05-12 14:46:28 +08:00
jeffusion
b2b914f919 refactor(review): remove obsolete memory and learning modules
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-05-12 14:46:28 +08:00
jeffusion
7b9b9e69a7 refactor(config): remove retired review settings
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-05-12 14:45:56 +08:00
jeffusion
46c5e09a62 fix(e2e): stabilize kernel engine startup 2026-05-07 09:48:13 +08:00
jeffusion
1a43b1f206 docs(kernel): describe built-in subagents 2026-05-07 00:13:35 +08:00
jeffusion
1b26fac951 test(e2e): automate kernel review flow 2026-05-07 00:13:19 +08:00
jeffusion
38e4c58d71 feat(admin): add kernel review session console 2026-05-07 00:12:33 +08:00
jeffusion
5b29e2d4af feat(review): replace orchestrator with kernel engine 2026-05-07 00:12:20 +08:00
jeffusion
ac40957ede feat(kernel): add extensible agent runtime 2026-05-07 00:12:10 +08:00
138 changed files with 13662 additions and 5384 deletions

View File

@@ -52,3 +52,58 @@ jobs:
path: |
frontend/playwright-report/
frontend/test-results/
e2e:
runs-on: ubuntu-22.04
needs: test
services:
gitea:
image: gitea/gitea:1.22
ports: ['3333:3000']
env:
GITEA__database__DB_TYPE: sqlite3
GITEA__server__ROOT_URL: http://localhost:3333
GITEA__security__INSTALL_LOCK: true
GITEA__webhook__ALLOWED_HOST_LIST: '*'
GITEA__webhook__SKIP_TLS_VERIFY: true
options: >-
--health-cmd "curl -f http://localhost:3000/api/v1/version"
--health-interval 5s
--health-timeout 3s
--health-retries 20
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up Bun
uses: oven-sh/setup-bun@v2
with:
bun-version: 1.3.10
- name: Install dependencies
run: bun install --frozen-lockfile
- name: Install git
run: sudo apt-get update && sudo apt-get install -y git
- name: Create Gitea admin user
  run: |
    # The Gitea CLI refuses to run as root, so every attempt must exec as the
    # image's `git` user; otherwise each retry fails and only burns time.
    container="$(docker ps -q --filter "ancestor=gitea/gitea:1.22")"
    created=0
    for i in $(seq 1 10); do
      # stderr is kept so a failing attempt is visible in the job log.
      if docker exec -u git "$container" \
        gitea admin user create --username e2e-admin --password 'e2ePassword123!' --email 'e2e@test.local' --admin; then
        echo "User created"
        created=1
        break
      fi
      echo "Retrying... ($i)"
      sleep 3
    done
    # Stay best-effort (the step never fails the job), but surface the problem.
    if [ "$created" -ne 1 ]; then
      echo "::warning::Could not create the e2e-admin user in Gitea"
    fi
- name: Run E2E tests
run: bun run test:e2e
env:
E2E_GITEA_URL: http://localhost:3333
E2E_MOCK_LLM: 1

View File

@@ -1,14 +1,8 @@
version: '3.8'
# E2E 测试环境：Gitea + gitea-assistant
# 用法:
# docker compose -f docker-compose.e2e.yml up -d
# # 等待服务启动后运行 seed 脚本:
# ./e2e/seed.sh
# # 运行 E2E 测试:
# ./e2e/test.sh
# # 清理:
# docker compose -f docker-compose.e2e.yml down -v
# docker compose -f docker-compose.e2e.yml up -d
# ./e2e/seed.sh
# docker compose -f docker-compose.e2e.yml down -v
services:
gitea:
@@ -47,8 +41,15 @@ services:
- GITEA_API_URL=http://gitea:3000/api/v1
- GITEA_ACCESS_TOKEN=${E2E_GITEA_TOKEN:-placeholder}
- PORT=5174
- ENCRYPTION_KEY=5752fac0e57d00e9b7954863faef878693420e6b06bc20d710897587e802668a
- REVIEW_ENGINE=kernel
- REVIEW_WORKDIR=/tmp/e2e-review
- DATABASE_PATH=/data/assistant.db
- E2E_MOCK_LLM=1
ports:
- "3334:5174"
volumes:
- assistant-data:/data
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:5174/api/health"]
interval: 5s
@@ -58,3 +59,4 @@ services:
volumes:
gitea-data:
assistant-data:

View File

@@ -13,6 +13,7 @@ This project keeps the root `README.md` concise and moves implementation/deploym
## Architecture & design
- [Pluggable LLM providers](./design/pluggable-llm-providers.md)
- [Kernel built-in Agent architecture](./design/kernel-built-in-agents.md)
- [Notification service refactoring](./design/notification-service-refactoring.md)
- [UI theme language](./design/ui-theme-language.md)

View File

@@ -13,6 +13,7 @@
## 架构与设计
- [可插拔 LLM 提供商设计](./design/pluggable-llm-providers.md)
- [Kernel 内置 Agent 架构设计](./design/kernel-built-in-agents.md)
- [通知服务重构设计](./design/notification-service-refactoring.md)
- [UI 主题语言设计](./design/ui-theme-language.md)

View File

@@ -0,0 +1,890 @@
# 技术设计文档：Kernel 内置 Agent 架构
> **状态**: Draft
> **作者**: AI Architect
> **日期**: 2026-04-28
> **相关模块**: `src/agent-kernel/`、`src/review/kernel/`
> **适用范围**: Review Kernel 的内置 subagent 体系、运行时委派、管理后台可观测能力与生产测试门禁
---
## 目录
- [0. 文档信息](#0-文档信息)
- [1. 背景与目标](#1-背景与目标)
- [2. 设计原则与关键取舍](#2-设计原则与关键取舍)
- [3. 概要设计](#3-概要设计)
- [4. 内置 Agent 详细设计](#4-内置-agent-详细设计)
- [4.8 Agent工作机制详解](#48-agent工作机制详解)
- [5. 运行时与状态设计](#5-运行时与状态设计)
- [6. API 与管理后台可观测性](#6-api-与管理后台可观测性)
- [7. 非功能性设计](#7-非功能性设计)
- [8. 测试与上线验证](#8-测试与上线验证)
- [9. 风险、待确认与后续演进](#9-风险待确认与后续演进)
---
## 0. 文档信息
| 字段 | 内容 |
|---|---|
| 版本 | v0.1 |
| 状态 | 草案 |
| 目标读者 | 研发 / 架构 / QA / 运维 / 管理后台开发 |
| 系统类型 | AI 应用工程 / 后端 Agent Runtime / 审查系统适配层 |
| 主要代码路径 | `src/agent-kernel/`、`src/review/kernel/` |
| 相关配置 | `REVIEW_ENGINE=kernel` |
### Assumptions
- 当前项目已选择 **kernel-first** 作为代码审查主路径;旧固定 agent 编排不作为未来运行时主路径。
- 内置 Agent 当前以 **built-in subagent definition** 的方式注册,后续可演进到 plugin/custom subagent 加载。
- 一条 PR 对应一个 kernel session；commit 更新、人工反馈和后续恢复都写入同一 session。
### To Be Confirmed
- 是否需要把 built-in subagent 的定义从 TypeScript 代码进一步外置为 YAML/JSON/插件目录。
- 管理后台是否需要支持逐 subagent 的启用/禁用、版本选择与灰度策略。
---
## 1. 背景与目标
### 1.1 背景
早期审查系统采用固定流程编排：triage 后按审查域派生多个 specialist，再由额外阶段汇总。该方案的问题是：
- 流程扩展需要修改 orchestrator/runtime 代码；
- 角色能力与执行链路耦合，难以按能力标签选择代理；
- 缺少独立 subagent identity、delegation boundary 和 invocation trace；
- 管理后台难以展示“有哪些 Agent、何时被调用、产生了什么结果”；
- 恢复、压缩、权限、hook 等横切能力难以统一接入。
新的 Kernel 内置 Agent 架构将 review 角色转换为注册式 built-in subagents：`AgentKernelRunner` 根据 planner 输出与 session state 推进任务，并通过 `KernelAgentInvoker` 统一委派执行。
### 1.2 核心目标
| 目标 | 说明 |
|---|---|
| 注册式扩展 | 内置 Agent 以 `KernelSubagentDefinition` 注册，runtime 不硬编码角色实例 |
| 能力选择 | planner 通过 tags/capabilities 选择 subagent，而不是写死 agent id |
| 可恢复执行 | session checkpoint 持久化 state + pendingTasks，支持 feedback 后继续执行 |
| 委派边界 | 每次 subagent 调用都有 agentId、delegation packet、invocation record、structured result |
| 上下文压缩 | 大上下文触发 compression，summary 写入 checkpoint 并回注后续 subagent |
| 工具治理 | 工具调用走统一 orchestration、permission gating 与 hooks |
| 可观测性 | 管理 API 暴露 task/subagent/hook catalog、session timeline、subagent invocations |
### 1.3 范围与非范围
**范围内**
- Review Kernel 内置 subagents 的定义、职责、标签、运行链路;
- Kernel agent registry / invoker / runner 与 session checkpoint 的协作;
- 内置 Agent 与 tools、hooks、permission、compression 的集成方式;
- 管理后台需要消费的 catalog 与 session 投影视图;
- 生产前自动化测试门禁。
**范围外**
- 前端 UI 视觉设计细节;
-`agent` 固定编排引擎兼容;
- Codex CLI 引擎内部实现;
- 通用插件市场、远程 agent 执行后端和多租户权限模型。
---
## 2. 设计原则与关键取舍
### 2.1 核心设计原则
| 原则 | 落地方式 |
|---|---|
| 高内聚低耦合 | `src/agent-kernel/` 只提供通用 session/runner/registry/invoker/hooks；review 逻辑放在 `src/review/kernel/` |
| 开闭原则 | 新增流程能力优先增加 subagent、skill、hook 或 tool，而不是修改主循环 |
| Session 为状态源 | PR/commit session 记录 event、checkpoint、subagent invocation，是恢复与投影的事实来源 |
| 可观测优先 | 每次 subagent 调用持久化 invocation，每个 task 写入 started/completed/failed event |
| 安全默认 | 工具执行统一经过 permission gating，高风险 scope 默认 ask/deny |
| 可测试 | 断言面落在 checkpoint、events、invocations、tool result、admin projection，而不是完整 LLM 文本 |
### 2.2 关键取舍
| 取舍点 | 选择 | 原因 |
|---|---|---|
| 内置 Agent 表达方式 | TypeScript built-in definitions | 当前阶段需要强类型、低迁移成本;后续可迁移到 plugin loader |
| Agent 调用入口 | `KernelAgentInvoker` 统一调用 | 统一 agentId、hook、invocation persistence、structured result |
| 流程推进方式 | planner + session state | 避免静态任务数组;支持继续执行与人审恢复 |
| Findings 处理 | 本地归一化、去重、排序与发布 | full review 只产出 findings，后续由 skill/本地逻辑保证确定性 |
| 压缩策略 | planner 模型窗口 80% 触发 | 使用 tokenlens context window，预留 20% 冗余 |
| 管理接口 | task/subagent/hook catalog + session detail | 让后台可解释当前能力目录与执行轨迹 |
---
## 3. 概要设计
### 3.1 总体架构
```mermaid
flowchart TB
Webhook[Gitea Webhook / Feedback] --> Engine[KernelReviewEngine]
Engine --> Session[(Kernel Session Repository)]
Engine --> Runtime[ReviewKernelRuntime]
Runtime --> Runner[AgentKernelRunner]
Runtime --> SkillRegistry[KernelTaskRegistry / Skills]
Runtime --> AgentRegistry[KernelAgentRegistry / Built-in Subagents]
Runtime --> HookRegistry[KernelHookRegistry]
Runtime --> ToolRegistry[ToolRegistry]
Runner --> Planner[State-driven Planner]
Planner --> SkillTask[Skill Task]
Planner --> SubagentTask[Subagent Task]
SkillTask --> SkillRegistry
SubagentTask --> Invoker[KernelAgentInvoker]
Invoker --> AgentContext[AsyncLocalStorage Agent Context]
Invoker --> Invocation[(Subagent Invocation Record)]
Invoker --> Builtins[Review Built-in Subagents]
Builtins --> Triage[review:triage]
Builtins --> FullReview[review:full_review]
FullReview --> ToolOrchestration[Tool Orchestration]
ToolOrchestration --> Permission[Permission Gating]
ToolOrchestration --> Hooks[Pre/Post Tool Hooks]
Runtime --> AdminAPI[Admin API Catalog / Session Projection]
```
### 3.2 模块职责
| 模块 | 文件 | 职责 |
|---|---|---|
| Kernel types | `src/agent-kernel/types.ts` | 定义 task、subagent、delegation packet、checkpoint、invocation result |
| Agent registry | `src/agent-kernel/agents/kernel-agent-registry.ts` | 注册、查询、按 tag 过滤 subagent |
| Agent invoker | `src/agent-kernel/agents/kernel-agent-invoker.ts` | 创建 agentId、触发 hook、持久化 invocation、执行 subagent |
| Agent context | `src/agent-kernel/agents/kernel-agent-context.ts` | 使用 AsyncLocalStorage 隔离子代理执行上下文 |
| Runner | `src/agent-kernel/runtime/agent-kernel-runner.ts` | 按 planner 结果推进 skill/subagent task，写 checkpoint 与 task event |
| Session repo | `src/agent-kernel/session/session-repository.ts` | 持久化 session、events、checkpoint、subagent invocations |
| Review runtime | `src/review/kernel/review-kernel-runtime.ts` | 注册 skills/hooks/built-in subagents，提供 execute/continueExecution |
| Built-in subagents | `src/review/kernel/review-built-in-subagents.ts` | 将 triage 与 full_review 转换为注册式 subagent definitions |
| Subagent ids | `src/review/kernel/review-subagent-ids.ts` | 统一内置 subagent id 命名 |
| Admin projection | `src/review/kernel/session-read-model.ts` | 将 session event/checkpoint/invocation 投影为后台视图 |
### 3.3 核心执行链路
```mermaid
sequenceDiagram
participant E as KernelReviewEngine
participant R as ReviewKernelRuntime
participant S as SessionRepository
participant K as AgentKernelRunner
participant I as KernelAgentInvoker
participant A as Built-in Subagent
E->>S: ensureSession(scopeKey)
E->>R: execute(run, sessionId)
R->>S: appendEvent(run_started)
R->>K: run(initialState, initialTasks=[])
loop until stopReason
K->>K: planner.plan(state)
alt skill task
K->>R: execute skill handler
else subagent task
K->>I: invoke(task, context)
I->>S: createSubagentInvocation(running)
I->>A: execute(task, agentContext)
A-->>I: KernelHandlerResult
I->>S: completeSubagentInvocation(completed)
end
K->>S: appendEvent(task_completed)
K->>S: saveCheckpoint(state, pendingTasks, stopReason)
end
R->>S: appendEvent(run_completed)
```
---
## 4. 内置 Agent 详细设计
### 4.1 内置 Agent 目录
| Subagent ID | Source | Model Role | Tags | 职责 | 触发条件 |
|---|---|---|---|---|---|
| `review:triage` | `built-in` | `planner` | `review`, `planner`, `triage` | 根据 diff、文件、风险生成自主审查提示、模式和预算 | build context 完成且尚无 triage 结果 |
| `review:full_review` | `built-in` | `specialist` | `review`, `specialist`, `full-review`, `autonomous-review` | 执行一次完整自主代码审查,模型自行选择工具和调查路径 | triage 完成且尚未完成 full review |
### 4.2 Subagent Definition 契约
每个内置 Agent 必须实现 `KernelSubagentDefinition<TState>`：
```typescript
interface KernelSubagentDefinition<TState> {
kind: 'subagent';
name: string;
source: 'built-in' | 'custom' | 'plugin';
description: string;
whenToUse: string;
tags?: string[];
modelRole?: string;
resumable?: boolean;
execute(task, context): Promise<KernelHandlerResult<TState> | undefined>;
}
```
关键约束:
- `name` 必须稳定,作为 session event、invocation、admin catalog 的统一标识;
- `tags` 必须包含能力标签，planner 只能按 tag/capability 选择代理；
- `whenToUse` 既用于管理后台解释，也用于 delegation packet 的 goal；
- `execute` 不直接控制主循环，只返回 state/enqueue/prepend/stopReason；
- 内置 Agent 不应越权直接修改 pendingTasks，除非通过标准 `KernelHandlerResult`。
### 4.3 Planner 选择规则
`ReviewKernelRuntime.planTasks()` 根据 checkpoint state 推导下一步:
```mermaid
flowchart TD
A[开始 plan] --> B{有 pendingTasks?}
B -- 是 --> Z[不新增任务]
B -- 否 --> C{缺 workspace?}
C -- 是 --> PW[prepare_workspace skill]
C -- 否 --> D{缺 context?}
D -- 是 --> BC[build_context skill]
D -- 否 --> E{需要压缩?}
E -- 是 --> CC[compress_context skill]
E -- 否 --> F{缺 triage?}
F -- 是 --> T[按 tag=triage 选择 review:triage]
F -- 否 --> G{full review 未完成?}
G -- 是 --> S[执行 review:full_review]
G -- 否 --> P{未 publish?}
P -- 是 --> PR[publish_review skill]
P -- 否 --> R{未保存 reviewed ref?}
R -- 是 --> SR[save_reviewed_ref skill]
R -- 否 --> DONE[completed]
```
### 4.4 Triage Agent
`review:triage` 包装 `TriageAgent`,输出自主审查提示:
- 使用 `planner` 模型角色;
- 接收 `projectPrompt` 与 `compressedContext.summary`；
- 生成 `mode`、`reviewSize`、`riskTags`、`suspectedEntrypoints` 与预算提示；
- 提示只影响 full review 的调查起点,不拆分审查任务。
### 4.5 Autonomous Full Review Agent
`review:full_review` 包装 `AutonomousReviewAgent`：
- 共享 `ToolRegistry` 与 `KernelHookRegistry`；
- 根据 `ReviewTask` 控制 mode、reviewSize、riskTags、suspectedEntrypoints、maxTurns、maxToolCalls、maxElapsedMs、tokenBudget；
- 支持压缩 summary 回注到 prompt；
- 不预拆 correctness/security/quality 子任务,模型在一次自主循环内跨文件调查;
- 工具调用统一经过 tool orchestration、permission gating、Pre/Post tool hooks。
### 4.6 Aggregate Findings Skill
`aggregate_findings` 是 full review 后的确定性本地步骤:
- 接收 `review:full_review` 产出的 findings；
- 归一化 category/severity/confidence，补齐 fingerprint；
- 按 fingerprint 去重，并按 severity/path/line/title 稳定排序；
- 写回 checkpoint，供后续发布步骤使用。
### 4.7 Publish and Save Skills
`publish_review` 与 `save_reviewed_ref` 负责外部副作用：
- `publish_review` 生成确定性 summary，并发布 PR summary 与 line comments；
- `save_reviewed_ref` 在本地 mirror 保存已审查 ref，用于后续增量审查；
- 两个步骤分离,避免评论发布和 ref 保存互相污染,失败时依赖 checkpoint 重试。
---
## 4.8 Agent工作机制详解
本节详细说明 Kernel Agent 的运转机制、任务调度、工具调用、决策逻辑及边界划分。
### 4.8.1 核心运转架构
Kernel 采用「**事件驱动 + 状态机**」的运行模式:
```mermaid
flowchart LR
Webhook[Gitea Webhook / Feedback] --> Engine[KernelReviewEngine]
Engine --> Session[Session Repository]
Engine --> Runtime[ReviewKernelRuntime]
Runtime --> Runner[AgentKernelRunner]
Runner --> Planner[Turn Planner]
Planner --> Tasks[Tasks Queue]
Tasks --> Executor[Task Executor]
Executor --> State[State Update]
State --> Checkpoint[Checkpoint Save]
Checkpoint --> Runner
```
**关键组件职责**:
| 组件 | 文件 | 核心职责 |
|------|------|----------|
| **AgentKernelRunner** | `agent-kernel-runner.ts` | 主循环控制器：任务调度、状态流转、checkpoint 管理 |
| **ReviewKernelRuntime** | `review-kernel-runtime.ts` | Review 业务运行时:封装 skills、subagents、hooks、tools |
| **KernelTurnPlanner** | `review-kernel-runtime.ts:305-361` | 基于当前 state 决定下一步执行什么任务 |
### 4.8.2 核心运转流程
**1. 启动阶段**:
```typescript
// PR webhook 触发
kernelReviewEngine.enqueuePullRequest(payload)
ensureSession(scopeKey) // 创建或复用 session
runtime.execute(run, sessionId) // 启动运行时
AgentKernelRunner.run({ // 启动主循环
sessionId,
initialState: {...},
initialTasks: []
})
```
**2. 主循环机制** (`AgentKernelRunner.run`):
```typescript
async run({ sessionId, initialState, initialTasks, continueExisting }) {
// 从 checkpoint 恢复状态(支持继续执行)
const persisted = loadCheckpoint(sessionId);
let state = persisted?.state ?? initialState;
const pendingTasks = [...(persisted?.pendingTasks ?? initialTasks)];
// 主循环:直到有 stopReason
while (!stopReason) {
// 如果没有待执行任务,让 planner 规划新任务
if (pendingTasks.length === 0) {
const planned = planner.plan({ session, state, pendingTasks });
pendingTasks.push(...planned);
}
// 取出下一个任务
const task = pendingTasks.shift();
// 执行任务
const result = await executeTask(task, context);
// 处理执行结果
if (result?.state) state = result.state; // 更新状态
if (result?.prepend) pendingTasks.unshift(...result.prepend); // 前置任务
if (result?.enqueue) pendingTasks.push(...result.enqueue); // 后置任务
if (result?.stopReason) stopReason = result.stopReason; // 停止原因
// 保存 checkpoint支持失败恢复
saveCheckpoint(sessionId, { state, pendingTasks, stopReason });
}
}
```
**3. 恢复机制** (`continueExisting`):
- 从 SQLite 加载持久化的 checkpoint
- 恢复 `state` 与 `pendingTasks`；
- **显式忽略**旧 checkpoint 的 `stopReason`,允许从 feedback 后继续
- 不 replay events直接继续执行
### 4.8.3 任务调度与决策
**Planner 是决策中枢**,根据当前 state 动态决定下一步:
```typescript
private planTasks(context: KernelPlanningContext): KernelTask[] {
// 阶段1: 前置条件检查(顺序执行)
if (!context.state.workspacePath) {
return [{ kind: 'skill', name: 'prepare_workspace' }];
}
if (!context.state.context) {
return [{ kind: 'skill', name: 'build_context' }];
}
// 阶段2: 上下文压缩决策
if (shouldCompress(context)) {
return [{ kind: 'skill', name: 'compress_context' }];
}
// 阶段3: Triage 决策(生成自主审查提示)
if (!context.state.triage) {
return [{ kind: 'subagent', name: 'review:triage' }];
}
// 阶段4: 单次完整自主审查
if (!context.state.reviewCompleted) {
return [{ kind: 'subagent', name: 'review:full_review' }];
}
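// 阶段5: 发布与收尾
if (!context.state.published) {
return [{ kind: 'skill', name: 'publish_review' }];
}
// 发布后保存已审查 ref，供后续增量审查使用
if (!context.state.reviewedRefSaved) {
return [{ kind: 'skill', name: 'save_reviewed_ref' }];
}
return []; // 完成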
}
```
**决策依据**:
- **当前 State**: `triage`, `reviewCompleted`, `findings`, `published`, `reviewedRefSaved` 等字段
- **Tags/Capabilities**: 按标签选择 subagent（`filterByTag('triage')`），非硬编码
- **Config 开关**: 审查引擎、工作区、命令白名单等运行配置
### 4.8.4 Skills 与 Subagents 调用机制
**Skills - 原子任务**:
```typescript
// 注册 Skills
this.skillRegistry.register(createPrepareWorkspaceSkill());
this.skillRegistry.register(createBuildContextSkill());
// Skill 定义
{
kind: 'skill',
name: 'build_context',
execute: async (task, context) => {
// 执行业务逻辑
const reviewContext = await diffExtractor.buildContext(...);
return {
state: { ...context.state, context: reviewContext }, // 更新状态
// 可选控制流
prepend: [], // 插入待执行队列头部，当前任务结束后优先执行
enqueue: [], // 追加到待执行队列尾部
stopReason: undefined // 或 'completed', 'failed', 'awaiting_human_feedback'
};
}
}
```
**Subagents - 委派执行**:
```typescript
// 调用路径
AgentKernelRunner → KernelAgentInvoker.invoke(task, context)
→ 创建 invocation record（status=running）
→ subagent.execute(task, agentContext)
→ 更新 invocation（completed / failed）
```
```typescript
// Subagent 执行上下文
const agentContext: KernelAgentExecutionContext = {
...context,
agent, // subagent 定义
delegation: { // 委派包
goal: agent.whenToUse,
parentTaskName: task.name,
input: task.input,
contextSummary: state.compressedContext?.summary // 压缩摘要回注
}
};
// 执行(带 AsyncLocalStorage 隔离)
const result = await runWithKernelAgentContext(
{ agentId, parentSessionId, agentType: 'subagent', ... },
() => agent.execute(task, agentContext)
);
```
### 4.8.5 Tools 调用机制
**调用路径**(在 `review:full_review` 内部):
```mermaid
sequenceDiagram
participant FullReview as AutonomousReviewAgent
participant Loop as Autonomous Loop
participant Orchestration as ToolOrchestration
participant Permission as Permission Gating
participant Hook as PreToolUse Hook
participant Tool as Tool.execute()
participant PostHook as PostToolUse Hook
FullReview->>Loop: 决定调用 tool
Loop->>Orchestration: partitionToolCalls(tools)
Orchestration->>Permission: evaluateToolPermission(tool)
Permission-->>Orchestration: allow/ask/deny
Orchestration->>Hook: runKernelHooks(PreToolUse)
Hook-->>Orchestration: additionalContext/updatedInput
Orchestration->>Tool: tool.execute(args)
Tool-->>Orchestration: result
Orchestration->>PostHook: runKernelHooks(PostToolUse)
PostHook-->>Orchestration: -
Orchestration-->>Loop: toolResult
Loop-->>FullReview: 更新 diagnostics/findings
```
**并发控制**:
- **并发安全工具** (`isConcurrencySafe: true`): 并行执行
- **非并发安全工具**: 串行执行
- **权限拦截**: `PermissionRequest` Hook 可批准/阻断
**权限边界**:
| Scope | 默认行为 | 说明 |
|-------|----------|------|
| `read` | `allow` | 安全操作(读文件、搜索代码) |
| `write` | `ask` | 需审批(写文件) |
| `command` | `ask` | 需审批(执行命令) |
| `git_write` | `ask` | 需审批（Git 操作） |
| `network` | `deny` | 禁止网络访问 |
| `cross_session` | `deny` | 禁止跨 session 操作 |
### 4.8.6 代码审查结合流程
**完整数据流**:
```
Webhook → PR/Commit
prepare_workspace → 克隆仓库、准备 mirror/workspace
build_context → 提取 diff、文件内容、构建 ReviewContext
compress_context (可选) → 大上下文自动压缩,生成 summary
review:triage → 生成自主审查提示、模式和预算
review:full_review → 单个自主代理跨文件调查，生成 findings
aggregate_findings → 归一化、去重并稳定排序 findings
publish_review → 发布 summary + line comments
save_reviewed_ref → 保存审查快照(支持增量审查)
```
**状态流转**:
```mermaid
stateDiagram-v2
[*] --> prepare_workspace: 启动
prepare_workspace --> build_context: 成功
build_context --> compress_context: 上下文过大
build_context --> triage: 正常
compress_context --> triage: 完成
triage --> full_review: 提示生成完成
full_review --> publish_review: findings 聚合完成
publish_review --> save_reviewed_ref: 直接完成
save_reviewed_ref --> [*]: completed
```
### 4.8.7 边界划分
**Skills vs Subagents 边界**:
| 维度 | Skills | Subagents |
|------|--------|-----------|
| **粒度** | 原子操作(准备环境、构建上下文、发布) | 复杂推理(规划、完整审查) |
| **模型** | 通常不涉及 LLM | 必须调用 LLM（planner/specialist） |
| **并发** | 顺序执行 | 通过单个 full review 代理内部自主工具调用实现调查 |
| **状态** | 修改 state 字段 | 可修改 state主要产出 hints/findings/diagnostics |
| **失败** | 阻断整个流程 | 可单独重试或降级 |
| **示例** | prepare_workspace, publish_review | review:triage, review:full_review |
**Runtime vs Runner 边界**:
| 组件 | 职责 | 不做什么 |
|------|------|----------|
| **AgentKernelRunner** | 通用调度、checkpoint、task 循环 | 不感知 Review 业务逻辑 |
| **ReviewKernelRuntime** | Review 业务封装、skills、subagents、hooks | 不直接调度任务（委托给 runner） |
**Subagents 间边界**:
| Subagent | 输入 | 输出 | 边界限制 |
|----------|------|------|----------|
| **triage** | ReviewContext | review hints + budget | 只生成提示,不审查 |
| **full_review** | ReviewTask + context | findings[] + diagnostics | 一次完整自主审查,不预拆域或文件 |
**Hook 介入边界**:
```typescript
// 在关键生命周期点介入
SessionStart // session 启动时
SubagentStart // subagent 启动时
PreToolUse // 工具调用前(可修改输入、阻断)
PermissionRequest // 权限请求时（决定 allow/ask/deny）
PostToolUse // 工具调用成功后
PostToolUseFailure // 工具调用失败后
```
**Session 隔离边界**:
- 每个 PR/Commit 对应独立 session
- session 间 state 不共享
- tool 默认禁止 cross_session 操作
- subagent invocation 绑定 parentSessionId
---
## 5. 运行时与状态设计
### 5.1 Session 与 Checkpoint
每条 PR/commit 审查对应一个 kernel session
| 数据 | 用途 |
|---|---|
| `KernelSessionRecord` | 记录 scopeType、scopeKey、metadata、lastRunId |
| `KernelSessionEventRecord` | append-only 事件流,记录 run/task/hook/feedback 生命周期 |
| `KernelCheckpoint<TState>` | 持久化 state、pendingTasks、stopReason |
| `KernelSubagentInvocationRecord` | 记录每次 subagent 委派调用 |
恢复语义:
- `continueExisting=true` 时从 persisted checkpoint 恢复 `state + pendingTasks`；
- 显式忽略旧 checkpoint 的 stopReason，允许 feedback 后继续推进；
- 当前不 replay session events 重建 state，event 主要用于投影与审计。
### 5.2 ReviewKernelState
核心状态包括:
| 字段 | 说明 |
|---|---|
| `targetSha` | 当前审查目标 commit |
| `mirrorPath/workspacePath` | 本地仓库与工作区路径 |
| `context` | `ReviewContext`,包含 diff、changedFiles、fileContents 等 |
| `projectPrompt` | 仓库级审查 prompt |
| `compressedContext` | 自动压缩摘要及 token 元数据 |
| `triage/reviewTask/reviewCompleted` | 自主审查提示、预算与完成状态 |
| `findings` | subagents 收集到的问题 |
| `reviewDiagnostics` | full review 工具调用、停止原因、解析计数等诊断信息 |
| `published/reviewedRefSaved` | 发布与审查快照保存状态位 |
### 5.3 Subagent Invocation
每次 subagent 调用会持久化:
| 字段 | 说明 |
|---|---|
| `parent_session_id` | 父 session |
| `parent_run_id` | 当前 review run |
| `parent_task_name` | 触发该调用的 task name |
| `subagent_name` | subagent id例如 `review:triage` |
| `agent_id` | 本次调用唯一 agent identity |
| `status` | running / completed / failed |
| `input_json` | delegation packet |
| `result_json` | structured invocation result |
失败处理:
- invoker 将 invocation 标记为 `failed`
- runner 写入 `task_failed` event
- checkpoint 保存当前 state 与 `[failedTask, ...pendingTasks]`，stopReason=`failed`；
- 调用方可根据 checkpoint 与错误信息决定重试/人工介入。
### 5.4 上下文压缩与回注
```mermaid
sequenceDiagram
participant P as Planner
participant C as ContextCompressionService
participant S as Session Checkpoint
participant A as Subagent
P->>C: shouldCompress(context, compressedContext)
C-->>P: true when tokenEstimate >= contextWindow * 0.8
P->>C: compress(context, projectPrompt)
C-->>S: compressedContext(summary, token stats, model, timestamp)
P->>A: invoke subagent with contextSummary
A-->>A: prompt includes compressed summary
```
压缩触发阈值:
- 使用 `tokenCounter.getContextWindow(plannerModel)` 获取模型上下文窗口;
- 取 80% 作为触发阈值,预留 20% 冗余;
- 若无法获取模型配置,兜底使用默认窗口。
### 5.5 Hooks 与 Permission
内置 hooks
| Hook | Event | 作用 |
|---|---|---|
| `kernel:session-start-audit` | `SessionStart` | 写入 `hook_session_start` event |
| `kernel:subagent-start-audit` | `SubagentStart` | 写入 `hook_subagent_start` event |
| `kernel:pre-tool-audit` | `PreToolUse` | 为工具调用追加审计上下文 |
| `kernel:permission-request-audit` | `PermissionRequest` | 记录权限请求上下文 |
工具权限默认策略:
| Scope | 默认行为 |
|---|---|
| `read` | allow |
| `write` | ask |
| `command` | ask |
| `git_write` | ask |
| `network` | deny |
| `cross_session` | deny |
---
## 6. API 与管理后台可观测性
### 6.1 Admin API
| API | 说明 |
|---|---|
| `GET /admin/api/review/sessions` | 返回 session 列表与 summary |
| `GET /admin/api/review/sessions/:sessionId` | 返回 session、summary、checkpoint、plan、timeline、events、subagentInvocations、runDetails |
| `GET /admin/api/review/kernel/tasks` | 返回 skill + subagent task catalog |
| `GET /admin/api/review/kernel/subagents` | 返回 subagent catalog |
| `GET /admin/api/review/kernel/hooks` | 返回 hook catalog |
### 6.2 Subagent Catalog 响应字段
```json
{
"kind": "subagent",
"name": "review:full_review",
"source": "built-in",
"description": "执行一次完整自主代码审查",
"whenToUse": "当 triage 生成审查提示后执行完整审查",
"modelRole": "specialist",
"tags": ["review", "specialist", "full-review", "autonomous-review"],
"resumable": true
}
```
### 6.3 管理后台展示建议
管理后台应采用双层控制面:
- 上层Kernel Subagents 目录,展示 built-in/custom/plugin subagents
- 下层:模型角色路由,配置 `planner / specialist` 到 provider/model。
展示字段建议:
| 区域 | 字段 |
|---|---|
| Subagent 目录 | name、source、description、whenToUse、modelRole、tags、resumable |
| Session 详情 | summary、plan、timeline、findings、comments、subagentInvocations |
| Invocation 详情 | agentId、status、startedAt、finishedAt、summary、artifacts |
---
## 7. 非功能性设计
### 7.1 安全设计
- 工具调用统一走 permission gating，避免 subagent 绕过权限策略；
- 高风险工具默认 ask/deny，不允许直接执行网络、跨 session 或写操作；
- hooks 可作为后续审批、审计、通知与策略扩展点;
- LLM prompt 不作为安全边界,所有外部副作用必须由 tool/skill/adapters 承载。
### 7.2 高可用与恢复
- 每个 task 完成后保存 checkpoint降低失败后的重复工作
- subagent invocation 失败会记录 failed 状态,便于定位失败代理;
- feedback 后通过 `continueExisting` 从 checkpoint 继续;
- publish 与 save reviewed ref 分离,避免评论发布与 ref 保存互相污染;
- cleanup workspace 放在 runtime finally 中执行,降低资源泄漏风险。
### 7.3 可观测性
- session event 记录 run/task/hook/feedback 生命周期;
- subagent invocation 记录 parent-child 委派关系;
- admin projection 汇总 plan/timeline/currentStep/findingCount/pendingTaskCount
- compression 记录 sourceTokenEstimate、summaryTokenEstimate、triggerThreshold、model。
### 7.4 性能与容量
- 大 diff 先经 diff extractor/token budget 裁剪,再由 compression service 做会话级摘要;
- `review:full_review` 在单个自主循环内使用工具逐步调查,避免运行时预拆 domain 或文件;
- tool orchestration 可并发执行 read-only 工具,非并发安全工具串行;
- session/event/checkpoint 使用 SQLite，适合当前单体部署；未来高并发可迁移到外部数据库。
### 7.5 可维护性与扩展性
- 新增内置 Agent 应只新增 `KernelSubagentDefinition` 并打 tags
- 新增流程副作用应优先实现 skill/adapters
- 新增横切逻辑应优先实现 hook
- 新增工具必须声明 permissionScope 和 isConcurrencySafe。
---
## 8. 测试与上线验证
### 8.1 自动化测试分层
| 层级 | 测试文件 | 覆盖点 |
|---|---|---|
| Unit | `src/review/kernel/__tests__/session-read-model.test.ts` | session summary/plan/timeline 投影 |
| Unit | `src/review/tools/__tests__/tool-permissions.test.ts` | permission scope 默认策略 |
| Contract | `src/agent-kernel/hooks/__tests__/kernel-hook-runner.test.ts` | hook 聚合、approve/block、updatedInput |
| Integration | `src/controllers/__tests__/admin-review-sessions.test.ts` | admin session 与 catalog API |
| Integration | `src/controllers/__tests__/feedback-kernel-session.test.ts` | feedback approve/reject/rollback/continue |
| Runtime | `src/review/kernel/__tests__/runtime-happy-path.test.ts` | 完整 runtime happy path |
| Runtime | `src/review/kernel/__tests__/runtime-feedback-resume.test.ts` | awaiting feedback 后恢复 |
| Runtime | `src/review/kernel/__tests__/runtime-replay-invariants.test.ts` | checkpoint/resume/replay 不变量 |
| Runtime | `src/review/kernel/__tests__/runtime-concurrency-idempotency.test.ts` | 并发上限与幂等 |
| Canary | `src/review/kernel/__tests__/compression-resumability.test.ts` | 压缩恢复与生产关键 canary |
### 8.2 上线前门禁
必须通过:
```bash
bun run lint
bun run build
bun test src/review/kernel/__tests__ src/review/tools/__tests__ src/controllers/__tests__ src/agent-kernel/hooks/__tests__
bun test
```
关键验收信号:
- runtime happy path 完成，stopReason=`completed`；
- feedback resume 从 `awaiting_human_feedback` 恢复到 completed；
- compression resume 保留 targetSha、pending boundary、invocation boundary、summary；
- permission deny 不会绕过工具治理;
- duplicate enqueue/continue/feedback 不产生重复有效工作;
- admin session detail 能看到 plan/timeline/subagentInvocations。
### 8.3 灰度与回滚
- 配置默认:`REVIEW_ENGINE=kernel`
- 若需要回滚,可临时切到 `codex` 引擎,但旧固定 agent 编排不再作为主路径;
- 灰度期间重点观察 session stopReason 分布、task_failed 事件、subagent failed invocations、feedback resume 成功率。
---
## 9. 风险、待确认与后续演进
### 9.1 风险与应对
| 风险 | 影响 | 应对 |
|---|---|---|
| Built-in definitions 仍在代码中 | 扩展仍需发版 | 下一阶段引入 plugin/custom subagent loader |
| SQLite 单文件并发能力有限 | 高并发 session 下写入竞争 | 当前单体可接受;未来迁移外部 DB 或队列化写入 |
| Compression summary 可能遗漏细节 | 后续 subagent 判断偏差 | 保留 recent context + summary，测试锁定关键事实不丢 |
| Hook 阻断策略过强或过弱 | 工具误阻断或越权 | permission matrix 测试 + 审计 event + 管理后台策略展示 |
### 9.2 后续演进计划
1. **Plugin-based Subagent Loading**:支持从目录或配置加载 custom/plugin subagents。
2. **Child Session Tree**:为长任务或后台 subagent 引入 child session/resume tree。
3. **Attachment Reinjection**:压缩后恢复文件附件、计划附件和技能附件。
4. **更细粒度权限模型**:支持仓库级、工具级、用户级策略配置。
5. **Subagent 版本治理**:为 built-in/custom/plugin subagents 增加 version、enabled、rollout 字段。
### 9.3 评审清单
- [ ] 内置 Agent 是否都通过 registry/invoker 调用,而不是 runtime 硬编码实例?
- [ ] planner 是否按 tag/capability 选择 subagent？
- [ ] 每次 subagent 调用是否有 invocation record？
- [ ] feedback 后 continue 是否从 checkpoint 恢复？
- [ ] 压缩 summary 是否持久化并回注 triage/full_review？
- [ ] 工具执行是否经过 permission/hook/orchestration？
- [ ] 管理后台是否能展示 catalog、timeline、invocations？
- [ ] 生产测试门禁是否覆盖 happy path、失败恢复、幂等和 canary？
---
## 版本记录
| 版本 | 日期 | 说明 |
|---|---|---|
| v0.1 | 2026-04-28 | 初版:记录 Kernel 内置 Agent 架构、运行链路、可观测性与测试门禁 |

View File

@@ -11,6 +11,8 @@ RUN bun install --no-frozen-lockfile
COPY src ./src
COPY tsconfig.json .
COPY frontend/dist ./public
EXPOSE 5174
CMD ["bun", "run", "start"]

View File

@@ -0,0 +1,169 @@
import { afterAll, beforeAll, describe, expect, test } from 'bun:test';
import {
E2ETestHarness,
type Finding,
type Scenario,
type SessionDetail,
} from './e2e-test-harness';
/**
 * Asserts that a list of findings satisfies the bounds declared by a scenario.
 *
 * Checks, in order: the total count is at least `minFindings` (and at most
 * `maxFindings` when that bound is set), the number of `high` severity findings
 * is at least `minHighSeverity`, and no two findings share a fingerprint.
 * Findings without a (truthy) fingerprint are ignored by the uniqueness check.
 *
 * @param findings - Findings extracted from a finished review session.
 * @param scenario - Expectations loaded from the fixture's scenario file.
 */
function assertFindingsMatchScenario(findings: Finding[], scenario: Scenario): void {
  const total = findings.length;
  expect(total).toBeGreaterThanOrEqual(scenario.minFindings);
  if (scenario.maxFindings !== undefined) {
    expect(total).toBeLessThanOrEqual(scenario.maxFindings);
  }

  let highCount = 0;
  const seenFingerprints: string[] = [];
  for (const { severity, fingerprint } of findings) {
    if (severity === 'high') highCount += 1;
    if (fingerprint) seenFingerprints.push(fingerprint);
  }

  expect(highCount).toBeGreaterThanOrEqual(scenario.minHighSeverity);
  // A Set collapses duplicates, so equal sizes mean every fingerprint is unique.
  expect(new Set(seenFingerprints).size).toBe(seenFingerprints.length);
}
/**
 * Asserts that every expected pipeline step in the session plan has status
 * `completed`. A step that is missing from the plan fails the assertion too,
 * because its looked-up status is `undefined`.
 *
 * @param detail - Session detail whose `plan` entries are inspected.
 */
function expectPipelineStepsCompleted(detail: SessionDetail): void {
  const statusesByKey = new Map<string, string>();
  for (const step of detail.plan) {
    statusesByKey.set(step.key, step.status);
  }

  const requiredSteps = [
    'prepare_workspace',
    'build_context',
    'review:triage',
    'review:full_review',
    'aggregate_findings',
    'publish_review',
    'save_reviewed_ref',
  ];
  for (const stepKey of requiredSteps) {
    expect(statusesByKey.get(stepKey)).toBe('completed');
  }
}
/**
 * Asserts the observable outcome of the single autonomous full-review run:
 * exactly one completed `review:full_review` invocation, the checkpoint flags
 * for review/publish/save-ref, the exact recorded tool-call sequence and stop
 * reason, the content of the first finding, and the published comments.
 *
 * @param detail - Session detail returned by the admin API for a finished review.
 */
function expectAutonomousFullReviewPipeline(detail: SessionDetail): void {
  const state = detail.checkpoint?.state;

  const fullReviewInvocations = detail.subagentInvocations.filter(
    ({ subagentName }) => subagentName === 'review:full_review'
  );
  expect(fullReviewInvocations).toHaveLength(1);
  expect(fullReviewInvocations[0].status).toBe('completed');

  expect(state?.reviewCompleted).toBe(true);
  expect(state?.published).toBe(true);
  expect(state?.reviewedRefSaved).toBe(true);

  // The tool-call sequence is compared exactly, including order and repeats.
  const expectedToolCalls = ['search_code', 'read_file', 'read_file'];
  expect(state?.reviewDiagnostics?.toolCallNames).toEqual(expectedToolCalls);
  expect(state?.reviewDiagnostics?.stopReason).toBe('modelFinalized');

  const findings = state?.findings ?? [];
  expect(findings.length).toBeGreaterThan(0);
  const firstFinding = findings[0];
  expect(firstFinding.detail).toContain('auth/user model');
  expect(firstFinding.evidence).toContain('src/auth.ts');

  const publishedComments = detail.runDetails?.comments?.filter(
    ({ status }) => status === 'published'
  );
  expect(publishedComments?.length).toBeGreaterThan(0);
  // Expect at least one comment without a path and one anchored to src/user-handler.ts.
  expect(publishedComments?.some(({ path: commentPath }) => !commentPath)).toBe(true);
  expect(
    publishedComments?.some(({ path: commentPath }) => commentPath === 'src/user-handler.ts')
  ).toBe(true);
}
// End-to-end suite: each test seeds a fresh repository and pull request through the
// shared harness, posts a signed webhook to the assistant, and inspects the resulting
// review session via the admin API and the comments stored in Gitea.
describe('E2E Review Flow', () => {
const harness = new E2ETestHarness();
// Start the assistant subprocess and seed Gitea once for the whole suite (90 s hook timeout).
beforeAll(async () => {
await harness.start();
await harness.seedGitea();
}, 90_000);
// Kill the assistant subprocess and remove the harness temp directory.
afterAll(async () => {
await harness.stop();
});
// Core path: the session must complete, every plan step must be completed, the
// autonomous full-review expectations must hold, and Gitea must contain comments.
test('核心链路验证: webhook → clone → triage → full_review → aggregate → publish → save ref → Gitea has comments', async () => {
const { owner, repo, prNumber } = await harness.seedPR('simple-bug-pr');
const webhookResponse = await harness.triggerWebhook(owner, repo, prNumber);
expect(webhookResponse.status).toBe('accepted');
const result = await harness.waitForReview(owner, repo, prNumber, 120);
expect(result.completed).toBe(true);
expect(result.sessionState).toBe('completed');
expectPipelineStepsCompleted(result.detail);
expect(result.detail.checkpoint?.state?.published).toBe(true);
expectAutonomousFullReviewPipeline(result.detail);
const comments = await harness.getGiteaComments(owner, repo, prNumber);
expect(comments.length).toBeGreaterThan(0);
}, 150_000);
// State consistency: an early snapshot and the first observed state must be one of the
// listed lifecycle states, and the final checkpoint must be completed with no pending tasks.
test('状态正确性: session status transitions and checkpoint consistency', async () => {
const { owner, repo, prNumber } = await harness.seedPR('security-pr');
await harness.triggerWebhook(owner, repo, prNumber);
const snapshot = await harness.waitForSessionSnapshot(owner, repo, prNumber, 30);
expect(['queued', 'planning', 'executing', 'completed']).toContain(
snapshot.detail.summary.status
);
const result = await harness.waitForReview(owner, repo, prNumber, 120);
expect(['queued', 'planning', 'executing', 'completed']).toContain(result.observedStates[0]);
expect(result.sessionState).toBe('completed');
expect(result.detail.checkpoint?.stopReason).toBe('completed');
expect(result.detail.checkpoint?.pendingTasks ?? []).toHaveLength(0);
// The summary's finding count must agree with the findings the harness extracts.
expect(result.detail.summary.findingCount).toBe(harness.extractFindings(result.detail).length);
}, 150_000);
// Findings quality: fixtures are processed one after another; each is checked against
// the bounds in its own scenario file.
test('Findings 质量: fixtures trigger expected triage modes, autonomous full review, and finding counts', async () => {
const fixtureNames = ['simple-bug-pr', 'minimal-change-pr'];
for (const fixtureName of fixtureNames) {
const { owner, repo, prNumber, scenario } = await harness.seedPR(fixtureName);
await harness.triggerWebhook(owner, repo, prNumber);
const result = await harness.waitForReview(owner, repo, prNumber, 120);
expect(result.sessionState).toBe('completed');
const triageMode = harness.extractTriageMode(result.detail);
// The triage mode is only compared when the checkpoint actually reports one.
if (triageMode !== undefined) {
expect(triageMode).toBe(scenario.expectedTriageMode);
}
expectPipelineStepsCompleted(result.detail);
expect(result.detail.subagentInvocations).toEqual(
expect.arrayContaining([
expect.objectContaining({ subagentName: 'review:full_review', status: 'completed' }),
])
);
assertFindingsMatchScenario(harness.extractFindings(result.detail), scenario);
}
}, 360_000);
// Idempotency: a second identical webhook must resolve to the same session and must not
// change the number of comments or the number of distinct comment bodies.
test('幂等性: duplicate webhook does not create duplicate comments', async () => {
const { owner, repo, prNumber } = await harness.seedPR('duplicate-webhook-pr');
await harness.triggerWebhook(owner, repo, prNumber);
const firstResult = await harness.waitForReview(owner, repo, prNumber, 120);
expect(firstResult.sessionState).toBe('completed');
const firstComments = await harness.getGiteaComments(owner, repo, prNumber);
expect(firstComments.length).toBeGreaterThan(0);
const duplicateWebhookResponse = await harness.triggerWebhook(owner, repo, prNumber);
expect(['accepted', 'deduplicated']).toContain(duplicateWebhookResponse.status);
const secondResult = await harness.waitForReview(owner, repo, prNumber, 60);
expect(secondResult.sessionId).toBe(firstResult.sessionId);
const secondComments = await harness.getGiteaComments(owner, repo, prNumber);
expect(secondComments.length).toBe(firstComments.length);
expect(new Set(secondComments.map((comment) => comment.body)).size).toBe(
new Set(firstComments.map((comment) => comment.body)).size
);
}, 180_000);
// Error recovery: the webhook payload's repository clone_url is replaced with an
// unreachable host; the session must still reach a terminal state within the timeout.
// NOTE(review): the title says the session is marked failed, yet 'completed' is accepted
// as well — confirm whether that leniency is intended.
test('错误恢复: clone failure marks session failed, not stuck', async () => {
const { owner, repo, prNumber } = await harness.seedPR('clean-refactor-pr');
await harness.triggerWebhook(owner, repo, prNumber, {
repositoryPatch: {
clone_url: `http://invalid-host-99999.local/${owner}/${repo}-missing.git`,
},
});
const result = await harness.waitForReview(owner, repo, prNumber, 120);
expect(['completed', 'failed']).toContain(result.sessionState);
}, 150_000);
});

View File

@@ -0,0 +1,748 @@
import { createHmac } from 'node:crypto';
import { existsSync, mkdirSync, mkdtempSync, rmSync } from 'node:fs';
import { readFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import path from 'node:path';
// Fixed 64-hex-character key passed to the assistant subprocess as ENCRYPTION_KEY.
const ENCRYPTION_KEY = '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef';
// Shared secret used both to register the Gitea webhook and to HMAC-sign payloads sent by the harness.
const WEBHOOK_SECRET = 'e2e-test-webhook-secret';
// Session statuses at which waitForReview stops polling and returns.
const TERMINAL_STATES = new Set(['completed', 'failed', 'ignored', 'cancelled', 'error']);
// Recursive description of any JSON-serializable value.
type JsonPrimitive = string | number | boolean | null;
type JsonValue = JsonPrimitive | JsonValue[] | { [key: string]: JsonValue };
/** Fields of a Gitea user object that the harness reads or forwards. */
interface GiteaUser {
login: string;
full_name?: string;
}
/**
 * Fields of a Gitea repository object used by the harness. The URL fields are
 * rewritten by `normalizeRepoUrls` before being placed in a webhook payload.
 */
interface GiteaRepo {
id: number;
name: string;
full_name: string;
clone_url: string;
html_url: string;
ssh_url?: string;
owner: GiteaUser;
}
/** Fields of a Gitea pull request object used when building the webhook payload. */
interface GiteaPullRequest {
id: number;
number: number;
title: string;
html_url: string;
// Source side of the pull request.
head: {
ref: string;
sha: string;
repo?: GiteaRepo;
};
// Target side of the pull request.
base: {
ref: string;
sha: string;
repo?: GiteaRepo;
};
requested_reviewers?: GiteaUser[];
user?: GiteaUser;
}
/**
 * Expectations for one fixture, parsed from its `scenario.json` by `readScenario`.
 * The JSON is cast to this shape without runtime validation.
 */
interface Scenario {
name: string;
description: string;
expectedTriageMode: string;
expectedDomains: string[];
// Inclusive lower bound on the number of findings.
minFindings: number;
// Optional inclusive upper bound on the number of findings.
maxFindings?: number;
// Inclusive lower bound on findings whose severity is 'high'.
minHighSeverity: number;
testIdempotency?: boolean;
}
/** Response body of the assistant's admin login endpoint, as read by `getAdminJWT`. */
interface AdminLoginResponse {
token: string;
}
/** Summary of a review session as returned by the admin session endpoints. */
interface SessionSummary {
sessionId: string;
owner?: string;
repo?: string;
prNumber?: number;
// Lifecycle status; compared against TERMINAL_STATES while polling.
status: string;
findingCount: number;
}
/** One element of the admin session list; `findSession` matches on summary or metadata fields. */
interface SessionListEntry {
session: {
id: string;
// Used as a fallback source for owner/repo/prNumber when the summary omits them.
metadata?: Record<string, JsonValue>;
};
summary: SessionSummary;
}
/** Response body of the admin session list endpoint. */
interface SessionListResponse {
data: SessionListEntry[];
}
/** A single review finding as exposed by the admin API; every field is optional. */
interface Finding {
// Tests count findings whose severity equals 'high'.
severity?: string;
confidence?: number;
path?: string;
line?: number;
title?: string;
detail?: string;
evidence?: string;
category?: string;
domain?: string;
// Tests require fingerprints to be unique within one session.
fingerprint?: string;
}
/**
 * Detail view of one review session, as returned by
 * `/admin/api/review/sessions/:id` and consumed by the e2e assertions.
 */
interface SessionDetail {
session: {
id: string;
metadata?: Record<string, JsonValue>;
};
summary: SessionSummary;
// Latest checkpoint of the session, or null when none is available.
checkpoint: {
stopReason?: string;
pendingTasks?: Array<{ name: string }>;
state?: {
targetSha?: string;
triage?: {
mode?: string;
domains?: string[];
};
// Fallback read by extractTriageMode when `triage.mode` is absent.
triageMode?: string;
findings?: Finding[];
published?: boolean;
reviewedRefSaved?: boolean;
reviewCompleted?: boolean;
reviewedRef?: string;
// Diagnostics of the review run; tests assert on toolCallNames and stopReason.
reviewDiagnostics?: {
toolCallNames?: string[];
toolCallCount?: number;
parsedFindingCount?: number;
stopReason?: string;
};
};
} | null;
// Pipeline steps with their status; tests look steps up by `key`.
plan: Array<{ key: string; status: string; label: string }>;
events: Array<{ eventType: string; payload: Record<string, JsonValue> }>;
// Per-run details; `findings` is the fallback source used by extractFindings.
runDetails: {
findings?: Finding[];
comments?: Array<{
status?: string;
path?: string;
line?: number;
body?: string;
fingerprint?: string;
}>;
} | null;
subagentInvocations: Array<{
subagentName: string;
status: string;
result?: Record<string, JsonValue>;
}>;
}
/** Response of Gitea's token creation endpoint; `createToken` reads `sha1` first, then `token`. */
interface GiteaTokenResponse {
sha1?: string;
token?: string;
}
/** Common shape for issue comments and review comments returned by `getGiteaComments`. */
interface CommentLike {
id: number;
body: string;
path?: string;
line?: number;
}
/** Coordinates of the pull request created by `seedPR`, plus the fixture's scenario. */
interface SeedResult {
owner: string;
repo: string;
prNumber: number;
scenario: Scenario;
}
/** Result of `waitForReview` once the session reached a terminal state. */
interface ReviewWaitResult {
// True only when the terminal state is 'completed'.
completed: boolean;
sessionState: string;
sessionId: string;
detail: SessionDetail;
// Distinct consecutive statuses seen while polling, in order of observation.
observedStates: string[];
}
/** Optional overrides for `triggerWebhook`. */
interface TriggerWebhookOptions {
// Fields merged over the fetched repository before it is put in the payload (owner is never overridden).
repositoryPatch?: Partial<GiteaRepo>;
// Webhook action; defaults to 'opened'.
action?: string;
}
export class E2ETestHarness {
// Base URL of the Gitea instance (E2E_GITEA_URL or a localhost default); one trailing slash is stripped.
readonly giteaUrl = (process.env.E2E_GITEA_URL ?? 'http://localhost:3333').replace(/\/$/, '');
// Gitea admin credentials, overridable through the environment.
readonly adminUser = process.env.E2E_GITEA_ADMIN_USER ?? 'e2e-admin';
readonly adminPass = process.env.E2E_GITEA_ADMIN_PASS ?? 'e2ePassword123!';
// Handle of the spawned assistant process; undefined while it is not running.
private assistantProcess?: Bun.Subprocess<'pipe', 'pipe', 'pipe'>;
// Random port in 43100..44099 chosen once per harness instance.
private assistantPort = 43100 + Math.floor(Math.random() * 1000);
// Per-instance temp directory holding the database and review workdir; removed by stopAssistant.
private tempDir = mkdtempSync(path.join(tmpdir(), 'e2e-assistant-'));
private databasePath = path.join(this.tempDir, 'assistant.db');
private reviewWorkDir = path.join(this.tempDir, 'review-workdir');
// Admin JWT cached by start(); Gitea API token cached by seedGitea().
private adminJwt?: string;
private giteaToken?: string;
// Monotonic counter used to make repository and branch names distinct.
private repoCounter = 0;
/** Base URL of the assistant subprocess on the loopback interface. */
get assistantUrl(): string {
  const port = this.assistantPort;
  return `http://127.0.0.1:${port}`;
}
/**
 * Starts the assistant subprocess and caches an admin JWT for later admin API calls.
 * The login must happen after the assistant is healthy, so the order is significant.
 */
async start(): Promise<void> {
await this.startAssistant();
this.adminJwt = await this.getAdminJWT();
}
/** Async wrapper around `stopAssistant`, which kills the subprocess and removes the temp directory. */
async stop(): Promise<void> {
this.stopAssistant();
}
/**
 * Spawns the assistant (`bun run src/index.ts`) with the mock LLM enabled, an
 * isolated database path and a random port, then waits until its health
 * endpoint responds. Does nothing when a subprocess handle already exists.
 *
 * If the health check fails, the subprocess is killed and the handle cleared
 * before the error is rethrown, so no orphaned process is left behind and a
 * later call can start a fresh one instead of returning early.
 *
 * @throws Error when the assistant does not become healthy in time.
 */
async startAssistant(): Promise<void> {
  if (this.assistantProcess) return;
  this.assistantProcess = Bun.spawn(['bun', 'run', 'src/index.ts'], {
    cwd: path.resolve(import.meta.dir, '../..'),
    stdout: 'pipe',
    stderr: 'pipe',
    env: {
      ...process.env,
      E2E_MOCK_LLM: '1',
      ENCRYPTION_KEY,
      DATABASE_PATH: this.databasePath,
      REVIEW_ENGINE: 'kernel',
      PORT: String(this.assistantPort),
      LOG_LEVEL: process.env.LOG_LEVEL ?? 'error',
    },
  });
  const child = this.assistantProcess;
  // Both pipes are consumed so the child cannot block on a full pipe buffer.
  this.drainProcessOutput(child.stdout, 'assistant stdout');
  this.drainProcessOutput(child.stderr, 'assistant stderr');
  try {
    await this.waitForAssistantHealth();
  } catch (error) {
    // Do not leak a process that never became healthy.
    child.kill();
    this.assistantProcess = undefined;
    throw error;
  }
}
/**
 * Kills the assistant subprocess (when one is running) and deletes the
 * harness temp directory (when it still exists). Safe to call repeatedly.
 */
stopAssistant(): void {
  const child = this.assistantProcess;
  if (child) {
    child.kill();
    this.assistantProcess = undefined;
  }

  if (!existsSync(this.tempDir)) return;
  rmSync(this.tempDir, { recursive: true, force: true });
}
/**
 * Prepares Gitea for the suite: waits for the server, makes sure the admin
 * user exists, creates an API token, and pushes the Gitea settings into the
 * assistant configuration. Each step depends on the previous one.
 */
async seedGitea(): Promise<void> {
await this.waitForGitea();
await this.ensureAdminUser();
this.giteaToken = await this.createToken();
await this.configureAssistant();
}
/**
 * Creates a fresh repository from a fixture and opens a pull request in it.
 *
 * The fixture's `base` files are pushed to `main`, its `branch` files to a
 * feature branch, a pull request from the feature branch to `main` is opened,
 * and a webhook pointing at the assistant is registered on the repository.
 * Gitea is seeded first when no API token is available yet.
 *
 * @param scenarioName - Fixture directory name under the fixtures folder.
 * @returns Owner, repository name, pull request number and the parsed scenario.
 */
async seedPR(scenarioName: string): Promise<SeedResult> {
  if (!this.giteaToken) {
    await this.seedGitea();
  }

  const scenario = await this.readScenario(scenarioName);
  const owner = this.adminUser;
  const safeName = scenarioName.replace(/[^a-z0-9-]/gi, '-');
  // Timestamp plus counter keeps repository names distinct across seeds.
  const repo = `e2e-${safeName}-${Date.now()}-${this.repoCounter++}`;
  const baseBranch = 'main';
  const featureBranch = `feature/${scenarioName}-${this.repoCounter}`;

  await this.createRepo(repo);

  const baseFiles = await this.readFixtureFiles(scenarioName, 'base');
  await this.pushBranchWithFiles(
    owner,
    repo,
    baseBranch,
    baseFiles,
    `test: seed ${scenario.name} base`
  );

  const branchFiles = await this.readFixtureFiles(scenarioName, 'branch');
  await this.pushBranchWithFiles(
    owner,
    repo,
    featureBranch,
    branchFiles,
    `feat: ${scenario.description}`
  );

  const { number: prNumber } = await this.createPullRequest(
    owner,
    repo,
    scenario.description,
    featureBranch,
    baseBranch
  );
  await this.createWebhook(owner, repo);

  return { owner, repo, prNumber, scenario };
}
/**
 * Sends a signed `pull_request` webhook to the assistant for an existing PR.
 *
 * The repository and pull request are fetched from Gitea, their repository
 * URLs are normalized, and the payload is signed with an HMAC-SHA256 of the
 * exact request body using the shared webhook secret.
 *
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @param prNumber - Pull request number.
 * @param options - Optional action override and repository field overrides.
 * @returns The assistant's JSON response.
 * @throws Error when Gitea or the assistant answers with a non-2xx status.
 */
async triggerWebhook(
  owner: string,
  repo: string,
  prNumber: number,
  options: TriggerWebhookOptions = {}
): Promise<{ status: string; runId?: string }> {
  const repoPath = `/repos/${owner}/${repo}`;
  const repository = await this.giteaFetch<GiteaRepo>(repoPath);
  const pullRequest = await this.giteaFetch<GiteaPullRequest>(`${repoPath}/pulls/${prNumber}`);

  // The patch is applied before normalization; the owner always comes from Gitea.
  const normalizedRepository = this.normalizeRepoUrls({
    ...repository,
    ...options.repositoryPatch,
    owner: repository.owner,
  });

  const withNormalizedRepo = <T extends { repo?: GiteaRepo }>(side: T) => ({
    ...side,
    repo: side.repo ? this.normalizeRepoUrls(side.repo) : undefined,
  });

  const body = JSON.stringify({
    action: options.action ?? 'opened',
    number: prNumber,
    pull_request: {
      ...pullRequest,
      head: withNormalizedRepo(pullRequest.head),
      base: withNormalizedRepo(pullRequest.base),
      requested_reviewers: pullRequest.requested_reviewers ?? [],
    },
    repository: normalizedRepository,
    sender: repository.owner,
  });

  // The signature must cover the exact bytes that are sent as the request body.
  const signature = createHmac('sha256', WEBHOOK_SECRET).update(body).digest('hex');
  const headers = {
    'Content-Type': 'application/json',
    'X-Gitea-Event': 'pull_request',
    'X-Gitea-Signature': signature,
  };

  return this.fetchJson<{ status: string; runId?: string }>(
    `${this.assistantUrl}/webhook/gitea`,
    { method: 'POST', headers, body }
  );
}
/**
 * Polls the admin API every two seconds until the review session for the
 * given pull request reaches a terminal state.
 *
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @param prNumber - Pull request number.
 * @param timeoutSeconds - Maximum time to wait, in seconds (default 120).
 * @returns The terminal state, session id, session detail and the sequence of
 *   distinct consecutive statuses observed while polling.
 * @throws Error when no terminal state is reached before the deadline.
 */
async waitForReview(
  owner: string,
  repo: string,
  prNumber: number,
  timeoutSeconds = 120
): Promise<ReviewWaitResult> {
  const deadline = Date.now() + timeoutSeconds * 1000;
  const observedStates: string[] = [];
  // Only record a status when it differs from the previously recorded one.
  const recordState = (state: string): void => {
    if (observedStates.at(-1) !== state) observedStates.push(state);
  };

  while (Date.now() < deadline) {
    const entry = await this.findSession(owner, repo, prNumber);
    if (entry) {
      const { sessionId, status } = entry.summary;
      recordState(status);

      const detail = await this.getSessionDetail(sessionId);
      const detailStatus = detail.summary.status;
      recordState(detailStatus);

      if (TERMINAL_STATES.has(detailStatus)) {
        return {
          completed: detailStatus === 'completed',
          sessionState: detailStatus,
          sessionId,
          detail,
          observedStates,
        };
      }
    }
    await this.sleep(2000);
  }

  const trail = observedStates.join(' -> ') || 'none';
  throw new Error(
    `Timed out waiting for review ${owner}/${repo}#${prNumber}; observed states: ${trail}`
  );
}
/**
 * Polls every 500 ms until a session for the pull request exists, then returns
 * its list entry together with its current detail, whatever its status.
 *
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @param prNumber - Pull request number.
 * @param timeoutSeconds - Maximum time to wait, in seconds (default 30).
 * @throws Error when no session appears before the deadline.
 */
async waitForSessionSnapshot(
  owner: string,
  repo: string,
  prNumber: number,
  timeoutSeconds = 30
): Promise<{ entry: SessionListEntry; detail: SessionDetail }> {
  const deadline = Date.now() + timeoutSeconds * 1000;

  while (Date.now() < deadline) {
    const entry = await this.findSession(owner, repo, prNumber);
    if (!entry) {
      await this.sleep(500);
      continue;
    }
    const detail = await this.getSessionDetail(entry.summary.sessionId);
    return { entry, detail };
  }

  throw new Error(`Timed out waiting for session snapshot ${owner}/${repo}#${prNumber}`);
}
/**
 * Logs in to the assistant's admin API with the fixed test password.
 *
 * @returns The token from the login response.
 * @throws Error when the login request does not succeed.
 */
async getAdminJWT(): Promise<string> {
  const loginUrl = `${this.assistantUrl}/admin/api/login`;
  const { token } = await this.fetchJson<AdminLoginResponse>(loginUrl, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ password: 'password' }),
  });
  return token;
}
/**
 * Fetches the detail view of a review session through the admin API.
 *
 * @param sessionId - Session identifier; URL-encoded before use.
 */
async getSessionDetail(sessionId: string): Promise<SessionDetail> {
  const encodedId = encodeURIComponent(sessionId);
  return this.adminFetch<SessionDetail>(`/admin/api/review/sessions/${encodedId}`);
}
/**
 * Collects every comment on a pull request: issue-level comments first,
 * followed by the comments of each review.
 *
 * Fetching the comments of an individual review is best-effort: a failure for
 * one review contributes an empty list instead of failing the whole call.
 *
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @param prNumber - Pull request number.
 */
async getGiteaComments(owner: string, repo: string, prNumber: number): Promise<CommentLike[]> {
  const repoPath = `/repos/${owner}/${repo}`;
  const issueComments = await this.giteaFetch<CommentLike[]>(
    `${repoPath}/issues/${prNumber}/comments`
  );
  const reviews = await this.giteaFetch<{ id: number }[]>(`${repoPath}/pulls/${prNumber}/reviews`);

  const commentsPerReview = await Promise.all(
    reviews.map(async (review) => {
      try {
        return await this.giteaFetch<CommentLike[]>(
          `${repoPath}/pulls/${prNumber}/reviews/${review.id}/comments`
        );
      } catch {
        return [] as CommentLike[];
      }
    })
  );

  return issueComments.concat(commentsPerReview.flat());
}
/**
 * Returns the session's findings: the checkpoint state's list when present,
 * otherwise the run details' list, otherwise an empty array.
 */
extractFindings(detail: SessionDetail): Finding[] {
  const fromCheckpoint = detail.checkpoint?.state?.findings;
  const fromRun = detail.runDetails?.findings;
  return fromCheckpoint ?? fromRun ?? [];
}
/**
 * Returns the triage mode recorded in the checkpoint state, preferring
 * `triage.mode` over the flat `triageMode` field; undefined when neither is set.
 */
extractTriageMode(detail: SessionDetail): string | undefined {
  const state = detail.checkpoint?.state;
  return state?.triage?.mode ?? state?.triageMode;
}
/** Returns the triage domains from the checkpoint state, or an empty array when absent. */
extractDomains(detail: SessionDetail): string[] {
  return detail.checkpoint?.state?.triage?.domains ?? [];
}
/**
 * Writes the Gitea connection settings, webhook secret and review settings
 * into the assistant's configuration through the admin API.
 *
 * @throws Error when the Gitea token has not been created yet.
 */
private async configureAssistant(): Promise<void> {
  // The same token serves as both the regular access token and the admin token.
  const token = this.requireToken();
  const settings: Record<string, string> = {
    GITEA_API_URL: `${this.giteaUrl}/api/v1`,
    GITEA_ACCESS_TOKEN: token,
    GITEA_ADMIN_TOKEN: token,
    WEBHOOK_SECRET,
    REVIEW_ENGINE: 'kernel',
    REVIEW_WORKDIR: this.reviewWorkDir,
    REVIEW_COMMAND_TIMEOUT_MS: '30000',
    REVIEW_ALLOWED_COMMANDS: 'git,rg,cat,sed,wc',
  };
  await this.putConfig(settings);
}
/**
 * Sends configuration values to the assistant's admin config endpoint.
 * Uses the cached admin JWT, or logs in when none is cached.
 *
 * @param values - Configuration keys and values to store.
 * @throws Error containing status and response text when the request fails.
 */
private async putConfig(values: Record<string, string>): Promise<void> {
  const token = this.adminJwt ?? (await this.getAdminJWT());
  const headers = {
    'Content-Type': 'application/json',
    Authorization: `Bearer ${token}`,
  };
  const response = await fetch(`${this.assistantUrl}/admin/api/config`, {
    method: 'PUT',
    headers,
    body: JSON.stringify(values),
  });
  if (response.ok) return;

  const detail = await response.text();
  throw new Error(`Failed to configure assistant: ${response.status} ${detail}`);
}
/**
 * Looks up the first session in the admin session list (limit 100) that
 * belongs to the given pull request.
 *
 * Owner, repository and PR number are taken from the session summary and fall
 * back to the session metadata when the summary omits them.
 *
 * @returns The matching list entry, or undefined when there is none.
 */
private async findSession(
  owner: string,
  repo: string,
  prNumber: number
): Promise<SessionListEntry | undefined> {
  const { data } = await this.adminFetch<SessionListResponse>(
    '/admin/api/review/sessions?limit=100'
  );

  for (const entry of data) {
    const metadata = entry.session.metadata ?? {};
    const { summary } = entry;

    const metadataOwner = typeof metadata.owner === 'string' ? metadata.owner : undefined;
    if ((summary.owner ?? metadataOwner) !== owner) continue;

    const metadataRepo = typeof metadata.repo === 'string' ? metadata.repo : undefined;
    if ((summary.repo ?? metadataRepo) !== repo) continue;

    // Non-numeric metadata is coerced; a missing value becomes NaN and never matches.
    const metadataPr =
      typeof metadata.prNumber === 'number' ? metadata.prNumber : Number(metadata.prNumber);
    if ((summary.prNumber ?? metadataPr) === prNumber) return entry;
  }
  return undefined;
}
/**
 * Performs an authenticated GET against the assistant's admin API.
 * Uses the cached admin JWT, or logs in when none is cached.
 *
 * @param apiPath - Path beginning with `/`, appended to the assistant base URL.
 */
private async adminFetch<T>(apiPath: string): Promise<T> {
  const token = this.adminJwt ?? (await this.getAdminJWT());
  const headers = { Authorization: `Bearer ${token}` };
  return this.fetchJson<T>(`${this.assistantUrl}${apiPath}`, { headers });
}
/**
 * Polls the assistant's `/api/health` endpoint until it answers with a 2xx
 * status, for at most 30 seconds.
 *
 * The pause between attempts applies to every unsuccessful attempt — both
 * connection errors and non-OK responses — so the loop never busy-spins while
 * the server is reachable but not ready yet.
 *
 * @throws Error when the assistant is not healthy before the deadline.
 */
private async waitForAssistantHealth(): Promise<void> {
  const deadline = Date.now() + 30_000;
  while (Date.now() < deadline) {
    try {
      const response = await fetch(`${this.assistantUrl}/api/health`);
      if (response.ok) return;
    } catch {
      // Connection refused while the server is still booting; retry after the pause below.
    }
    await this.sleep(2000);
  }
  throw new Error(`Assistant did not become healthy at ${this.assistantUrl}`);
}
/**
 * Polls Gitea's `/api/v1/version` endpoint until it answers with a 2xx status,
 * for at most 60 seconds.
 *
 * Exactly one two-second pause follows every unsuccessful attempt, whether it
 * failed with a connection error or a non-OK response.
 *
 * @throws Error when Gitea is not available before the deadline.
 */
private async waitForGitea(): Promise<void> {
  const deadline = Date.now() + 60_000;
  while (Date.now() < deadline) {
    try {
      const response = await fetch(`${this.giteaUrl}/api/v1/version`);
      if (response.ok) return;
    } catch {
      // Gitea is not accepting connections yet; retry after the pause below.
    }
    await this.sleep(2000);
  }
  throw new Error(`Gitea did not become available at ${this.giteaUrl}`);
}
/**
 * Makes sure the configured admin user can be used against Gitea.
 *
 * Returns immediately when a basic-auth login succeeds. Otherwise it tries to
 * create the user through the admin API, first with the admin credentials and
 * then with `root`/`root`; a success, 422 or 409 response ends the attempt.
 * When neither attempt ends that way, the login is retried once.
 *
 * @throws Error when the final login retry is not successful.
 */
private async ensureAdminUser(): Promise<void> {
  const basicAuth = (user: string, pass: string): string => `Basic ${btoa(`${user}:${pass}`)}`;
  const canLogin = (): Promise<Response> =>
    fetch(`${this.giteaUrl}/api/v1/user`, {
      headers: { Authorization: basicAuth(this.adminUser, this.adminPass) },
    });

  const loginCheck = await canLogin();
  if (loginCheck.ok) return;

  const body = JSON.stringify({
    username: this.adminUser,
    password: this.adminPass,
    email: `${this.adminUser}@e2e-test.local`,
    must_change_password: false,
    login_name: this.adminUser,
    admin_permission: true,
  });

  const candidates = [
    [this.adminUser, this.adminPass],
    ['root', 'root'],
  ] as const;
  for (const [user, pass] of candidates) {
    const response = await fetch(`${this.giteaUrl}/api/v1/admin/users`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: basicAuth(user, pass),
      },
      body,
    });
    // 422 and 409 are treated like success and end the attempt.
    const settled = response.ok || response.status === 422 || response.status === 409;
    if (settled) return;
  }

  const retryLogin = await canLogin();
  if (retryLogin.ok) return;
  throw new Error(
    `Unable to create or authenticate Gitea admin user: ${retryLogin.status} ${await retryLogin.text()}`
  );
}
/**
 * Creates a Gitea API token for the admin user using basic authentication.
 * The token name contains the current timestamp so repeated calls do not clash.
 *
 * @returns The token value, read from `sha1` or, failing that, `token`.
 * @throws Error when the request fails or the response carries no token.
 */
private async createToken(): Promise<string> {
  const endpoint = `${this.giteaUrl}/api/v1/users/${encodeURIComponent(this.adminUser)}/tokens`;
  const credentials = btoa(`${this.adminUser}:${this.adminPass}`);

  const response = await fetch(endpoint, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Basic ${credentials}`,
    },
    body: JSON.stringify({ name: `e2e-token-${Date.now()}`, scopes: ['all'] }),
  });
  if (!response.ok) {
    throw new Error(`Failed to create Gitea token: ${response.status} ${await response.text()}`);
  }

  const { sha1, token } = (await response.json()) as GiteaTokenResponse;
  const value = sha1 ?? token;
  if (value) return value;
  throw new Error('Gitea token response did not include sha1/token');
}
/**
 * Creates a repository owned by the authenticated user, auto-initialized with
 * `main` as its default branch.
 *
 * @param name - Repository name.
 */
private async createRepo(name: string): Promise<GiteaRepo> {
  const body = JSON.stringify({ name, auto_init: true, default_branch: 'main' });
  return this.giteaFetch<GiteaRepo>('/user/repos', { method: 'POST', body });
}
/**
 * Opens a pull request from `head` into `base`.
 *
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @param description - Text used in both the PR title and body.
 * @param head - Source branch.
 * @param base - Target branch.
 */
private async createPullRequest(
  owner: string,
  repo: string,
  description: string,
  head: string,
  base: string
): Promise<GiteaPullRequest> {
  const request = {
    title: `E2E: ${description}`,
    body: `E2E test PR: ${description}`,
    head,
    base,
  };
  return this.giteaFetch<GiteaPullRequest>(`/repos/${owner}/${repo}/pulls`, {
    method: 'POST',
    body: JSON.stringify(request),
  });
}
/**
 * Registers an active `pull_request` webhook on the repository that targets
 * the assistant's `/webhook/gitea` endpoint with the shared secret.
 */
private async createWebhook(owner: string, repo: string): Promise<void> {
  const hook = {
    type: 'gitea',
    active: true,
    events: ['pull_request'],
    config: {
      url: `${this.assistantUrl}/webhook/gitea`,
      content_type: 'json',
      secret: WEBHOOK_SECRET,
    },
  };
  await this.giteaFetch<JsonValue>(`/repos/${owner}/${repo}/hooks`, {
    method: 'POST',
    body: JSON.stringify(hook),
  });
}
/**
 * Calls the Gitea v1 API with token authentication and a JSON content type.
 * Headers supplied by the caller take precedence over the defaults.
 *
 * @param apiPath - Path below `/api/v1`, beginning with `/`.
 * @param init - Additional fetch options.
 * @throws Error when the token is missing or the response is not 2xx.
 */
private async giteaFetch<T>(apiPath: string, init: RequestInit = {}): Promise<T> {
  const headers = {
    'Content-Type': 'application/json',
    Authorization: `token ${this.requireToken()}`,
    ...(init.headers ?? {}),
  };
  return this.fetchJson<T>(`${this.giteaUrl}/api/v1${apiPath}`, { ...init, headers });
}
/**
 * Fetches a URL and parses the response body as JSON.
 * The result is cast to `T` without runtime validation.
 *
 * @throws Error carrying status, URL and response text for non-2xx responses.
 */
private async fetchJson<T>(url: string, init: RequestInit = {}): Promise<T> {
  const response = await fetch(url, init);
  if (response.ok) {
    return (await response.json()) as T;
  }
  throw new Error(`HTTP ${response.status} for ${url}: ${await response.text()}`);
}
/**
 * Loads and parses `<fixtures>/<scenarioName>/scenario.json`.
 * The parsed value is cast to `Scenario` without validation.
 */
private async readScenario(scenarioName: string): Promise<Scenario> {
  const scenarioPath = path.join(this.fixturesDir(), scenarioName, 'scenario.json');
  const raw = await readFile(scenarioPath, 'utf-8');
  return JSON.parse(raw) as Scenario;
}
/**
 * Reads every file below `<fixtures>/<scenarioName>/<fixturePart>` as UTF-8.
 *
 * @returns A map from the path relative to that directory to the file content.
 */
private async readFixtureFiles(
  scenarioName: string,
  fixturePart: 'base' | 'branch'
): Promise<Record<string, string>> {
  const root = path.join(this.fixturesDir(), scenarioName, fixturePart);
  const contents: Record<string, string> = {};
  const matches = new Bun.Glob('**/*').scan({ cwd: root, onlyFiles: true });
  for await (const relativePath of matches) {
    contents[relativePath] = await readFile(path.join(root, relativePath), 'utf-8');
  }
  return contents;
}
/**
 * Clones the repository into a temporary directory, creates or resets
 * `branchName`, writes the given files on top of the checkout, commits and
 * force-pushes the branch. The temporary clone is always removed afterwards.
 *
 * The clone URL is built with the WHATWG `URL` API so the admin credentials
 * are percent-encoded and are injected for any scheme of `giteaUrl` (the
 * previous string replacement only worked for `http://` and broke on
 * passwords containing characters such as `@`, `:` or `/`).
 *
 * @param owner - Repository owner.
 * @param repo - Repository name.
 * @param branchName - Branch to create or overwrite.
 * @param files - Map from repository-relative path to file content.
 * @param commitMessage - Message of the single commit that is created.
 * @throws Error when any git command exits with a non-zero status.
 */
private async pushBranchWithFiles(
  owner: string,
  repo: string,
  branchName: string,
  files: Record<string, string>,
  commitMessage: string
): Promise<void> {
  const tmpDir = mkdtempSync(
    path.join(tmpdir(), `e2e-push-${branchName.replace(/[^a-z0-9-]/gi, '-')}-`)
  );
  const remote = new URL(`${this.giteaUrl}/${owner}/${repo}.git`);
  // The URL setters percent-encode reserved characters in the credentials.
  remote.username = this.adminUser;
  remote.password = this.adminPass;
  const cloneUrl = remote.toString();
  try {
    await this.exec(['git', 'clone', cloneUrl, tmpDir]);
    await this.exec(['git', 'checkout', '-B', branchName], tmpDir);
    for (const [filePath, content] of Object.entries(files)) {
      const destination = path.join(tmpDir, filePath);
      mkdirSync(path.dirname(destination), { recursive: true });
      await Bun.write(destination, content);
    }
    await this.exec(['git', 'config', 'user.email', 'e2e@test.local'], tmpDir);
    await this.exec(['git', 'config', 'user.name', 'E2E Bot'], tmpDir);
    await this.exec(['git', 'add', '-A'], tmpDir);
    // --allow-empty keeps the commit from failing when the files match the checkout.
    await this.exec(['git', 'commit', '-m', commitMessage, '--allow-empty'], tmpDir);
    await this.exec(['git', 'push', 'origin', branchName, '--force'], tmpDir);
  } finally {
    rmSync(tmpDir, { recursive: true, force: true });
  }
}
/**
 * Runs a command and waits for it to finish. Both output pipes are read
 * concurrently with waiting for the exit code.
 *
 * @param args - Command followed by its arguments.
 * @param cwd - Optional working directory.
 * @throws Error containing the command line, stdout and stderr on a non-zero exit.
 */
private async exec(args: string[], cwd?: string): Promise<void> {
  const child = Bun.spawn(args, { cwd, stdout: 'pipe', stderr: 'pipe' });
  const stdoutText = new Response(child.stdout).text();
  const stderrText = new Response(child.stderr).text();
  const [stdout, stderr, exitCode] = await Promise.all([stdoutText, stderrText, child.exited]);
  if (exitCode === 0) return;
  throw new Error(`Command failed (${args.join(' ')}):\n${stdout}\n${stderr}`);
}
/** Absolute path of the `fixtures` directory that sits next to this file's directory. */
private fixturesDir(): string {
  const here = import.meta.dir;
  return path.resolve(here, '../fixtures');
}
/**
 * Returns a copy of the repository with `clone_url`, `html_url` and, when
 * present, `ssh_url` passed through `normalizeGiteaUrl`.
 */
private normalizeRepoUrls(repo: GiteaRepo): GiteaRepo {
  const { clone_url: cloneUrl, html_url: htmlUrl, ssh_url: sshUrl } = repo;
  return {
    ...repo,
    clone_url: this.normalizeGiteaUrl(cloneUrl),
    html_url: this.normalizeGiteaUrl(htmlUrl),
    ssh_url: sshUrl ? this.normalizeGiteaUrl(sshUrl) : sshUrl,
  };
}
/**
 * Replaces the first occurrence of `http://gitea:3000` in the value with the
 * Gitea base URL the harness itself uses.
 */
private normalizeGiteaUrl(value: string): string {
  const internalOrigin = 'http://gitea:3000';
  return value.replace(internalOrigin, this.giteaUrl);
}
/**
 * Returns the Gitea API token.
 *
 * @throws Error when the token has not been created yet.
 */
private requireToken(): string {
  const token = this.giteaToken;
  if (token) return token;
  throw new Error('Gitea token is not initialized');
}
/**
 * Consumes a subprocess output stream in the background.
 *
 * The whole stream is read to its end; the collected text is printed only
 * when it is non-blank and `E2E_DEBUG=1`. A read failure is handled here so
 * it cannot surface as an unhandled promise rejection in the test run; it is
 * reported under the same debug flag.
 *
 * @param stream - The subprocess stdout or stderr stream.
 * @param label - Prefix used when printing.
 */
private drainProcessOutput(stream: ReadableStream<Uint8Array>, label: string): void {
  void new Response(stream)
    .text()
    .then((output) => {
      if (output.trim().length > 0 && process.env.E2E_DEBUG === '1') {
        console.log(`[${label}] ${output}`);
      }
    })
    .catch((error: unknown) => {
      // Fire-and-forget: a failed read must not take the test process down.
      if (process.env.E2E_DEBUG === '1') {
        console.error(`[${label}] failed to read output:`, error);
      }
    });
}
/** Resolves after approximately `ms` milliseconds. */
private sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
}
export type { Finding, ReviewWaitResult, Scenario, SeedResult, SessionDetail };

View File

@@ -0,0 +1,21 @@
interface Order {
id: string;
total: number;
}
interface Invoice {
id: string;
total: number;
}
export function summarizeOrder(order: Order): string {
const rounded = Math.round(order.total * 100) / 100;
const formatted = rounded.toFixed(2);
return `Order ${order.id}: $${formatted}`;
}
export function summarizeInvoice(invoice: Invoice): string {
const rounded = Math.round(invoice.total * 100) / 100;
const formatted = rounded.toFixed(2);
return `Invoice ${invoice.id}: $${formatted}`;
}

View File

@@ -0,0 +1,22 @@
interface Order {
id: string;
total: number;
}
interface Invoice {
id: string;
total: number;
}
function formatCurrency(total: number): string {
const rounded = Math.round(total * 100) / 100;
return rounded.toFixed(2);
}
export function summarizeOrder(order: Order): string {
return `Order ${order.id}: $${formatCurrency(order.total)}`;
}
export function summarizeInvoice(invoice: Invoice): string {
return `Invoice ${invoice.id}: $${formatCurrency(invoice.total)}`;
}

View File

@@ -0,0 +1,9 @@
{
"name": "clean-refactor-pr",
"description": "正确的重构",
"expectedTriageMode": "light",
"expectedDomains": ["correctness"],
"minFindings": 0,
"maxFindings": 1,
"minHighSeverity": 0
}

View File

@@ -0,0 +1,7 @@
export function startApp(): string {
return 'sunny-cactus app started';
}
if (import.meta.main) {
console.log(startApp());
}

View File

@@ -0,0 +1,7 @@
# Sunny Cactus Demo
This fixture updates documentation only. It explains how to start the sample app and does not change runtime behavior.
## Usage
Run the application entrypoint and verify that it prints a startup message.

View File

@@ -0,0 +1,8 @@
{
"name": "docs-only-pr",
"description": "纯文档变更",
"expectedTriageMode": "skip",
"expectedDomains": [],
"minFindings": 0,
"minHighSeverity": 0
}

View File

@@ -0,0 +1,22 @@
export interface User {
id: string;
name: string;
role: 'user' | 'admin';
}
const users = new Map<string, User>([
['token-user', { id: 'u1', name: 'Alice', role: 'user' }],
['token-admin', { id: 'u2', name: 'Bob', role: 'admin' }],
]);
export function authenticate(token: string): User | null {
if (!token.trim()) {
return null;
}
return users.get(token) ?? null;
}
export function requireAdmin(user: User | null): boolean {
return user?.role === 'admin';
}

View File

@@ -0,0 +1,20 @@
interface UserRecord {
id: string;
email: string;
profile?: {
displayName?: string;
};
}
interface Database {
query<T = unknown>(sql: string): Promise<T[]>;
}
export async function getUserDisplayName(user: UserRecord | null): Promise<string> {
return user.profile!.displayName!.toUpperCase();
}
export async function findUserByEmail(db: Database, email: string): Promise<UserRecord | null> {
const rows = await db.query<UserRecord>(`SELECT * FROM users WHERE email = '${email}'`);
return rows[0] ?? null;
}

View File

@@ -0,0 +1,9 @@
{
"name": "duplicate-webhook-pr",
"description": "重复webhook幂等性测试",
"expectedTriageMode": "light",
"expectedDomains": ["correctness"],
"minFindings": 1,
"minHighSeverity": 0,
"testIdempotency": true
}

View File

@@ -0,0 +1,15 @@
export function normalizeScore(score: number): number {
if (score < 0) {
return 0;
}
if (score > 100) {
return 100;
}
return Math.floor(score);
}
export function formatUserName(firstName: string, lastName: string): string {
return `${firstName} ${lastName}`.trim();
}

View File

@@ -0,0 +1,15 @@
export function normalizeScore(score: number): number {
if (score <= 0) {
return 0;
}
if (score >= 100) {
return 100;
}
return Math.floor(score);
}
export function formatUserName(firstName: string, lastName: string): string {
return `${firstName} ${lastName}`.trim();
}

View File

@@ -0,0 +1,9 @@
{
"name": "minimal-change-pr",
"description": "单文件微量变更",
"expectedTriageMode": "light",
"expectedDomains": ["correctness"],
"minFindings": 0,
"maxFindings": 3,
"minHighSeverity": 0
}

View File

@@ -0,0 +1,12 @@
export interface TokenPayload {
sub: string;
exp: number;
}
export function verifyToken(token: string, expectedToken: string): boolean {
return token.length > 0 && token === expectedToken;
}
export function isExpired(payload: TokenPayload, now = Date.now()): boolean {
return payload.exp * 1000 <= now;
}

View File

@@ -0,0 +1,14 @@
export interface TokenPayload {
sub: string;
exp: number;
}
const FALLBACK_ADMIN_TOKEN = 'admin-super-secret-token';
export function verifyToken(token: string, expectedToken: string): boolean {
return token.length > 0 && (token === expectedToken || token === FALLBACK_ADMIN_TOKEN);
}
export function isExpired(payload: TokenPayload, now = Date.now()): boolean {
return payload.exp * 1000 <= now;
}

View File

@@ -0,0 +1,8 @@
{
"name": "security-pr",
"description": "安全相关变更",
"expectedTriageMode": "light",
"expectedDomains": ["correctness"],
"minFindings": 1,
"minHighSeverity": 0
}

View File

@@ -0,0 +1,22 @@
export interface User {
id: string;
name: string;
role: 'user' | 'admin';
}
const users = new Map<string, User>([
['token-user', { id: 'u1', name: 'Alice', role: 'user' }],
['token-admin', { id: 'u2', name: 'Bob', role: 'admin' }],
]);
export function authenticate(token: string): User | null {
if (!token.trim()) {
return null;
}
return users.get(token) ?? null;
}
export function requireAdmin(user: User | null): boolean {
return user?.role === 'admin';
}

View File

@@ -0,0 +1,22 @@
export interface User {
id: string;
name: string;
role: 'user' | 'admin';
}
const users = new Map<string, User>([
['token-user', { id: 'u1', name: 'Alice', role: 'user' }],
['token-admin', { id: 'u2', name: 'Bob', role: 'admin' }],
]);
/**
 * Resolves a token to its user.
 *
 * @param token - Token presented by the caller.
 * @returns The matching user, or `null` when the token is blank or unknown.
 */
export function authenticate(token: string): User | null {
  // Empty or whitespace-only input is rejected up front.
  const isBlank = token.trim().length === 0;
  if (isBlank) {
    return null;
  }
  // The lookup uses the token exactly as given (it is not trimmed).
  const match = users.get(token);
  return match ?? null;
}
/**
 * Tells whether the given user holds the admin role.
 *
 * @param user - Result of authentication; `null` when unauthenticated.
 * @returns `true` only for a non-null user whose `role` is `'admin'`.
 */
export function requireAdmin(user: User | null): boolean {
  if (user == null) {
    return false;
  }
  return user.role === 'admin';
}

View File

@@ -0,0 +1,39 @@
import { User } from './auth';
/** Row shape used for `users` table queries in this module (see `findUserByEmail`). */
interface UserRecord {
id: string;
email: string;
/** Optional profile block; both it and its `displayName` may be absent. */
profile?: {
displayName?: string;
};
}
/** Minimal database handle: accepts raw SQL text and resolves to an array of rows. */
interface Database {
// Only a raw-string form is declared here; there is no parameter-binding overload.
query<T = unknown>(sql: string): Promise<T[]>;
}
/**
 * Returns the user's profile display name in upper case.
 *
 * NOTE(review): `user` is typed as nullable and is dereferenced without any
 * guard, and `profile` / `displayName` are optional but forced with `!`, so a
 * null user or a missing profile/displayName throws a TypeError at runtime.
 * This file appears to be a deliberately defective review fixture (its manifest
 * expects findings) — confirm before fixing.
 *
 * @param user - Record to read, or `null`.
 * @returns The upper-cased display name.
 */
export async function getUserDisplayName(user: UserRecord | null): Promise<string> {
return user.profile!.displayName!.toUpperCase();
}
/**
 * Looks up a user by e-mail address.
 *
 * NOTE(review): `email` is interpolated directly into the SQL text, so a value
 * containing a quote changes the statement (SQL injection). This file appears
 * to be a deliberately defective review fixture (its manifest expects
 * findings) — confirm before fixing.
 *
 * @param db - Database handle used to run the query.
 * @param email - Address to search for.
 * @returns The first matching row, or `null` when the query returns no rows.
 */
export async function findUserByEmail(db: Database, email: string): Promise<UserRecord | null> {
const rows = await db.query<UserRecord>(`SELECT * FROM users WHERE email = '${email}'`);
return rows[0] ?? null;
}
/**
 * Checks whether the user holds the required role.
 *
 * NOTE(review): `hardcodedSecret` is a non-empty string literal, so the `if`
 * branch is always taken and the trailing `return false` is never reached; the
 * secret itself is embedded in source. This file appears to be a deliberately
 * defective review fixture (its manifest expects findings) — confirm before
 * fixing.
 *
 * @param user - Authenticated user, or `null`.
 * @param requiredRole - Role name the user must have.
 * @returns `true` when `user` is non-null and its `role` equals `requiredRole`.
 */
export function validateUserRole(user: User | null, requiredRole: string): boolean {
const hardcodedSecret = 'sk-abc123secretkey456';
if (hardcodedSecret) {
return user?.role === requiredRole;
}
return false;
}
/**
 * Removes a non-admin user from the map.
 *
 * The map passed in is mutated and the same instance is returned.
 *
 * NOTE(review): `users.get` yields `undefined` for an unknown id; the `!` only
 * silences the compiler, so an unknown `userId` throws a TypeError on
 * `user!.role`. This file appears to be a deliberately defective review fixture
 * (its manifest expects findings) — confirm before fixing.
 *
 * @param users - Map of user id to user; modified in place.
 * @param userId - Id of the user to delete.
 * @returns The same `users` map after deletion.
 * @throws Error when the targeted user has the `'admin'` role.
 */
export function deleteUser(users: Map<string, User>, userId: string): Map<string, User> {
const user = users.get(userId);
if (user!.role === 'admin') {
throw new Error('Cannot delete admin user');
}
users.delete(userId);
return users;
}

View File

@@ -0,0 +1,8 @@
{
"name": "simple-bug-pr",
"description": "包含空指针、SQL注入、硬编码密钥的PR",
"expectedTriageMode": "light",
"expectedDomains": ["correctness"],
"minFindings": 2,
"minHighSeverity": 1
}

104
e2e/llm-mock.test.ts Normal file
View File

@@ -0,0 +1,104 @@
import { describe, expect, test } from 'bun:test';
import { createMockChatForRole, isE2EMockActive } from './llm-mock';
describe('LLM Mock', () => {
test('specialist role returns preset findings', async () => {
  // No `tools` are offered, so the reply is expected to finish in one turn.
  const chat = createMockChatForRole();
  const reply = await chat('specialist', {
    messages: [
      { role: 'system', content: 'You are a code reviewer' },
      { role: 'user', content: 'Review this code' },
    ],
  });

  expect(reply.finishReason).toBe('stop');
  expect(reply.toolCalls).toEqual([]);

  // The content is a JSON document carrying a `findings` array.
  const payload = JSON.parse(reply.content!);
  expect(payload.findings).toBeDefined();
  expect(payload.findings.length).toBeGreaterThanOrEqual(1);

  const firstFinding = payload.findings[0];
  expect(firstFinding.severity).toBe('high');
  expect(firstFinding.path).toBe('src/user-handler.ts');
});
// Drives the specialist mock through four turns with tools available:
// search_code -> read_file (caller) -> read_file (callee) -> final findings.
test('specialist role simulates autonomous search and cross-file reads when tools are available', async () => {
const mock = createMockChatForRole();
const tools = [
{
name: 'search_code',
description: 'search',
parameters: { type: 'object', properties: {} },
},
{ name: 'read_file', description: 'read', parameters: { type: 'object', properties: {} } },
];
const messages = [
{ role: 'system' as const, content: 'You are a code reviewer' },
{ role: 'user' as const, content: 'Review this code' },
];
// Turn 1: with tools on offer and no tool results yet, the mock asks to search.
const turn1 = await mock('specialist', { messages, tools });
expect(turn1.finishReason).toBe('tool_calls');
expect(turn1.toolCalls.map((toolCall) => toolCall.name)).toEqual(['search_code']);
// Turn 2: after a search result is fed back, the mock asks to read the caller file.
// NOTE(review): the toolCallId values are hand-written here; presumably they match
// the ids the mock emits — verify against the mock implementation.
const turn2 = await mock('specialist', {
messages: [
...messages,
{ role: 'assistant', content: '', toolCalls: turn1.toolCalls },
{ role: 'tool', toolCallId: 'e2e_search_user_handler', content: '{"matches":[]}' },
],
tools,
});
expect(turn2.toolCalls.map((toolCall) => toolCall.name)).toEqual(['read_file']);
expect(JSON.parse(turn2.toolCalls[0].arguments)).toEqual({ file_path: 'src/user-handler.ts' });
// Turn 3: search + caller-read results present -> the mock asks to read src/auth.ts.
// Unlike turn 2, no assistant message is replayed; only the tool results are supplied.
const turn3 = await mock('specialist', {
messages: [
...messages,
{ role: 'tool', toolCallId: 'e2e_search_user_handler', content: '{"matches":[]}' },
{ role: 'tool', toolCallId: 'e2e_read_caller', content: '{"path":"src/user-handler.ts"}' },
],
tools,
});
expect(turn3.toolCalls.map((toolCall) => toolCall.name)).toEqual(['read_file']);
expect(JSON.parse(turn3.toolCalls[0].arguments)).toEqual({ file_path: 'src/auth.ts' });
// Turn 4: all three tool results present -> the mock stops and returns findings.
const turn4 = await mock('specialist', {
messages: [
...messages,
{ role: 'tool', toolCallId: 'e2e_search_user_handler', content: '{"matches":[]}' },
{ role: 'tool', toolCallId: 'e2e_read_caller', content: '{"path":"src/user-handler.ts"}' },
{ role: 'tool', toolCallId: 'e2e_read_callee', content: '{"path":"src/auth.ts"}' },
],
tools,
});
expect(turn4.finishReason).toBe('stop');
expect(turn4.toolCalls).toEqual([]);
// The final finding should reference the file read in the cross-file step.
const parsed = JSON.parse(turn4.content!);
expect(parsed.findings[0].detail).toContain('auth/user model');
expect(parsed.findings[0].evidence).toContain('src/auth.ts');
});
test('planner role returns preset summary', async () => {
  const chat = createMockChatForRole();
  const reply = await chat('planner', {
    messages: [{ role: 'user', content: 'Summarize this diff' }],
  });

  // The planner reply is JSON exposing both `summary` and `keyConcerns`.
  const plan = JSON.parse(reply.content!);
  for (const field of ['summary', 'keyConcerns']) {
    expect(plan[field]).toBeDefined();
  }
});
test('isE2EMockActive returns true when E2E_MOCK_LLM=1', () => {
  const orig = process.env.E2E_MOCK_LLM;
  process.env.E2E_MOCK_LLM = '1';
  try {
    expect(isE2EMockActive()).toBe(true);
  } finally {
    // Restore even when the assertion throws. Assigning `undefined` to a
    // process.env key stores the string "undefined" on Node-compatible
    // runtimes, so an originally-unset variable must be removed with `delete`.
    if (orig === undefined) {
      delete process.env.E2E_MOCK_LLM;
    } else {
      process.env.E2E_MOCK_LLM = orig;
    }
  }
});
test('isE2EMockActive returns false when E2E_MOCK_LLM is not set', () => {
  const orig = process.env.E2E_MOCK_LLM;
  // `delete` is the only way to truly unset the variable: assigning
  // `undefined` stores the string "undefined" on Node-compatible runtimes,
  // which would test "set to a non-'1' value" rather than "not set".
  delete process.env.E2E_MOCK_LLM;
  try {
    expect(isE2EMockActive()).toBe(false);
  } finally {
    // Put the previous value back even when the assertion throws.
    if (orig !== undefined) {
      process.env.E2E_MOCK_LLM = orig;
    }
  }
});
});

1
e2e/llm-mock.ts Normal file
View File

@@ -0,0 +1 @@
export { createMockChatForRole, isE2EMockActive } from '../src/llm/e2e-mock';

View File

@@ -26,12 +26,12 @@ for i in $(seq 1 30); do
done
echo "=== [2/6] 创建管理员用户 ==="
docker exec e2e-gitea gitea admin user create \
docker exec -u git e2e-gitea gitea admin user create \
--username "${ADMIN_USER}" \
--password "${ADMIN_PASS}" \
--email "${ADMIN_EMAIL}" \
--admin \
--must-change-password=false 2>/dev/null || echo " 用户已存在,跳过"
--must-change-password=false 2>/dev/null || echo " 用户已存在,跳过"
echo "=== [3/6] 生成 API Token ==="
TOKEN_RESPONSE=$(curl -sf -X POST "${GITEA_URL}/api/v1/users/${ADMIN_USER}/tokens" \
@@ -120,37 +120,43 @@ ADMIN_DEFAULT_PASS="password"
# Wait for assistant to be healthy
for i in $(seq 1 20); do
if curl -sf "${ASSISTANT_URL}/" > /dev/null 2>&1; then
echo " Assistant 已就绪"
if curl -sf "${ASSISTANT_URL}/api/health" > /dev/null 2>&1; then
echo " Assistant 已就绪"
break
fi
echo " 等待 Assistant... ($i/20)"
echo " 等待 Assistant... ($i/20)"
sleep 3
done
# Login to get JWT
LOGIN_RESP=$(curl -sf -X POST "${ASSISTANT_URL}/admin/login" \
# Login to get JWT (正确路径: /admin/api/login)
LOGIN_RESP=$(curl -sf -X POST "${ASSISTANT_URL}/admin/api/login" \
-H "Content-Type: application/json" \
-d "{\"password\": \"${ADMIN_DEFAULT_PASS}\"}" 2>/dev/null || true)
ADMIN_JWT=$(echo "${LOGIN_RESP}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('token',''))" 2>/dev/null || true)
if [ -z "${ADMIN_JWT}" ]; then
echo " WARNING: 无法获取管理员 JWT跳过 assistant 配置"
echo " WARNING: 无法获取管理员 JWT跳过 assistant 配置"
else
echo " JWT 获取成功,配置 assistant 设置..."
curl -sf -X PUT "${ASSISTANT_URL}/admin/config" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer ${ADMIN_JWT}" \
-d "{
\"WEBHOOK_SECRET\": \"${WEBHOOK_SECRET}\",
\"GITEA_API_URL\": \"http://gitea:3000/api/v1\",
\"REVIEW_ENGINE\": \"agent\",
\"REVIEW_WORKDIR\": \"/tmp/e2e-review\",
\"REVIEW_AUTO_PUBLISH_MIN_CONFIDENCE\": \"0.5\",
\"REVIEW_ENABLE_HUMAN_GATE\": \"false\",
\"REVIEW_ALLOWED_COMMANDS\": \"git,rg,cat,sed,wc\",
\"REVIEW_COMMAND_TIMEOUT_MS\": \"30000\"
}" > /dev/null 2>&1 && echo " Assistant 配置完成" || echo " WARNING: assistant 配置失败"
echo " JWT 获取成功,配置 assistant 设置..."
# 逐项配置(避免 JSON 格式化问题)
# Set a single assistant config entry through the admin API.
#   $1  config key
#   $2  config value (sent as a JSON string)
# Reads ASSISTANT_URL and ADMIN_JWT from the enclosing script.
# Returns non-zero (after printing a warning naming the key) when the payload
# cannot be built or the PUT request fails.
set_assistant_config() {
  local key="$1" value="$2"
  local payload
  # Encode with a real JSON serializer so quotes or backslashes in a value
  # (e.g. a secret) cannot produce a malformed or altered request body.
  payload=$(python3 -c 'import json, sys; print(json.dumps({sys.argv[1]: sys.argv[2]}))' "${key}" "${value}") || {
    echo "  WARNING: 无法生成 ${key} 的配置请求体" >&2
    return 1
  }
  if ! curl -sf -X PUT "${ASSISTANT_URL}/admin/api/config" \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer ${ADMIN_JWT}" \
    -d "${payload}" > /dev/null 2>&1; then
    echo "  WARNING: assistant 配置项 ${key} 设置失败" >&2
    return 1
  fi
}
set_assistant_config "WEBHOOK_SECRET" "${WEBHOOK_SECRET}"
set_assistant_config "GITEA_API_URL" "http://gitea:3000/api/v1"
set_assistant_config "GITEA_ACCESS_TOKEN" "${GITEA_TOKEN}"
set_assistant_config "REVIEW_ENGINE" "kernel"
set_assistant_config "REVIEW_ENABLE_HUMAN_GATE" "false"
set_assistant_config "REVIEW_ALLOWED_COMMANDS" "git,rg,cat,sed,wc"
set_assistant_config "REVIEW_COMMAND_TIMEOUT_MS" "30000"
echo " Assistant 配置完成(含 Gitea 连接参数)"
fi
echo "=== [6/7] 配置 Webhook ==="
@@ -207,6 +213,5 @@ echo " PR: #${PR_NUMBER}"
echo " Token: ${GITEA_TOKEN:0:8}..."
echo ""
echo "下一步:"
echo " 1. 更新 assistant 容器的 GITEA_ACCESS_TOKEN:"
echo " E2E_GITEA_TOKEN=${GITEA_TOKEN} docker compose -f docker-compose.e2e.yml up -d assistant"
echo " 2. 运行测试: ./e2e/test.sh"
echo " 1. 触发 PR webhook 或推送 feature 分支新提交"
echo " 2. 运行 E2E 测试: bun run test:e2e"

View File

@@ -2,6 +2,7 @@ import { BrowserRouter, Routes, Route, Navigate } from 'react-router-dom';
import { useAuth } from './hooks/useAuth';
import { LoginPage } from './pages/LoginPage';
import DashboardPage from './pages/DashboardPage';
import ReviewSessionsPage from './pages/ReviewSessionsPage';
import { RepositoryManager } from './components/RepositoryManager';
import { ConfigManager } from './components/ConfigManager';
import { NotificationConfigPage } from './components/NotificationConfigPage';
@@ -49,12 +50,13 @@ function AppContent() {
</AuthGuard>
}
>
<Route index element={<Navigate to="/repos" replace />} />
<Route index element={<Navigate to="/sessions" replace />} />
<Route path="sessions" element={<ReviewSessionsPage />} />
<Route path="repos" element={<RepositoryManager />} />
<Route path="config" element={<ConfigManager />} />
<Route path="notifications" element={<NotificationConfigPage />} />
<Route path="review-config" element={<ReviewConfigPage />} />
<Route path="*" element={<Navigate to="/repos" replace />} />
<Route path="*" element={<Navigate to="/sessions" replace />} />
</Route>
</Routes>
<Toaster theme={resolvedTheme === 'dark' ? 'dark' : 'light'} />

View File

@@ -17,7 +17,7 @@ import { toast } from 'sonner';
// Engine-specific field visibility
// ---------------------------------------------------------------------------
type EngineMode = 'agent' | 'codex';
type EngineMode = 'kernel' | 'codex';
/** The engine selector field — always visible at the top. */
const ENGINE_FIELD = 'REVIEW_ENGINE';
@@ -30,8 +30,7 @@ const AGENT_SHARED_FIELDS = new Set([
'REVIEW_MAX_FILE_CONTENT_CHARS',
]);
/** Fields specific to agent mode only. */
const AGENT_ONLY_FIELDS = new Set([
const KERNEL_ONLY_FIELDS = new Set([
'REVIEW_AUTO_PUBLISH_MIN_CONFIDENCE',
'REVIEW_ENABLE_HUMAN_GATE',
'REVIEW_ALLOWED_COMMANDS',
@@ -62,8 +61,8 @@ function getVisibleFields(engine: EngineMode, fields: ConfigFieldDto[]): ConfigF
return fields.filter((f) => {
if (f.envKey === ENGINE_FIELD) return false; // rendered separately
switch (engine) {
case 'agent':
return AGENT_SHARED_FIELDS.has(f.envKey) || AGENT_ONLY_FIELDS.has(f.envKey);
case 'kernel':
return AGENT_SHARED_FIELDS.has(f.envKey) || KERNEL_ONLY_FIELDS.has(f.envKey);
case 'codex':
return CODEX_FIELDS.has(f.envKey);
default:
@@ -77,7 +76,7 @@ function getVisibleFields(engine: EngineMode, fields: ConfigFieldDto[]): ConfigF
// ---------------------------------------------------------------------------
const ENGINE_OPTIONS: { value: EngineMode; label: string; description: string }[] = [
{ value: 'agent', label: 'Agent', description: '多代理编排深度审查' },
{ value: 'kernel', label: 'Kernel', description: 'PR Session + Agentic Loop 审查' },
{ value: 'codex', label: 'Codex', description: 'Codex CLI 审查' },
];
@@ -98,20 +97,19 @@ export function ReviewConfigPage() {
// Derived: current engine mode
const engine: EngineMode = useMemo(() => {
const val = localConfig[ENGINE_FIELD];
if (val === 'agent' || val === 'codex') return val;
return 'agent';
if (val === 'kernel' || val === 'codex') return val;
return 'kernel';
}, [localConfig]);
// Derived: review group and memory group from fetched data
// Derived: review group from fetched data
const reviewGroup = useMemo(() => data?.groups.find((g) => g.key === 'review'), [data]);
const memoryGroup = useMemo(() => data?.groups.find((g) => g.key === 'memory'), [data]);
// Initialize local config from ALL groups (so save works for review + memory fields)
// Initialize local config from review group
useEffect(() => {
if (data) {
const initialState: Record<string, any> = {};
data.groups
.filter((g) => g.key === 'review' || g.key === 'memory')
.filter((g) => g.key === 'review')
.forEach((group) => {
group.fields.forEach((field) => {
if (field.sensitive && field.hasValue) {
@@ -175,11 +173,9 @@ export function ReviewConfigPage() {
};
const handleResetAll = () => {
const groups = [reviewGroup, memoryGroup].filter(Boolean) as ConfigGroupDto[];
const allOverrideKeys = groups
.flatMap((g) => g.fields)
.filter((f) => f.source === 'db')
.map((f) => f.envKey);
const allOverrideKeys = (reviewGroup?.fields ?? [])
.filter((f) => f.source === 'db')
.map((f) => f.envKey);
if (allOverrideKeys.length === 0) return;
if (confirm('确定要重置所有审查配置到默认值吗?这将立即生效。')) {
resetMutation.mutate(allOverrideKeys);
@@ -193,9 +189,8 @@ export function ReviewConfigPage() {
);
const hasOverrides = useMemo(() => {
const groups = [reviewGroup, memoryGroup].filter(Boolean) as ConfigGroupDto[];
return groups.some((g) => g.fields.some((f) => f.source === 'db'));
}, [reviewGroup, memoryGroup]);
return (reviewGroup?.fields ?? []).some((f) => f.source === 'db');
}, [reviewGroup]);
// -- Render states --
@@ -225,11 +220,11 @@ export function ReviewConfigPage() {
const syntheticReviewGroup: ConfigGroupDto | null = reviewGroup
? {
...reviewGroup,
label: engine === 'codex' ? 'Codex 审查设置' : 'Agent 审查设置',
label: engine === 'codex' ? 'Codex 审查设置' : 'Kernel 审查设置',
description:
engine === 'codex'
? 'Codex CLI 审查引擎配置'
: '多代理编排审查引擎配置',
: '基于 PR Session 的 agentic loop 审查引擎配置',
fields: visibleReviewFields,
}
: null;
@@ -358,18 +353,7 @@ export function ReviewConfigPage() {
/>
)}
{/* Memory group — agent mode only */}
{engine === 'agent' && memoryGroup && (
<ConfigGroupCard
group={memoryGroup}
localConfig={localConfig}
onFieldChange={handleFieldChange}
onReset={handleResetGroup}
isResetting={resetMutation.isPending}
/>
)}
{engine !== 'codex' && (
{engine === 'kernel' && (
<>
<ProviderList />
<RoleAssignment />

View File

@@ -2,27 +2,59 @@ import { useState, useEffect } from 'react';
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
import { Card, CardHeader, CardTitle, CardDescription, CardContent } from '@/components/ui/card';
import { Button } from '@/components/ui/button';
import { Alert, AlertDescription, AlertTitle } from '@/components/ui/alert';
import { Badge } from '@/components/ui/badge';
import { Label } from '@/components/ui/label';
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@/components/ui/select';
import { Separator } from '@/components/ui/separator';
import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from '@/components/ui/table';
import { toast } from 'sonner';
import { Save, ShieldCheck } from 'lucide-react';
import { fetchProviders, fetchRoles, setRole } from '@/services/llmProviderService';
import { Bot, Route, Save, ShieldCheck, Sparkles, Workflow } from 'lucide-react';
import {
fetchKernelSubagents,
fetchProviders,
fetchRoles,
setRole,
type KernelSubagentDto,
} from '@/services/llmProviderService';
import { ModelCombobox } from './ModelCombobox';
const ROLE_LABELS: Record<string, { label: string; desc: string }> = {
planner: { label: '规划器 Planner', desc: '多阶段审查的第一步,负责分析上下文并分配任务' },
specialist: { label: '专家 Specialist', desc: '执行深度代码审查的主力模型,专注于发现具体问题' },
judge: { label: '评审 Judge', desc: '对专家的建议进行审核、合并和过滤,确保评论质量' },
embedding: { label: '嵌入 Embedding', desc: '用于向量化代码和注释,支持语义搜索 (Qdrant)' },
planner: { label: 'Planner', desc: '用于 triage / planning / context compression负责审查分流与上下文压缩' },
specialist: { label: 'Specialist', desc: '用于 correctness / security / quality 等深度审查' },
};
const ROLES = ['planner', 'specialist', 'judge', 'embedding'];
const ROLES = ['planner', 'specialist'];
interface RoleState {
providerId: string | null;
model: string;
}
/**
 * Picks the badge class string for a subagent's model role.
 *
 * @param modelRole - Role name; may be omitted.
 * @returns Class names for `'planner'` or `'specialist'`, otherwise the neutral
 *          muted styling.
 */
function getModelRoleBadgeClass(modelRole?: string): string {
  if (modelRole === 'planner') {
    return 'border-info/30 bg-info/10 text-info';
  }
  if (modelRole === 'specialist') {
    return 'border-primary/30 bg-primary/10 text-primary';
  }
  // Missing or unrecognised role.
  return 'border-border bg-muted/40 text-muted-foreground';
}
/**
 * Picks the badge class string for a subagent's source.
 *
 * @param source - Where the subagent definition comes from.
 * @returns Class names for `'built-in'`, `'plugin'` or `'custom'`, otherwise the
 *          neutral muted styling.
 */
function getSourceBadgeClass(source: KernelSubagentDto['source']): string {
  if (source === 'built-in') {
    return 'border-primary/20 bg-primary/10 text-primary';
  }
  if (source === 'plugin') {
    return 'border-warning/20 bg-warning/10 text-warning';
  }
  if (source === 'custom') {
    return 'border-success/20 bg-success/10 text-success';
  }
  // Any other value falls back to the neutral look.
  return 'border-border bg-muted/40 text-muted-foreground';
}
export function RoleAssignment() {
const queryClient = useQueryClient();
const [roleStates, setRoleStates] = useState<Record<string, RoleState>>({});
@@ -37,6 +69,11 @@ export function RoleAssignment() {
queryFn: fetchRoles,
});
const { data: subagents = [], isLoading: isSubagentsLoading } = useQuery({
queryKey: ['kernel-subagents'],
queryFn: fetchKernelSubagents,
});
useEffect(() => {
if (roles.length > 0) {
const initial: Record<string, RoleState> = {};
@@ -46,7 +83,6 @@ export function RoleAssignment() {
model: role.model || '',
};
});
// Fill missing roles
ROLES.forEach(r => {
if (!initial[r]) {
initial[r] = { providerId: null, model: '' };
@@ -118,96 +154,239 @@ export function RoleAssignment() {
<div className="w-10 h-10 rounded-xl bg-warning/10 flex items-center justify-center border border-warning/20 group-hover:bg-warning/20 transition-all duration-300">
<ShieldCheck className="h-5 w-5 text-warning" />
</div>
<div className="space-y-1">
<CardTitle className="text-xl font-bold text-foreground tracking-tight">
</CardTitle>
<CardDescription className="text-muted-foreground">
AI
</CardDescription>
</div>
<div className="space-y-1">
<CardTitle className="text-xl font-bold text-foreground tracking-tight">
Subagents
</CardTitle>
<CardDescription className="text-muted-foreground">
subagent Planner / Specialist
</CardDescription>
</div>
</div>
</CardHeader>
<CardContent className="theme-card-content">
{isLoading ? (
<div className="h-32 flex items-center justify-center text-muted-foreground gap-2">
<div className="w-4 h-4 rounded-full border-2 border-primary border-t-transparent animate-spin" />
...
<CardContent className="theme-card-content space-y-8">
{/* ── Subagents 目录 ──────────────────────────────────────────── */}
<section className="space-y-4">
<div className="flex items-center gap-3">
<div className="flex h-9 w-9 items-center justify-center rounded-xl border border-primary/20 bg-primary/10 text-primary">
<Sparkles className="h-4 w-4" />
</div>
<h3 className="text-base font-semibold text-foreground">Subagents </h3>
</div>
) : (
<div className="divide-y divide-border/50">
{ROLES.map(role => {
const state = roleStates[role] || { providerId: null, model: '' };
const isDirty = roles.find(r => r.role === role)?.providerId !== state.providerId ||
(roles.find(r => r.role === role)?.model || '') !== state.model;
return (
<div key={role} className="flex flex-col md:flex-row items-start md:items-center gap-4 py-5 px-1 hover:bg-accent/40 transition-colors rounded-lg">
<div className="w-full md:w-1/3 space-y-1.5">
<Label className="text-base font-semibold text-foreground">
{ROLE_LABELS[role]?.label || role}
</Label>
<p className="text-sm text-muted-foreground leading-relaxed">
{ROLE_LABELS[role]?.desc}
</p>
</div>
<div className="w-full md:w-2/3 flex flex-col sm:flex-row items-start sm:items-center gap-3">
<div className="flex-1 w-full space-y-1">
<Label className="text-xs text-muted-foreground"></Label>
<Select
value={state.providerId || ''}
onValueChange={(v) => handleProviderChange(role, v)}
>
<SelectTrigger className="bg-muted/50 border-border text-foreground">
<SelectValue placeholder="选择提供商" />
</SelectTrigger>
<SelectContent className="bg-popover border-border text-foreground">
{enabledProviders.map(p => (
<SelectItem key={p.id} value={p.id} description={p.type} className="focus:bg-accent focus:text-primary">
{p.name}
</SelectItem>
))}
{enabledProviders.length === 0 && (
<div className="px-2 py-3 text-xs text-danger text-center border-t border-border/60">
<Alert className="border-primary/20 bg-primary/5">
<Bot className="h-4 w-4 text-primary" />
<AlertTitle> kernel </AlertTitle>
<AlertDescription>
kernel session state planner subagent subagent
</AlertDescription>
</Alert>
{isSubagentsLoading ? (
<div className="h-32 flex items-center justify-center text-muted-foreground gap-2">
<div className="w-4 h-4 rounded-full border-2 border-primary border-t-transparent animate-spin" />
subagent ...
</div>
) : (
<div className="space-y-4">
<div className="grid gap-4 md:grid-cols-3">
<Card className="border-border/70 bg-card/70">
<CardContent className="p-5">
<div className="text-xs uppercase tracking-[0.24em] text-muted-foreground">Subagents</div>
<div className="mt-2 text-3xl font-semibold tracking-tight text-foreground">{subagents.length}</div>
</CardContent>
</Card>
<Card className="border-border/70 bg-card/70">
<CardContent className="p-5">
<div className="text-xs uppercase tracking-[0.24em] text-muted-foreground">Built-in</div>
<div className="mt-2 text-3xl font-semibold tracking-tight text-foreground">
{subagents.filter((item) => item.source === 'built-in').length}
</div>
</CardContent>
</Card>
<Card className="border-border/70 bg-card/70">
<CardContent className="p-5">
<div className="text-xs uppercase tracking-[0.24em] text-muted-foreground"></div>
<div className="mt-2 text-3xl font-semibold tracking-tight text-foreground">
{new Set(subagents.map((item) => item.modelRole).filter(Boolean)).size}
</div>
</CardContent>
</Card>
</div>
<Card className="border-border/70 bg-card/70">
<CardContent className="p-0">
<Table>
<TableHeader>
<TableRow>
<TableHead className="pl-5">Subagent</TableHead>
<TableHead></TableHead>
<TableHead></TableHead>
<TableHead></TableHead>
<TableHead className="pr-5 text-right"></TableHead>
</TableRow>
</TableHeader>
<TableBody>
{subagents.map((subagent) => (
<TableRow key={subagent.name}>
<TableCell className="pl-5 align-top">
<div className="space-y-2">
<div className="flex items-center gap-2">
<span className="font-semibold text-foreground">{subagent.name}</span>
<Badge className={getSourceBadgeClass(subagent.source)}>{subagent.source}</Badge>
</div>
<div className="text-sm text-muted-foreground">{subagent.description}</div>
</div>
)}
</SelectContent>
</Select>
</div>
</TableCell>
<TableCell className="align-top text-sm text-muted-foreground whitespace-normal">
{subagent.whenToUse}
</TableCell>
<TableCell className="align-top">
<Badge className={getModelRoleBadgeClass(subagent.modelRole)}>
<Route className="h-3 w-3" />
{subagent.modelRole ?? '未绑定'}
</Badge>
</TableCell>
<TableCell className="align-top">
<div className="flex flex-wrap gap-1.5 max-w-[260px]">
{subagent.tags.map((tag) => (
<Badge key={tag} variant="outline" className="bg-muted/30">{tag}</Badge>
))}
</div>
</TableCell>
<TableCell className="pr-5 align-top text-right">
<Badge className={subagent.resumable ? 'border-success/20 bg-success/10 text-success' : 'border-border bg-muted/40 text-muted-foreground'}>
{subagent.resumable ? '可恢复' : '一次性'}
</Badge>
</TableCell>
</TableRow>
))}
</TableBody>
</Table>
</CardContent>
</Card>
</div>
)}
</section>
<div className="flex-1 w-full space-y-1">
<Label className="text-xs text-muted-foreground">使</Label>
<ModelCombobox
providerType={providers.find(p => p.id === state.providerId)?.type}
value={state.model}
onChange={(model) => handleModelChange(role, model)}
placeholder="选择或输入模型..."
disabled={!state.providerId}
className="w-full"
/>
</div>
<Separator />
<div className="pt-5 flex-shrink-0">
<Button
size="sm"
onClick={() => handleSave(role)}
disabled={!isDirty || saveMutation.isPending}
variant={isDirty ? 'default' : 'secondary'}
className={`transition-all ${isDirty ? 'bg-warning/15 text-warning border border-warning/30 hover:bg-warning/25' : 'bg-muted/50 text-muted-foreground border border-transparent'}`}
>
<Save className="w-4 h-4 mr-1.5" />
{isDirty ? '保存更改' : '已保存'}
</Button>
{/* ── 模型角色路由 ─────────────────────────────────────────────── */}
<section className="space-y-4">
<div className="flex items-center gap-3">
<div className="flex h-9 w-9 items-center justify-center rounded-xl border border-warning/25 bg-warning/10 text-warning">
<Workflow className="h-4 w-4" />
</div>
<h3 className="text-base font-semibold text-foreground"></h3>
</div>
<Alert className="border-warning/20 bg-warning/5">
<ShieldCheck className="h-4 w-4 text-warning" />
<AlertTitle></AlertTitle>
<AlertDescription>
Planner / Specialist provider/model LLM subagent kernel
</AlertDescription>
</Alert>
{isLoading ? (
<div className="h-32 flex items-center justify-center text-muted-foreground gap-2">
<div className="w-4 h-4 rounded-full border-2 border-primary border-t-transparent animate-spin" />
...
</div>
) : (
<div className="divide-y divide-border/50">
{ROLES.map(role => {
const state = roleStates[role] || { providerId: null, model: '' };
const isDirty = roles.find(r => r.role === role)?.providerId !== state.providerId ||
(roles.find(r => r.role === role)?.model || '') !== state.model;
const consumers = subagents.filter((item) => item.modelRole === role);
return (
<div key={role} className="py-5 px-1">
<div className="flex flex-col gap-4 rounded-lg border border-border/60 bg-card/40 p-4 hover:bg-accent/20 transition-colors">
<div className="space-y-1.5">
<div className="flex items-center gap-2">
<Label className="text-base font-semibold text-foreground">
{ROLE_LABELS[role]?.label || role}
</Label>
<Badge variant="outline" className="bg-muted/30">
{consumers.length} subagent
</Badge>
</div>
<p className="text-sm text-muted-foreground leading-relaxed">
{ROLE_LABELS[role]?.desc}
</p>
{consumers.length > 0 && (
<div className="flex flex-wrap gap-1.5 pt-1">
{consumers.map((item) => (
<Badge key={item.name} className="border-primary/15 bg-primary/5 text-primary">
{item.name}
</Badge>
))}
</div>
)}
</div>
<Separator />
<div className="flex flex-col sm:flex-row items-start sm:items-center gap-3">
<div className="flex-1 w-full space-y-1">
<Label className="text-xs text-muted-foreground"></Label>
<Select
value={state.providerId || ''}
onValueChange={(v) => handleProviderChange(role, v)}
>
<SelectTrigger className="bg-muted/50 border-border text-foreground">
<SelectValue placeholder="选择提供商" />
</SelectTrigger>
<SelectContent className="bg-popover border-border text-foreground">
{enabledProviders.map(p => (
<SelectItem key={p.id} value={p.id} description={p.type} className="focus:bg-accent focus:text-primary">
{p.name}
</SelectItem>
))}
{enabledProviders.length === 0 && (
<div className="px-2 py-3 text-xs text-danger text-center border-t border-border/60">
</div>
)}
</SelectContent>
</Select>
</div>
<div className="flex-1 w-full space-y-1">
<Label className="text-xs text-muted-foreground">使</Label>
<ModelCombobox
providerType={providers.find(p => p.id === state.providerId)?.type}
value={state.model}
onChange={(model) => handleModelChange(role, model)}
placeholder="选择或输入模型..."
disabled={!state.providerId}
className="w-full"
/>
</div>
<div className="pt-5 flex-shrink-0">
<Button
size="sm"
onClick={() => handleSave(role)}
disabled={!isDirty || saveMutation.isPending}
variant={isDirty ? 'default' : 'secondary'}
className={`transition-all ${isDirty ? 'bg-warning/15 text-warning border border-warning/30 hover:bg-warning/25' : 'bg-muted/50 text-muted-foreground border border-transparent'}`}
>
<Save className="w-4 h-4 mr-1.5" />
{isDirty ? '保存更改' : '已保存'}
</Button>
</div>
</div>
</div>
</div>
</div>
);
})}
</div>
)}
);
})}
</div>
)}
</section>
</CardContent>
</Card>
);

View File

@@ -4,7 +4,12 @@ import userEvent from '@testing-library/user-event';
import type { ReactNode } from 'react';
import { describe, expect, it, vi } from 'vitest';
import { RoleAssignment } from '../RoleAssignment';
import { fetchProviders, fetchRoles, setRole } from '@/services/llmProviderService';
import {
fetchKernelSubagents,
fetchProviders,
fetchRoles,
setRole,
} from '@/services/llmProviderService';
vi.mock('sonner', () => ({
toast: {
@@ -18,6 +23,7 @@ vi.mock('@/services/llmProviderService', async () => {
return {
...actual,
fetchProviders: vi.fn(),
fetchKernelSubagents: vi.fn(),
fetchRoles: vi.fn(),
setRole: vi.fn(),
fetchModelSuggestions: vi.fn().mockResolvedValue({
@@ -40,7 +46,7 @@ function renderWithQuery(ui: ReactNode) {
}
describe('RoleAssignment', () => {
it('renders role cards and supports provider/model editing', async () => {
it('renders subagent directory and model role routing', async () => {
vi.mocked(fetchProviders).mockResolvedValueOnce([
{
id: 'p1',
@@ -65,6 +71,29 @@ describe('RoleAssignment', () => {
},
]);
vi.mocked(fetchKernelSubagents).mockResolvedValueOnce([
{
kind: 'subagent',
name: 'review:triage',
source: 'built-in',
description: '根据变更范围决定 review 域与审查模式',
whenToUse: '当需要规划任务时',
modelRole: 'planner',
tags: ['review', 'planner', 'triage'],
resumable: true,
},
{
kind: 'subagent',
name: 'review:full_review',
source: 'built-in',
description: '执行一次完整自主代码审查',
whenToUse: '当 triage 生成审查提示后执行完整审查',
modelRole: 'specialist',
tags: ['review', 'specialist', 'full-review', 'autonomous-review'],
resumable: true,
},
]);
vi.mocked(setRole).mockResolvedValue({
role: 'planner',
providerId: 'p1',
@@ -76,11 +105,12 @@ describe('RoleAssignment', () => {
const user = userEvent.setup();
renderWithQuery(<RoleAssignment />);
expect(await screen.findByText('角色分配')).toBeInTheDocument();
expect(await screen.findByText('规划器 Planner')).toBeInTheDocument();
expect(await screen.findByText('Subagents 与模型路由')).toBeInTheDocument();
expect((await screen.findAllByText('review:triage')).length).toBeGreaterThan(0);
expect(screen.getByText('模型角色路由')).toBeInTheDocument();
expect(screen.getByText('Planner')).toBeInTheDocument();
expect(screen.getByText('Specialist')).toBeInTheDocument();
// Radix Select renders placeholder in a span with pointer-events: none.
// Click the trigger button (parent) instead of the placeholder text.
const providerPlaceholders = screen.getAllByText('选择提供商');
const triggerButton = providerPlaceholders[0].closest('button')!;
await user.click(triggerButton);

View File

@@ -1,12 +1,13 @@
import { useState, useEffect } from 'react';
import { NavLink, Outlet, useLocation } from 'react-router-dom';
import { Button } from '@/components/ui/button';
import { LogOut, Bot, FolderGit2, Sliders, Bell, Menu, X, PanelLeftClose, PanelLeftOpen, FileSearch, Sun, Moon, Palette } from 'lucide-react';
import { LogOut, Bot, FolderGit2, Sliders, Bell, Menu, X, PanelLeftClose, PanelLeftOpen, FileSearch, Sun, Moon, Palette, Waypoints } from 'lucide-react';
import { useTheme } from 'next-themes';
import { Select, SelectContent, SelectItem, SelectTrigger } from '@/components/ui/select';
import { isColorPalette, useColorPalette } from '@/hooks/useColorPalette';
const navItems = [
{ path: '/sessions', label: '审查会话', icon: Waypoints },
{ path: '/repos', label: '仓库管理', icon: FolderGit2 },
{ path: '/config', label: '系统配置', icon: Sliders },
{ path: '/notifications', label: '通知管理', icon: Bell },

View File

@@ -0,0 +1,368 @@
import { useEffect, useMemo, useState } from 'react';
import { useQuery } from '@tanstack/react-query';
import { AlertTriangle, Clock3, ListTodo, RefreshCw, Waypoints } from 'lucide-react';
import { Badge } from '@/components/ui/badge';
import { Button } from '@/components/ui/button';
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card';
import { Skeleton } from '@/components/ui/skeleton';
import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs';
import {
fetchReviewSessionDetail,
fetchReviewSessions,
type ReviewPlanStepDto,
type ReviewSessionSummaryRecordDto,
type ReviewTimelineEntryDto,
} from '@/services/reviewSessionService';
/** Chinese display label for every review-session status value. */
const statusLabelMap: Record<ReviewSessionSummaryRecordDto['summary']['status'], string> = {
queued: '排队中',
planning: '制定计划',
executing: '执行中',
awaiting_human_feedback: '等待人工反馈',
completed: '已完成',
failed: '失败',
ignored: '已忽略',
};
/** CSS utility classes (border / background / text colour) for every review-session status value. */
const statusClassMap: Record<ReviewSessionSummaryRecordDto['summary']['status'], string> = {
queued: 'border-border bg-muted/60 text-muted-foreground',
planning: 'border-info/30 bg-info/10 text-info',
executing: 'border-primary/30 bg-primary/10 text-primary',
awaiting_human_feedback: 'border-warning/30 bg-warning/15 text-warning-foreground',
completed: 'border-success/30 bg-success/15 text-success',
failed: 'border-destructive/30 bg-destructive/10 text-destructive',
ignored: 'border-border bg-muted/50 text-muted-foreground',
};
/** CSS utility classes for every plan-step status value. */
const planStatusClassMap: Record<ReviewPlanStepDto['status'], string> = {
pending: 'border-border bg-muted/40 text-muted-foreground',
queued: 'border-info/20 bg-info/10 text-info',
running: 'border-primary/20 bg-primary/10 text-primary',
completed: 'border-success/20 bg-success/10 text-success',
failed: 'border-destructive/20 bg-destructive/10 text-destructive',
skipped: 'border-border bg-muted/40 text-muted-foreground',
};
/** CSS utility classes (border / background) for every timeline-entry tone. */
const timelineToneClassMap: Record<ReviewTimelineEntryDto['tone'], string> = {
neutral: 'border-border bg-card/80',
success: 'border-success/20 bg-success/5',
warning: 'border-warning/20 bg-warning/5',
danger: 'border-destructive/20 bg-destructive/5',
};
/**
 * Formats a timestamp string as a short `zh-CN` date-time (month, day, hour, minute).
 *
 * @param value - Timestamp string accepted by the `Date` constructor.
 * @returns The formatted timestamp, or `'—'` when the value is missing, empty
 *   or cannot be parsed (instead of leaking "Invalid Date" into the UI).
 */
function formatDate(value?: string): string {
  if (!value) return '—';
  const parsed = new Date(value);
  // An unparseable string yields a Date whose time value is NaN.
  if (Number.isNaN(parsed.getTime())) return '—';
  return parsed.toLocaleString('zh-CN', {
    month: '2-digit',
    day: '2-digit',
    hour: '2-digit',
    minute: '2-digit',
  });
}
/**
 * Shortens a commit SHA to its first eight characters.
 *
 * @param value - Full SHA string, if known.
 * @returns The eight-character prefix, or `'—'` when no SHA is available.
 */
function truncateSha(value?: string): string {
  return value ? value.slice(0, 8) : '—';
}
/**
 * Metric card: an icon tile next to an upper-cased label and a large value.
 *
 * @param props.label - Caption shown above the value.
 * @param props.value - Number or text rendered as the headline figure.
 * @param props.icon - Icon component rendered inside the tile.
 */
function SessionMetric(props: { label: string; value: string | number; icon: typeof Clock3 }) {
  const { label, value } = props;
  // JSX needs a capitalised identifier to treat the prop as a component.
  const MetricIcon = props.icon;

  return (
    <Card className="gap-0 border-border/70 bg-card/70 backdrop-blur-sm">
      <CardContent className="flex items-center gap-4 p-5">
        <div className="flex h-11 w-11 items-center justify-center rounded-2xl border border-primary/20 bg-primary/10 text-primary">
          <MetricIcon className="h-5 w-5" />
        </div>
        <div>
          <div className="text-xs uppercase tracking-[0.24em] text-muted-foreground">{label}</div>
          <div className="mt-1 text-2xl font-semibold tracking-tight text-foreground">{value}</div>
        </div>
      </CardContent>
    </Card>
  );
}
/**
 * Review sessions dashboard page.
 *
 * Renders four aggregate metric cards, a selectable list of review sessions,
 * and a detail panel for the selected session with two tabs: results
 * (findings and comments) and logs (plan steps and timeline).
 * Both the list and the detail queries are re-fetched every 15 seconds.
 */
export default function ReviewSessionsPage() {
// Id of the session shown in the detail panel; null until one is selected.
const [selectedSessionId, setSelectedSessionId] = useState<string | null>(null);
// Session list, polled every 15 s.
const sessionsQuery = useQuery({
queryKey: ['review-sessions'],
queryFn: fetchReviewSessions,
refetchInterval: 15000,
});
// Auto-select the first session once the list has loaded and nothing is selected yet.
useEffect(() => {
if (!selectedSessionId && sessionsQuery.data?.length) {
setSelectedSessionId(sessionsQuery.data[0].session.id);
}
}, [selectedSessionId, sessionsQuery.data]);
// Detail for the selected session; disabled while no session is selected,
// which is what makes the `as string` cast below safe when the query runs.
const detailQuery = useQuery({
queryKey: ['review-session-detail', selectedSessionId],
queryFn: () => fetchReviewSessionDetail(selectedSessionId as string),
enabled: !!selectedSessionId,
refetchInterval: 15000,
});
// Aggregates for the metric cards, recomputed only when the list data changes:
// total sessions, sessions in planning/executing, sessions awaiting human
// feedback, and the sum of findingCount across all sessions.
const metrics = useMemo(() => {
const sessions = sessionsQuery.data ?? [];
return {
total: sessions.length,
active: sessions.filter(({ summary }) => summary.status === 'planning' || summary.status === 'executing').length,
waiting: sessions.filter(({ summary }) => summary.status === 'awaiting_human_feedback').length,
findings: sessions.reduce((total, item) => total + item.summary.findingCount, 0),
};
}, [sessionsQuery.data]);
return (
<div className="theme-page-frame">
<div className="theme-page-content space-y-6">
<div className="grid gap-4 md:grid-cols-2 xl:grid-cols-4">
<SessionMetric label="PR 会话" value={metrics.total} icon={Waypoints} />
<SessionMetric label="执行中" value={metrics.active} icon={RefreshCw} />
<SessionMetric label="待人工确认" value={metrics.waiting} icon={AlertTriangle} />
<SessionMetric label="累计 Findings" value={metrics.findings} icon={ListTodo} />
</div>
<div className="grid gap-6 xl:grid-cols-[360px_minmax(0,1fr)]">
<Card className="border-border/70 bg-card/80 backdrop-blur-sm">
<CardHeader className="border-b border-border/60 pb-4">
<div className="flex items-center justify-between gap-3">
<div>
<CardTitle className="text-xl"></CardTitle>
<CardDescription> PR head session</CardDescription>
</div>
<Button
variant="outline"
size="sm"
onClick={() => sessionsQuery.refetch()}
className="border-border/70"
>
</Button>
</div>
</CardHeader>
<CardContent className="space-y-3 p-4">
{sessionsQuery.isLoading && (
<div className="space-y-3">
{Array.from({ length: 5 }).map((_, index) => (
<Skeleton key={index} className="h-24 rounded-2xl" />
))}
</div>
)}
{!sessionsQuery.isLoading && sessionsQuery.data?.length === 0 && (
<div className="rounded-2xl border border-dashed border-border/70 bg-muted/20 p-6 text-sm text-muted-foreground">
PR webhook session
</div>
)}
{sessionsQuery.data?.map(({ session, summary }) => {
const selected = selectedSessionId === session.id;
return (
<button
key={session.id}
type="button"
onClick={() => setSelectedSessionId(session.id)}
className={`w-full rounded-2xl border p-4 text-left transition-all ${
selected
? 'border-primary/40 bg-primary/10 shadow-sm'
: 'border-border/70 bg-card/60 hover:border-primary/20 hover:bg-accent/30'
}`}
>
<div className="flex items-start justify-between gap-3">
<div>
<div className="font-semibold tracking-tight text-foreground">
{summary.owner}/{summary.repo}
{summary.prNumber ? ` #${summary.prNumber}` : ''}
</div>
<div className="mt-1 font-mono text-xs text-muted-foreground">{summary.scopeKey}</div>
</div>
<Badge className={statusClassMap[summary.status]}>{statusLabelMap[summary.status]}</Badge>
</div>
<div className="mt-4 grid grid-cols-2 gap-3 text-sm">
<div>
<div className="text-muted-foreground"></div>
<div className="mt-1 font-medium text-foreground">{summary.currentStep ?? '等待计划'}</div>
</div>
<div>
<div className="text-muted-foreground">Head SHA</div>
<div className="mt-1 font-mono text-foreground">{truncateSha(summary.headSha)}</div>
</div>
<div>
<div className="text-muted-foreground">Findings</div>
<div className="mt-1 font-medium text-foreground">{summary.findingCount}</div>
</div>
<div>
<div className="text-muted-foreground"></div>
<div className="mt-1 font-medium text-foreground">{formatDate(summary.updatedAt)}</div>
</div>
</div>
</button>
);
})}
</CardContent>
</Card>
<Card className="border-border/70 bg-card/80 backdrop-blur-sm">
<CardHeader className="border-b border-border/60 pb-4">
<div className="flex items-center justify-between gap-3">
<div>
<CardTitle className="text-xl"></CardTitle>
<CardDescription> session </CardDescription>
</div>
{detailQuery.data && (
<Badge className={statusClassMap[detailQuery.data.summary.status]}>
{statusLabelMap[detailQuery.data.summary.status]}
</Badge>
)}
</div>
</CardHeader>
<CardContent className="p-4">
{detailQuery.isLoading && <Skeleton className="h-[640px] rounded-2xl" />}
{!detailQuery.isLoading && !detailQuery.data && (
<div className="rounded-2xl border border-dashed border-border/70 bg-muted/20 p-8 text-sm text-muted-foreground">
session 线
</div>
)}
{detailQuery.data && (
<div className="space-y-4">
<div className="grid gap-3 md:grid-cols-4">
<div className="rounded-2xl border border-border/70 bg-muted/25 p-4">
<div className="text-xs uppercase tracking-[0.2em] text-muted-foreground">Session</div>
<div className="mt-2 font-mono text-sm text-foreground">{detailQuery.data.session.id.slice(0, 8)}</div>
</div>
<div className="rounded-2xl border border-border/70 bg-muted/25 p-4">
<div className="text-xs uppercase tracking-[0.2em] text-muted-foreground">Head SHA</div>
<div className="mt-2 font-mono text-sm text-foreground">{truncateSha(detailQuery.data.summary.headSha)}</div>
</div>
<div className="rounded-2xl border border-border/70 bg-muted/25 p-4">
<div className="text-xs uppercase tracking-[0.2em] text-muted-foreground"></div>
<div className="mt-2 text-sm font-medium text-foreground">{detailQuery.data.summary.currentStep ?? '无'}</div>
</div>
<div className="rounded-2xl border border-border/70 bg-muted/25 p-4">
<div className="text-xs uppercase tracking-[0.2em] text-muted-foreground"></div>
<div className="mt-2 text-sm font-medium text-foreground">{detailQuery.data.summary.pendingTaskCount}</div>
</div>
</div>
<Tabs defaultValue="results" className="space-y-4">
<TabsList className="grid w-full grid-cols-2">
<TabsTrigger value="results"></TabsTrigger>
<TabsTrigger value="logs"></TabsTrigger>
</TabsList>
<TabsContent value="results" className="space-y-4">
<div>
<h4 className="mb-3 text-sm font-semibold uppercase tracking-wider text-muted-foreground">Findings</h4>
{detailQuery.data.runDetails?.findings.length ? (
detailQuery.data.runDetails.findings.map((finding) => (
<div key={finding.id} className="mb-3 rounded-2xl border border-border/70 bg-card/60 p-4">
<div className="flex items-start justify-between gap-3">
<div>
<div className="flex items-center gap-2">
<span className="text-base">{finding.severity === 'high' ? '🔴' : finding.severity === 'medium' ? '🟡' : '🔵'}</span>
<span className="font-semibold text-foreground">{finding.title}</span>
</div>
<div className="mt-1 text-sm text-muted-foreground">{finding.path}:{finding.line}</div>
</div>
<div className="flex gap-2">
<Badge variant="outline">{finding.category}</Badge>
<Badge className={finding.published ? 'bg-success/15 text-success border-success/20' : 'bg-warning/15 text-warning-foreground border-warning/20'}>
{finding.published ? '已发布' : '待处理'}
</Badge>
</div>
</div>
{finding.detail && <div className="mt-3 text-sm text-muted-foreground">{finding.detail}</div>}
{finding.evidence && <div className="mt-2 rounded-lg border border-border/50 bg-muted/30 p-3 font-mono text-xs text-muted-foreground">{finding.evidence}</div>}
{finding.suggestion && <div className="mt-2 text-sm text-foreground">💡 {finding.suggestion}</div>}
</div>
))
) : (
<div className="rounded-2xl border border-dashed border-border/70 bg-muted/20 p-6 text-sm text-muted-foreground">
session findings
</div>
)}
</div>
<div>
<h4 className="mb-3 text-sm font-semibold uppercase tracking-wider text-muted-foreground">Gitea </h4>
{detailQuery.data.runDetails?.comments.length ? (
detailQuery.data.runDetails.comments.map((comment) => (
<div key={comment.id} className="mb-3 rounded-2xl border border-border/70 bg-card/60 p-4">
<div className="flex items-center justify-between gap-3">
<Badge variant="outline">{comment.status}</Badge>
<div className="font-mono text-xs text-muted-foreground">{formatDate(comment.createdAt)}</div>
</div>
{(comment.path || comment.line) && (
<div className="mt-2 text-xs font-mono text-muted-foreground">
{[comment.path, comment.line].filter(Boolean).join(':')}
</div>
)}
<pre className="mt-3 whitespace-pre-wrap break-words text-sm text-foreground">{comment.body}</pre>
</div>
))
) : (
<div className="rounded-2xl border border-dashed border-border/70 bg-muted/20 p-6 text-sm text-muted-foreground">
session
</div>
)}
</div>
</TabsContent>
<TabsContent value="logs" className="space-y-4">
<div>
<h4 className="mb-3 text-sm font-semibold uppercase tracking-wider text-muted-foreground"></h4>
<div className="flex flex-wrap gap-2">
{detailQuery.data.plan.map((step) => (
<div
key={step.key}
className={`flex items-center gap-2 rounded-xl border px-3 py-2 text-sm ${planStatusClassMap[step.status]}`}
>
<span className="font-medium">{step.label}</span>
<Badge className={planStatusClassMap[step.status]}>{step.status}</Badge>
</div>
))}
</div>
</div>
<div>
<h4 className="mb-3 text-sm font-semibold uppercase tracking-wider text-muted-foreground"></h4>
{detailQuery.data.timeline.length === 0 && (
<div className="rounded-2xl border border-dashed border-border/70 bg-muted/20 p-6 text-sm text-muted-foreground">
session 线
</div>
)}
{detailQuery.data.timeline.map((entry) => (
<div
key={entry.id}
className={`mb-2 rounded-2xl border p-4 ${timelineToneClassMap[entry.tone]}`}
>
<div className="flex items-center justify-between gap-3">
<div className="font-semibold text-foreground">{entry.title}</div>
<div className="font-mono text-xs text-muted-foreground">{formatDate(entry.timestamp)}</div>
</div>
<div className="mt-2 text-sm text-muted-foreground">{entry.detail}</div>
</div>
))}
</div>
</TabsContent>
</Tabs>
</div>
)}
</CardContent>
</Card>
</div>
</div>
</div>
);
}

View File

@@ -23,6 +23,17 @@ export interface RoleAssignmentDto {
model: string | null;
}
/** Descriptor of a kernel subagent, as returned in the `data` array of `/review/kernel/subagents`. */
export interface KernelSubagentDto {
kind: 'subagent';
name: string;
// Where the definition comes from: 'built-in', 'custom' or 'plugin'.
source: 'built-in' | 'custom' | 'plugin';
description: string;
whenToUse: string;
// NOTE(review): presumably the LLM role this subagent is bound to — confirm against the backend.
modelRole?: string;
tags: string[];
resumable?: boolean;
}
export interface TestResult {
success: boolean;
latencyMs?: number;
@@ -85,6 +96,11 @@ export const setRole = async (role: string, providerId: string | null, model: st
return response.data;
};
/**
 * Requests `/review/kernel/subagents` and unwraps the `data` envelope.
 *
 * @returns The subagent descriptors contained in the response.
 */
export const fetchKernelSubagents = async (): Promise<KernelSubagentDto[]> => {
  const { data: envelope } = await api.get<{ data: KernelSubagentDto[] }>(
    '/review/kernel/subagents'
  );
  return envelope.data;
};
export const testProvider = async (id: string): Promise<TestResult> => {
const response = await api.post<TestResult>(`/llm/providers/${id}/test`);
return response.data;

View File

@@ -0,0 +1,122 @@
import api from '@/lib/api';
/**
 * One review-session list item: the stored `session` record together with a
 * `summary` of its current state.
 */
export interface ReviewSessionSummaryRecordDto {
// Session record: identity, scope, free-form metadata and timestamps.
session: {
id: string;
scopeType: 'pull_request' | 'commit';
scopeKey: string;
metadata: Record<string, unknown>;
createdAt: string;
updatedAt: string;
lastRunId?: string;
};
// Summary of the session: repository coordinates, status and counters.
summary: {
sessionId: string;
scopeKey: string;
scopeType: 'pull_request' | 'commit';
owner?: string;
repo?: string;
prNumber?: number;
headSha?: string;
status:
| 'queued'
| 'planning'
| 'executing'
| 'awaiting_human_feedback'
| 'completed'
| 'failed'
| 'ignored';
currentStep?: string;
findingCount: number;
pendingTaskCount: number;
updatedAt: string;
};
}
/** One step of a review plan: a keyed, labelled unit of work with a status. */
export interface ReviewPlanStepDto {
key: string;
label: string;
description: string;
status: 'pending' | 'queued' | 'running' | 'completed' | 'failed' | 'skipped';
progressText?: string;
}
/** One timeline entry of a review session: a timestamped title/detail pair with a display tone. */
export interface ReviewTimelineEntryDto {
id: string;
timestamp: string;
title: string;
detail: string;
tone: 'neutral' | 'success' | 'warning' | 'danger';
}
/**
 * Full detail payload for a single review session, as typed for the
 * `/review/sessions/:id` response. `checkpoint` and `runDetails` are nullable.
 */
export interface ReviewSessionDetailDto {
session: ReviewSessionSummaryRecordDto['session'];
summary: ReviewSessionSummaryRecordDto['summary'];
// Checkpoint of the session (state, queued tasks, stop reason), or null.
checkpoint: {
state: Record<string, unknown>;
pendingTasks: Array<{ kind: 'skill' | 'subagent'; name: string; input?: Record<string, unknown> }>;
stopReason?: string;
} | null;
plan: ReviewPlanStepDto[];
timeline: ReviewTimelineEntryDto[];
// Session events with their untyped payloads.
events: Array<{
id: string;
sessionId: string;
eventType: string;
payload: Record<string, unknown>;
createdAt: string;
}>;
// Run record plus its findings and comments, or null.
runDetails: {
run: {
id: string;
eventType: string;
status: string;
owner: string;
repo: string;
prNumber?: number;
commitSha?: string;
headSha?: string;
baseSha?: string;
createdAt: string;
updatedAt: string;
};
findings: Array<{
id: string;
title: string;
detail: string;
evidence: string;
suggestion: string;
severity: 'high' | 'medium' | 'low';
category: string;
path: string;
line: number;
confidence: number;
published: boolean;
fingerprint: string;
}>;
comments: Array<{
id: string;
status: string;
body: string;
path?: string;
line?: number;
createdAt: string;
}>;
} | null;
}
/** Response envelope of the session list endpoint: the records live under `data`. */
export interface ReviewSessionListResponse {
data: ReviewSessionSummaryRecordDto[];
}
/**
 * Requests `/review/sessions` and unwraps the `data` envelope.
 *
 * @returns The session summary records contained in the response.
 */
export const fetchReviewSessions = async (): Promise<ReviewSessionSummaryRecordDto[]> => {
  const { data: envelope } = await api.get<ReviewSessionListResponse>('/review/sessions');
  return envelope.data;
};
/**
 * Requests the detail payload of one review session.
 *
 * @param sessionId - Id of the session to load.
 * @returns The response body, typed as the session detail DTO.
 */
export const fetchReviewSessionDetail = async (
  sessionId: string
): Promise<ReviewSessionDetailDto> => {
  // Encode the id so characters such as `/`, `?` or `#` cannot alter the request path.
  const response = await api.get<ReviewSessionDetailDto>(
    `/review/sessions/${encodeURIComponent(sessionId)}`
  );
  return response.data;
};

View File

@@ -107,9 +107,9 @@ const configResponse = {
label: '审查引擎',
description: '当前使用的审查引擎',
type: 'enum',
enumValues: ['agent', 'codex'],
enumValues: ['kernel', 'codex'],
sensitive: false,
value: 'agent',
value: 'kernel',
hasValue: true,
source: 'db',
},

View File

@@ -52,6 +52,7 @@
"start:prod": "bun run dist/index.js",
"lint": "biome check src/",
"test": "bun test",
"test:e2e": "E2E_MOCK_LLM=1 bun test ./e2e/__tests__/e2e-review.test.ts",
"prepare": "command -v husky >/dev/null 2>&1 && husky || true"
},
"keywords": [

View File

@@ -0,0 +1,134 @@
import { afterEach, beforeEach, describe, expect, test } from 'bun:test';
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import path from 'node:path';
import { closeDatabase, initDatabase } from '../../db/database';
import { KernelAgentInvoker } from '../agents/kernel-agent-invoker';
import { KernelAgentRegistry } from '../agents/kernel-agent-registry';
import { KernelTaskRegistry } from '../registry/kernel-task-registry';
import { AgentKernelRunner } from '../runtime/agent-kernel-runner';
import { kernelSessionRepository } from '../session/session-repository';
/** Minimal kernel state used by the runner tests: a single counter that tasks increment. */
interface DummyState {
counter: number;
}
// Integration tests for AgentKernelRunner. Every test gets its own database
// file inside a fresh temp directory, selected through DATABASE_PATH.
describe('AgentKernelRunner', () => {
let tempDir: string;
let savedDbPath: string | undefined;
beforeEach(async () => {
tempDir = await mkdtemp(path.join(tmpdir(), 'kernel-runner-db-'));
// Remember the caller's DATABASE_PATH so afterEach can restore it.
savedDbPath = process.env.DATABASE_PATH;
process.env.DATABASE_PATH = path.join(tempDir, 'assistant.db');
initDatabase();
});
afterEach(async () => {
// Close the database before deleting the directory that holds its file.
closeDatabase();
if (savedDbPath === undefined) {
Reflect.deleteProperty(process.env, 'DATABASE_PATH');
} else {
process.env.DATABASE_PATH = savedDbPath;
}
await rm(tempDir, { recursive: true, force: true });
});
// A skill sets the counter to 1 and enqueues a subagent, which increments it
// to 2; the run must finish with no pending tasks and a started/completed
// event pair per task.
test('runs queued skills and subagents and persists checkpoint', async () => {
const session = kernelSessionRepository.ensureSession({
scopeType: 'pull_request',
scopeKey: 'acme/repo#7',
metadata: { owner: 'acme', repo: 'repo', prNumber: 7 },
runId: 'run-7',
});
const skillRegistry = new KernelTaskRegistry<DummyState>();
const subagentRegistry = new KernelAgentRegistry<DummyState>();
skillRegistry.register({
kind: 'skill',
name: 'step_one',
description: 'Initial skill for runner test',
execute: async () => ({
state: { counter: 1 },
enqueue: [{ kind: 'subagent', name: 'step_two' }],
}),
});
subagentRegistry.register({
kind: 'subagent',
name: 'step_two',
source: 'built-in',
whenToUse: 'Increment the test counter',
description: 'Test subagent used by runner tests',
execute: async (_task, context) => ({
state: { counter: context.state.counter + 1 },
}),
});
// The planner never proposes extra work in this test.
const runner = new AgentKernelRunner(skillRegistry, new KernelAgentInvoker(subagentRegistry), {
plan: () => [],
});
const checkpoint = await runner.run({
sessionId: session.id,
runId: 'run-7',
initialState: { counter: 0 },
initialTasks: [{ kind: 'skill', name: 'step_one' }],
});
const events = kernelSessionRepository.listEvents(session.id);
expect(checkpoint.state.counter).toBe(2);
expect(checkpoint.pendingTasks).toHaveLength(0);
expect(checkpoint.stopReason).toBe('completed');
// Event types are sorted so the assertion does not depend on emission order.
expect(events.map((event) => event.eventType).sort()).toEqual([
'task_completed',
'task_completed',
'task_started',
'task_started',
]);
});
// A checkpoint saved with stopReason 'awaiting_human_feedback' and counter 1
// is resumed with continueExisting; the planner schedules one more skill and
// the run ends 'completed' with counter 2.
test('continueExisting ignores persisted stop reason and resumes planned work', async () => {
const session = kernelSessionRepository.ensureSession({
scopeType: 'pull_request',
scopeKey: 'acme/repo#8',
metadata: { owner: 'acme', repo: 'repo', prNumber: 8 },
runId: 'run-8',
});
kernelSessionRepository.saveCheckpoint(session.id, {
state: { counter: 1 },
pendingTasks: [],
stopReason: 'awaiting_human_feedback',
});
const skillRegistry = new KernelTaskRegistry<DummyState>();
const subagentRegistry = new KernelAgentRegistry<DummyState>();
skillRegistry.register({
kind: 'skill',
name: 'resume_step',
description: 'Resume skill for runner test',
execute: async (_task, context) => ({
state: { counter: context.state.counter + 1 },
}),
});
// Plans the resume skill only while the counter is still below 2.
const runner = new AgentKernelRunner(skillRegistry, new KernelAgentInvoker(subagentRegistry), {
plan: (context) =>
context.state.counter < 2 ? [{ kind: 'skill', name: 'resume_step' }] : [],
});
const checkpoint = await runner.run({
sessionId: session.id,
runId: 'run-8',
initialState: { counter: 0 },
initialTasks: [],
continueExisting: true,
});
// Ending at 2 shows the persisted counter (1) was used rather than initialState (0).
expect(checkpoint.state.counter).toBe(2);
expect(checkpoint.stopReason).toBe('completed');
});
});

View File

@@ -0,0 +1,81 @@
import { afterEach, beforeEach, describe, expect, test } from 'bun:test';
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import path from 'node:path';
import { closeDatabase, initDatabase } from '../../db/database';
import { getKernelAgentContext } from '../agents/kernel-agent-context';
import { KernelAgentInvoker } from '../agents/kernel-agent-invoker';
import { KernelAgentRegistry } from '../agents/kernel-agent-registry';
import { kernelSessionRepository } from '../session/session-repository';
/** Minimal kernel state used by the invoker test: a single numeric value. */
interface DummyState {
value: number;
}
// Integration test for KernelAgentInvoker. Every test gets its own database
// file inside a fresh temp directory, selected through DATABASE_PATH.
describe('KernelAgentInvoker', () => {
let tempDir: string;
let savedDbPath: string | undefined;
beforeEach(async () => {
tempDir = await mkdtemp(path.join(tmpdir(), 'kernel-agent-invoker-db-'));
// Remember the caller's DATABASE_PATH so afterEach can restore it.
savedDbPath = process.env.DATABASE_PATH;
process.env.DATABASE_PATH = path.join(tempDir, 'assistant.db');
initDatabase();
});
afterEach(async () => {
// Close the database before deleting the directory that holds its file.
closeDatabase();
if (savedDbPath === undefined) {
Reflect.deleteProperty(process.env, 'DATABASE_PATH');
} else {
process.env.DATABASE_PATH = savedDbPath;
}
await rm(tempDir, { recursive: true, force: true });
});
// Checks that, while the subagent runs, the ambient agent context and the
// delegation packet describe this invocation, and that the stored invocation
// carries the subagent's summary and artifacts.
test('invokes subagent with isolated agent context and structured result', async () => {
const session = kernelSessionRepository.ensureSession({
scopeType: 'pull_request',
scopeKey: 'acme/repo#88',
metadata: { owner: 'acme', repo: 'repo', prNumber: 88 },
runId: 'run-88',
});
const registry = new KernelAgentRegistry<DummyState>();
registry.register({
kind: 'subagent',
name: 'test:subagent',
source: 'built-in',
description: 'Test subagent',
whenToUse: 'Used by invoker test',
tags: ['test'],
execute: async (_task, context) => {
// These assertions run inside the subagent, i.e. inside the invoker's context scope.
const agentContext = getKernelAgentContext();
expect(agentContext?.agentType).toBe('subagent');
expect(agentContext?.subagentName).toBe('test:subagent');
expect(context.delegation.parentSessionId).toBe(session.id);
return {
state: { value: context.state.value + 1 },
summary: 'subagent completed',
artifacts: { nextValue: context.state.value + 1 },
};
},
});
const invoker = new KernelAgentInvoker(registry);
const output = await invoker.invoke(
{ kind: 'subagent', name: 'test:subagent', input: { focus: 'test' } },
{
session,
runId: 'run-88',
state: { value: 1 },
}
);
expect(output.result?.state).toEqual({ value: 2 });
expect(output.invocation.status).toBe('completed');
expect(output.invocation.result?.summary).toBe('subagent completed');
expect(output.invocation.result?.artifacts).toEqual({ nextValue: 2 });
});
});

View File

@@ -0,0 +1,101 @@
import { afterEach, beforeEach, describe, expect, test } from 'bun:test';
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import path from 'node:path';
import { closeDatabase, initDatabase } from '../../db/database';
import { kernelSessionRepository } from '../session/session-repository';
// Integration tests for kernelSessionRepository. Every test gets its own
// database file inside a fresh temp directory, selected through DATABASE_PATH.
describe('KernelSessionRepository', () => {
let tempDir: string;
let savedDbPath: string | undefined;
beforeEach(async () => {
tempDir = await mkdtemp(path.join(tmpdir(), 'kernel-session-db-'));
// Remember the caller's DATABASE_PATH so afterEach can restore it.
savedDbPath = process.env.DATABASE_PATH;
process.env.DATABASE_PATH = path.join(tempDir, 'assistant.db');
initDatabase();
});
afterEach(async () => {
// Close the database before deleting the directory that holds its file.
closeDatabase();
if (savedDbPath === undefined) {
Reflect.deleteProperty(process.env, 'DATABASE_PATH');
} else {
process.env.DATABASE_PATH = savedDbPath;
}
await rm(tempDir, { recursive: true, force: true });
});
// Two ensureSession calls with the same scope key must yield the same session
// id, with lastRunId and metadata taken from the second call.
test('ensureSession reuses the same scope key and updates metadata', () => {
const first = kernelSessionRepository.ensureSession({
scopeType: 'pull_request',
scopeKey: 'acme/repo#42',
metadata: { owner: 'acme', repo: 'repo', prNumber: 42 },
runId: 'run-1',
});
const second = kernelSessionRepository.ensureSession({
scopeType: 'pull_request',
scopeKey: 'acme/repo#42',
metadata: { owner: 'acme', repo: 'repo', prNumber: 42, updated: true },
runId: 'run-2',
});
expect(second.id).toBe(first.id);
expect(second.lastRunId).toBe('run-2');
expect(second.metadata).toEqual({ owner: 'acme', repo: 'repo', prNumber: 42, updated: true });
});
// Appended events and a saved checkpoint must be readable back unchanged.
test('appendEvent and saveCheckpoint persist session runtime state', () => {
const session = kernelSessionRepository.ensureSession({
scopeType: 'pull_request',
scopeKey: 'acme/repo#99',
metadata: { owner: 'acme', repo: 'repo', prNumber: 99 },
runId: 'run-99',
});
kernelSessionRepository.appendEvent(session.id, 'review_enqueued', { runId: 'run-99' });
kernelSessionRepository.appendEvent(session.id, 'task_started', { name: 'prepare_workspace' });
kernelSessionRepository.saveCheckpoint(session.id, {
state: { prepared: true, findings: 3 },
pendingTasks: [{ kind: 'skill', name: 'publish_review' }],
stopReason: 'waiting',
});
const events = kernelSessionRepository.listEvents(session.id);
const checkpoint = kernelSessionRepository.loadCheckpoint<{
prepared: boolean;
findings: number;
}>(session.id);
expect(events).toHaveLength(2);
// Event types are sorted so the assertion does not depend on listing order.
expect(events.map((event) => event.eventType).sort()).toEqual([
'review_enqueued',
'task_started',
]);
expect(checkpoint).not.toBeNull();
expect(checkpoint?.state).toEqual({ prepared: true, findings: 3 });
expect(checkpoint?.pendingTasks).toEqual([{ kind: 'skill', name: 'publish_review' }]);
expect(checkpoint?.stopReason).toBe('waiting');
});
// Lookup by scope key returns the matching session; listSessions(10) must
// contain both created sessions (order is not asserted).
test('can query sessions by scope key and list sessions', () => {
const first = kernelSessionRepository.ensureSession({
scopeType: 'pull_request',
scopeKey: 'acme/repo#1',
metadata: { owner: 'acme', repo: 'repo', prNumber: 1 },
runId: 'run-1',
});
const second = kernelSessionRepository.ensureSession({
scopeType: 'pull_request',
scopeKey: 'acme/repo#2',
metadata: { owner: 'acme', repo: 'repo', prNumber: 2 },
runId: 'run-2',
});
expect(kernelSessionRepository.getSessionByScopeKey('acme/repo#1')?.id).toBe(first.id);
expect(kernelSessionRepository.listSessions(10).map((session) => session.id)).toEqual(
expect.arrayContaining([first.id, second.id])
);
});
});

View File

@@ -0,0 +1,68 @@
import { afterEach, beforeEach, describe, expect, test } from 'bun:test';
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import path from 'node:path';
import { closeDatabase, initDatabase } from '../../db/database';
import { kernelSessionRepository } from '../session/session-repository';
// Integration test for the subagent-invocation methods of
// kernelSessionRepository. Every test gets its own database file inside a
// fresh temp directory, selected through DATABASE_PATH.
describe('KernelSessionRepository subagent invocations', () => {
let tempDir: string;
let savedDbPath: string | undefined;
beforeEach(async () => {
tempDir = await mkdtemp(path.join(tmpdir(), 'kernel-subagent-db-'));
// Remember the caller's DATABASE_PATH so afterEach can restore it.
savedDbPath = process.env.DATABASE_PATH;
process.env.DATABASE_PATH = path.join(tempDir, 'assistant.db');
initDatabase();
});
afterEach(async () => {
// Close the database before deleting the directory that holds its file.
closeDatabase();
if (savedDbPath === undefined) {
Reflect.deleteProperty(process.env, 'DATABASE_PATH');
} else {
process.env.DATABASE_PATH = savedDbPath;
}
await rm(tempDir, { recursive: true, force: true });
});
// Creates one invocation, completes it with a result, then expects the
// listing for the session to return that single invocation with its result.
test('persists and lists subagent invocations', () => {
const session = kernelSessionRepository.ensureSession({
scopeType: 'pull_request',
scopeKey: 'acme/repo#101',
metadata: { owner: 'acme', repo: 'repo', prNumber: 101 },
runId: 'run-101',
});
const invocation = kernelSessionRepository.createSubagentInvocation({
parentSessionId: session.id,
parentRunId: 'run-101',
parentTaskName: 'custom:security-audit',
subagentName: 'custom:security-audit',
agentId: 'agent-123',
packet: {
goal: 'Review security issues',
parentTaskName: 'custom:security-audit',
input: { domain: 'security' },
parentSessionId: session.id,
parentRunId: 'run-101',
contextSummary: 'summary',
},
});
kernelSessionRepository.completeSubagentInvocation(invocation.id, 'completed', {
agentId: 'agent-123',
agentType: 'custom:security-audit',
summary: 'security review done',
totalDurationMs: 10,
totalToolUseCount: 0,
totalTokens: 0,
artifacts: { findings: 2 },
});
const invocations = kernelSessionRepository.listSubagentInvocations(session.id);
expect(invocations).toHaveLength(1);
expect(invocations[0]?.subagentName).toBe('custom:security-audit');
expect(invocations[0]?.result?.summary).toBe('security review done');
});
});

View File

@@ -0,0 +1,15 @@
import { AsyncLocalStorage } from 'node:async_hooks';
import type { KernelSubagentContextRecord } from '../types';
// Module-private AsyncLocalStorage holding the context record of the subagent
// whose asynchronous call chain is currently executing.
const kernelAgentContextStorage = new AsyncLocalStorage<KernelSubagentContextRecord>();
/**
 * Reads the subagent context bound to the current asynchronous call chain.
 *
 * @returns The active context record, or `undefined` when the caller is not
 *   running inside `runWithKernelAgentContext`.
 */
export function getKernelAgentContext(): KernelSubagentContextRecord | undefined {
  const activeContext = kernelAgentContextStorage.getStore();
  return activeContext;
}
/**
 * Runs `fn` with `context` installed as the ambient subagent context.
 *
 * @param context - Record exposed through `getKernelAgentContext()` while `fn` runs.
 * @param fn - Asynchronous work to execute inside the context scope.
 * @returns The promise produced by `fn`.
 */
export function runWithKernelAgentContext<T>(
  context: KernelSubagentContextRecord,
  fn: () => Promise<T>
): Promise<T> {
  const scopedRun = kernelAgentContextStorage.run(context, fn);
  return scopedRun;
}

View File

@@ -0,0 +1,140 @@
import { randomUUID } from 'node:crypto';
import type { KernelHookRegistry } from '../hooks/kernel-hook-registry';
import { runKernelHooks } from '../hooks/kernel-hook-runner';
import { kernelSessionRepository } from '../session/session-repository';
import type {
KernelAgentExecutionContext,
KernelDelegationPacket,
KernelExecutionContext,
KernelHandlerResult,
KernelSubagentDefinition,
KernelTask,
} from '../types';
import { runWithKernelAgentContext } from './kernel-agent-context';
import { KernelAgentRegistry } from './kernel-agent-registry';
import { finalizeKernelSubagentResult } from './kernel-subagent-result';
/** Outcome of one `KernelAgentInvoker.invoke` call. */
export interface KernelSubagentInvocationOutput<TState> {
// Handler result returned by the subagent's `execute`, when there is one.
result?: KernelHandlerResult<TState>;
// The invocation record as returned by the session repository (same element
// type as `listSubagentInvocations`).
invocation: ReturnType<typeof kernelSessionRepository.listSubagentInvocations>[number];
}
/**
 * Dispatches kernel tasks to registered subagents and records each dispatch as
 * a subagent invocation in the session repository.
 */
export class KernelAgentInvoker<TState> {
  /**
   * @param registry - Registry that resolves subagent definitions by name.
   * @param hookRegistry - Optional hook registry; when present, `SubagentStart`
   *   hooks run before the subagent executes and may block it.
   */
  constructor(
    private readonly registry: KernelAgentRegistry<TState>,
    private readonly hookRegistry?: KernelHookRegistry
  ) {}

  /** Returns the subagent definition registered under `name`, if any. */
  get(name: string): KernelSubagentDefinition<TState> | undefined {
    return this.registry.get(name);
  }

  /** Returns every registered subagent definition. */
  getAll(): KernelSubagentDefinition<TState>[] {
    return this.registry.getAll();
  }

  /** Returns the registered subagent definitions that carry `tag`. */
  filterByTag(tag: string): KernelSubagentDefinition<TState>[] {
    return this.registry.filterByTag(tag);
  }

  /**
   * Runs the subagent named by `task.name` inside its own agent context.
   *
   * An invocation record is created first. Everything after that — the
   * `SubagentStart` hooks and the subagent itself — runs in a guarded section,
   * so the record is completed as `failed` whenever a hook blocks, a hook
   * throws, or the subagent throws. Previously a blocking hook left the record
   * without a completion.
   *
   * @param task - Task whose `name` selects the subagent and whose `input` is
   *   forwarded in the delegation packet (defaults to `{}`).
   * @param context - Execution context of the parent (session, run id, state).
   * @returns The subagent's handler result and the completed invocation record.
   * @throws Error when no subagent is registered under `task.name`, when a
   *   `SubagentStart` hook reports a blocking reason, or when the subagent
   *   itself throws (the original error is rethrown).
   */
  async invoke(
    task: KernelTask,
    context: KernelExecutionContext<TState>
  ): Promise<KernelSubagentInvocationOutput<TState>> {
    const agent = this.registry.get(task.name);
    if (!agent) {
      throw new Error(`Kernel subagent definition not found: ${task.name}`);
    }

    const agentId = randomUUID();
    // The state type is generic; a compressed-context summary is forwarded only
    // if the state happens to carry one as a string.
    const compressedSummary = (context.state as { compressedContext?: { summary?: string } })
      .compressedContext?.summary;
    const delegation: KernelDelegationPacket = {
      goal: agent.whenToUse,
      parentTaskName: task.name,
      input: task.input ?? {},
      parentSessionId: context.session.id,
      parentRunId: context.runId,
      contextSummary: typeof compressedSummary === 'string' ? compressedSummary : undefined,
    };

    const invocation = kernelSessionRepository.createSubagentInvocation({
      parentSessionId: context.session.id,
      parentRunId: context.runId,
      parentTaskName: task.name,
      subagentName: agent.name,
      agentId,
      packet: delegation,
    });
    const agentContext: KernelAgentExecutionContext<TState> = {
      ...context,
      agent,
      delegation,
    };

    const startTime = Date.now();
    try {
      if (this.hookRegistry) {
        const hookResult = await runKernelHooks({
          registry: this.hookRegistry,
          input: {
            event: 'SubagentStart',
            sessionId: context.session.id,
            runId: context.runId,
            subagentName: agent.name,
            agentId,
            packet: delegation,
          },
        });
        if (hookResult.blockingReason) {
          throw new Error(hookResult.blockingReason);
        }
      }

      const result = await runWithKernelAgentContext(
        {
          agentId,
          parentSessionId: context.session.id,
          agentType: 'subagent',
          subagentName: agent.name,
          source: agent.source,
          invocationKind: 'spawn',
        },
        () => agent.execute(task, agentContext)
      );
      const finalized = finalizeKernelSubagentResult({
        agentId,
        agentType: agent.name,
        startTime,
        result,
      });
      return {
        result,
        invocation: kernelSessionRepository.completeSubagentInvocation(
          invocation.id,
          'completed',
          finalized
        ),
      };
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      const finalized = finalizeKernelSubagentResult({
        agentId,
        agentType: agent.name,
        startTime,
        result: {
          summary: message,
          artifacts: { error: message },
        },
      });
      // Record the failure, then surface the original error to the caller.
      kernelSessionRepository.completeSubagentInvocation(invocation.id, 'failed', finalized);
      throw error;
    }
  }
}

View File

@@ -0,0 +1,21 @@
import type { KernelSubagentDefinition } from '../types';
/**
 * In-memory catalogue of subagent definitions, keyed by each definition's `name`.
 */
export class KernelAgentRegistry<TState> {
  private readonly agents = new Map<string, KernelSubagentDefinition<TState>>();

  /** Stores `agent` under its name; a later registration with the same name replaces the earlier one. */
  register(agent: KernelSubagentDefinition<TState>): void {
    const key = agent.name;
    this.agents.set(key, agent);
  }

  /** Looks a definition up by name; yields `undefined` when nothing is registered under it. */
  get(agentType: string): KernelSubagentDefinition<TState> | undefined {
    const definition = this.agents.get(agentType);
    return definition;
  }

  /** Returns a fresh array of every registered definition, in registration order. */
  getAll(): KernelSubagentDefinition<TState>[] {
    return Array.from(this.agents.values());
  }

  /** Returns the definitions whose optional `tags` list contains `tag`. */
  filterByTag(tag: string): KernelSubagentDefinition<TState>[] {
    const tagged: KernelSubagentDefinition<TState>[] = [];
    for (const candidate of this.agents.values()) {
      // Definitions without a `tags` array never match.
      if (candidate.tags?.includes(tag)) {
        tagged.push(candidate);
      }
    }
    return tagged;
  }
}

View File

@@ -0,0 +1,21 @@
import type { KernelHandlerResult, KernelSubagentInvocationResult } from '../types';
/**
 * Builds the result record stored for a finished subagent invocation.
 *
 * @param params.agentId   Identifier of the invocation's agent instance.
 * @param params.agentType Label copied into the record and used in the fallback summary.
 * @param params.startTime Epoch milliseconds at which the invocation began.
 * @param params.result    Handler result, if any; supplies `summary` and `artifacts`.
 * @returns A record whose duration is measured against `Date.now()` at call time.
 *          Tool-use and token counters are always reported as 0 by this function.
 */
export function finalizeKernelSubagentResult<TState>(params: {
  agentId: string;
  agentType: string;
  startTime: number;
  result?: KernelHandlerResult<TState>;
}): KernelSubagentInvocationResult {
  const elapsedMs = Date.now() - params.startTime;
  const handlerResult = params.result;
  // Only a missing (null/undefined) summary falls back; an empty string is kept.
  const summary = handlerResult?.summary ?? `${params.agentType} completed`;
  return {
    agentId: params.agentId,
    agentType: params.agentType,
    summary,
    totalDurationMs: elapsedMs,
    totalToolUseCount: 0,
    totalTokens: 0,
    artifacts: handlerResult?.artifacts,
  };
}

View File

@@ -0,0 +1,219 @@
import { describe, expect, test } from 'bun:test';
import { KernelHookRegistry } from '../kernel-hook-registry';
import { runKernelHooks } from '../kernel-hook-runner';
import type { KernelHookDefinition, KernelHookInput } from '../kernel-hook-types';
// Shared tool-execution context fixture reused by every tool-related hook input below.
// The paths and run id are placeholders; the tests only pass this object through to hooks.
const baseContext = {
  workspacePath: '/tmp/workspace',
  mirrorPath: '/tmp/mirror',
  runId: 'run-1',
};
/** Creates a fresh hook registry and registers `hooks` in the given order. */
function makeRegistry(hooks: KernelHookDefinition[]): KernelHookRegistry {
  const registry = new KernelHookRegistry();
  hooks.forEach((definition) => {
    registry.register(definition);
  });
  return registry;
}
/**
 * Assembles a hook definition for tests.
 *
 * @param name    Hook name; also embedded in the generated description.
 * @param event   Event the hook is registered for.
 * @param execute Implementation invoked when the hook runs.
 */
function makeHook(
  name: string,
  event: KernelHookInput['event'],
  execute: KernelHookDefinition['execute']
): KernelHookDefinition {
  const description = `Test hook ${name}`;
  return { name, event, description, execute };
}
// Behavioural tests for runKernelHooks: per-event dispatch, aggregation of hook results,
// and how block/approve decisions surface in the lifecycle result.
describe('runKernelHooks', () => {
  // One minimal input per hook event name. Each case registers a single hook for that event
  // and checks that it ran once and that its additionalContext was collected.
  test.each([
    [
      'SessionStart',
      {
        event: 'SessionStart',
        sessionId: 'session-1',
        runId: 'run-1',
        scopeKey: 'repo#1',
      },
    ],
    [
      'SubagentStart',
      {
        event: 'SubagentStart',
        sessionId: 'session-1',
        runId: 'run-1',
        subagentName: 'test:subagent',
        agentId: 'agent-1',
        packet: {
          input: { focus: 'test' },
          goal: 'test goal',
          parentTaskName: 'test:task',
          parentSessionId: 'session-1',
          parentRunId: 'run-1',
        },
      },
    ],
    [
      'PermissionRequest',
      {
        event: 'PermissionRequest',
        toolName: 'write_file',
        toolCallId: 'call-1',
        input: { value: 'raw' },
        context: baseContext,
        suggestedBehavior: 'ask',
        reason: 'needs approval',
      },
    ],
    [
      'PreToolUse',
      {
        event: 'PreToolUse',
        toolName: 'write_file',
        toolCallId: 'call-1',
        input: { value: 'raw' },
        context: baseContext,
      },
    ],
    [
      'PostToolUse',
      {
        event: 'PostToolUse',
        toolName: 'write_file',
        toolCallId: 'call-1',
        input: { value: 'raw' },
        output: { ok: true },
        context: baseContext,
      },
    ],
    [
      'PostToolUseFailure',
      {
        event: 'PostToolUseFailure',
        toolName: 'write_file',
        toolCallId: 'call-1',
        input: { value: 'raw' },
        error: 'boom',
        context: baseContext,
      },
    ],
  ] as const)('dispatches %s to matching hooks', async (_label, input) => {
    const executed: string[] = [];
    const registry = makeRegistry([
      makeHook('first', input.event, async () => {
        executed.push('first');
        return { additionalContext: 'ctx-1' };
      }),
    ]);
    const result = await runKernelHooks({ registry, input });
    expect(executed).toEqual(['first']);
    expect(result.results).toHaveLength(1);
    expect(result.additionalContexts).toEqual(['ctx-1']);
  });
  // Contexts accumulate in registration order; updatedInput is last-writer-wins.
  test('aggregates additionalContext values and lets later updatedInput override earlier values', async () => {
    const registry = makeRegistry([
      makeHook('first', 'PreToolUse', async () => ({
        additionalContext: 'ctx-1',
        updatedInput: { value: 'first' },
      })),
      makeHook('second', 'PreToolUse', async () => ({
        additionalContext: 'ctx-2',
        updatedInput: { value: 'second' },
      })),
    ]);
    const result = await runKernelHooks({
      registry,
      input: {
        event: 'PreToolUse',
        toolName: 'write_file',
        toolCallId: 'call-1',
        input: { value: 'raw' },
        context: baseContext,
      },
    });
    expect(result.additionalContexts).toEqual(['ctx-1', 'ctx-2']);
    expect(result.updatedInput).toEqual({ value: 'second' });
    expect(result.results).toHaveLength(2);
  });
  // A blocking hook still contributes its own context/updatedInput, but hooks registered
  // after it ('after') must not run: only two results are expected.
  test('propagates blockingReason when a hook returns decision block', async () => {
    const registry = makeRegistry([
      makeHook('before', 'PermissionRequest', async () => ({
        additionalContext: 'ctx-before',
        updatedInput: { value: 'before' },
      })),
      makeHook('blocker', 'PermissionRequest', async () => ({
        decision: 'block',
        reason: 'blocked by policy',
        additionalContext: 'ctx-blocker',
        updatedInput: { value: 'blocked' },
      })),
      makeHook('after', 'PermissionRequest', async () => ({
        additionalContext: 'ctx-after',
        updatedInput: { value: 'after' },
      })),
    ]);
    const result = await runKernelHooks({
      registry,
      input: {
        event: 'PermissionRequest',
        toolName: 'write_file',
        toolCallId: 'call-1',
        input: { value: 'raw' },
        context: baseContext,
        suggestedBehavior: 'ask',
        reason: 'needs approval',
      },
    });
    expect(result.additionalContexts).toEqual(['ctx-before', 'ctx-blocker']);
    expect(result.updatedInput).toEqual({ value: 'blocked' });
    expect(result.blockingReason).toBe('blocked by policy');
    expect(result.results).toHaveLength(2);
  });
  // An 'approve' decision is passed through in `results` untouched and must not be
  // mistaken for a block, even though it carries a `reason`.
  test('preserves approve decisions for PermissionRequest without introducing a blocking reason', async () => {
    const registry = makeRegistry([
      makeHook('approver', 'PermissionRequest', async () => ({
        decision: 'approve',
        reason: 'approved by reviewer',
        additionalContext: 'ctx-approve',
        updatedInput: { value: 'approved' },
      })),
    ]);
    const result = await runKernelHooks({
      registry,
      input: {
        event: 'PermissionRequest',
        toolName: 'write_file',
        toolCallId: 'call-1',
        input: { value: 'raw' },
        context: baseContext,
        suggestedBehavior: 'ask',
        reason: 'needs approval',
      },
    });
    expect(result.additionalContexts).toEqual(['ctx-approve']);
    expect(result.updatedInput).toEqual({ value: 'approved' });
    expect(result.blockingReason).toBeUndefined();
    expect(result.results).toEqual([
      expect.objectContaining({
        decision: 'approve',
        reason: 'approved by reviewer',
      }),
    ]);
  });
});

View File

@@ -0,0 +1,19 @@
import type { KernelHookDefinition, KernelHookEventName } from './kernel-hook-types';
/**
 * Groups hook definitions by the event they listen to, preserving registration order
 * within each event.
 */
export class KernelHookRegistry {
  private readonly hooks = new Map<KernelHookEventName, KernelHookDefinition[]>();

  /** Appends `hook` to the list for `hook.event`, creating that list on first use. */
  register(hook: KernelHookDefinition): void {
    const bucket = this.hooks.get(hook.event);
    if (bucket) {
      bucket.push(hook);
      return;
    }
    this.hooks.set(hook.event, [hook]);
  }

  /**
   * Returns the hooks registered for `event`.
   * For a known event this is the registry's own array (not a copy); for an unknown
   * event a new empty array is returned.
   */
  get(event: KernelHookEventName): KernelHookDefinition[] {
    const bucket = this.hooks.get(event);
    if (bucket === undefined) {
      return [];
    }
    return bucket;
  }

  /** Returns every registered hook in a new array, grouped by event in first-registration order. */
  getAll(): KernelHookDefinition[] {
    const everything: KernelHookDefinition[] = [];
    for (const bucket of this.hooks.values()) {
      everything.push(...bucket);
    }
    return everything;
  }
}

View File

@@ -0,0 +1,47 @@
import { logger } from '../../utils/logger';
import { KernelHookRegistry } from './kernel-hook-registry';
import type { KernelHookInput, KernelLifecycleResult } from './kernel-hook-types';
/**
 * Runs, one after another, every hook registered for `params.input.event` and merges
 * what they return.
 *
 * - A hook returning a falsy value contributes nothing.
 * - `additionalContext` values are collected in execution order.
 * - `updatedInput` is last-writer-wins.
 * - The first hook returning `continue: false` or `decision: 'block'` stops the chain;
 *   its `reason` (or a generated message naming the hook) becomes `blockingReason`.
 * - A hook that throws is logged and skipped; the remaining hooks still run.
 *
 * @returns The aggregated lifecycle result; never rejects because of a hook failure.
 */
export async function runKernelHooks(params: {
  registry: KernelHookRegistry;
  input: KernelHookInput;
}): Promise<KernelLifecycleResult> {
  const { registry, input } = params;
  const collected: KernelLifecycleResult['results'] = [];
  const contexts: string[] = [];
  let latestInput: Record<string, unknown> | undefined;
  let blockedBecause: string | undefined;

  for (const hook of registry.get(input.event)) {
    try {
      const outcome = await hook.execute(input);
      if (outcome) {
        collected.push(outcome);
        if (outcome.additionalContext) {
          contexts.push(outcome.additionalContext);
        }
        if (outcome.updatedInput) {
          latestInput = outcome.updatedInput;
        }
        const vetoed = outcome.continue === false || outcome.decision === 'block';
        if (vetoed) {
          blockedBecause = outcome.reason ?? `Execution blocked by hook ${hook.name}`;
          break;
        }
      }
    } catch (error) {
      // Best-effort: a failing hook must not abort the remaining hooks.
      const message = error instanceof Error ? error.message : String(error);
      logger.error('Kernel hook 执行失败', {
        hookName: hook.name,
        event: input.event,
        error: message,
      });
    }
  }

  return {
    results: collected,
    additionalContexts: contexts,
    updatedInput: latestInput,
    blockingReason: blockedBecause,
  };
}

View File

@@ -0,0 +1,99 @@
import type { ToolExecutionContext } from '../../review/tools/types';
import type { KernelDelegationPacket, KernelSubagentInvocationResult } from '../types';
/** Names of the lifecycle events a kernel hook can subscribe to. */
export type KernelHookEventName =
  | 'SessionStart'
  | 'SubagentStart'
  | 'PermissionRequest'
  | 'PreToolUse'
  | 'PostToolUse'
  | 'PostToolUseFailure';
/** Payload for the 'SessionStart' event. */
export interface SessionStartHookInput {
  event: 'SessionStart';
  sessionId: string;
  runId: string;
  scopeKey: string;
}
/** Payload for the 'SubagentStart' event; `packet` is the delegation handed to the subagent. */
export interface SubagentStartHookInput {
  event: 'SubagentStart';
  sessionId: string;
  runId: string;
  subagentName: string;
  agentId: string;
  packet: KernelDelegationPacket;
}
/** Payload for the 'PreToolUse' event: the tool call and the input it is about to receive. */
export interface PreToolUseHookInput {
  event: 'PreToolUse';
  toolName: string;
  toolCallId: string;
  input: Record<string, unknown>;
  context: ToolExecutionContext;
}
/**
 * Payload for the 'PermissionRequest' event.
 * `suggestedBehavior` and `reason` describe the default handling proposed by the emitter.
 */
export interface PermissionRequestHookInput {
  event: 'PermissionRequest';
  toolName: string;
  toolCallId: string;
  input: Record<string, unknown>;
  context: ToolExecutionContext;
  suggestedBehavior: 'ask' | 'deny';
  reason: string;
}
/** Payload for the 'PostToolUse' event; `output` is whatever the tool produced. */
export interface PostToolUseHookInput {
  event: 'PostToolUse';
  toolName: string;
  toolCallId: string;
  input: Record<string, unknown>;
  output: unknown;
  context: ToolExecutionContext;
}
/** Payload for the 'PostToolUseFailure' event; `error` is the failure message as a string. */
export interface PostToolUseFailureHookInput {
  event: 'PostToolUseFailure';
  toolName: string;
  toolCallId: string;
  input: Record<string, unknown>;
  error: string;
  context: ToolExecutionContext;
}
/** Discriminated union of all hook payloads, tagged by `event`. */
export type KernelHookInput =
  | SessionStartHookInput
  | SubagentStartHookInput
  | PermissionRequestHookInput
  | PreToolUseHookInput
  | PostToolUseHookInput
  | PostToolUseFailureHookInput;
/**
 * What a single hook may return. All fields are optional.
 * `continue: false` or `decision: 'block'` marks the result as blocking, with `reason`
 * as the explanation; `updatedInput` proposes a replacement input.
 */
export interface KernelHookResult {
  continue?: boolean;
  additionalContext?: string;
  updatedInput?: Record<string, unknown>;
  decision?: 'approve' | 'block';
  reason?: string;
  metadata?: Record<string, unknown>;
}
/** A registered hook: identity, the event it listens to, and its implementation. */
export interface KernelHookDefinition {
  name: string;
  event: KernelHookEventName;
  description: string;
  /** Receives the event payload; may resolve to `undefined` to contribute nothing. */
  execute(input: KernelHookInput): Promise<KernelHookResult | undefined>;
}
/** Aggregate of all hook results produced for one event. */
export interface KernelLifecycleResult {
  results: KernelHookResult[];
  additionalContexts: string[];
  updatedInput?: Record<string, unknown>;
  /** Set when some hook blocked execution; absent otherwise. */
  blockingReason?: string;
}
/** Pairs a finished subagent invocation's id and name with its result record. */
export interface KernelSubagentCompletionEnvelope {
  invocationId: string;
  subagentName: string;
  result: KernelSubagentInvocationResult;
}

View File

@@ -0,0 +1,17 @@
import type { KernelTaskHandler } from '../types';
/**
 * In-memory catalogue of task handlers, keyed by each handler's `name`.
 */
export class KernelTaskRegistry<TState> {
  private readonly handlers = new Map<string, KernelTaskHandler<TState>>();

  /** Stores `handler` under its name; re-registering a name replaces the previous handler. */
  register(handler: KernelTaskHandler<TState>): void {
    const key = handler.name;
    this.handlers.set(key, handler);
  }

  /** Looks a handler up by name; yields `undefined` when none is registered. */
  get(name: string): KernelTaskHandler<TState> | undefined {
    const handler = this.handlers.get(name);
    return handler;
  }

  /** Returns a fresh array of all handlers in registration order. */
  getAll(): KernelTaskHandler<TState>[] {
    return Array.from(this.handlers.values());
  }
}

View File

@@ -0,0 +1,138 @@
import { KernelAgentInvoker } from '../agents/kernel-agent-invoker';
import { KernelTaskRegistry } from '../registry/kernel-task-registry';
import { kernelSessionRepository } from '../session/session-repository';
import type {
KernelCheckpoint,
KernelExecutionContext,
KernelTask,
KernelTurnPlanner,
} from '../types';
/**
 * Drives a kernel session: pops tasks off a queue, executes each through either the
 * skill registry or the subagent invoker, applies the handler result to the state and
 * queue, and persists an event plus a checkpoint after every task.
 */
export class AgentKernelRunner<TState> {
  constructor(
    private readonly skillRegistry: KernelTaskRegistry<TState>,
    private readonly subagentInvoker: KernelAgentInvoker<TState>,
    private readonly planner: KernelTurnPlanner<TState>
  ) {}
  /**
   * Runs tasks until a handler sets a stop reason or the planner has nothing left to plan.
   *
   * @param params.sessionId     Session to run; must already exist in the repository.
   * @param params.runId         Identifier recorded on every event emitted by this run.
   * @param params.initialState  State used only when no checkpoint is stored for the session.
   * @param params.initialTasks  Queue used only when no checkpoint is stored for the session.
   * @param params.continueExisting NOTE(review): never read in this method — a stored
   *        checkpoint always takes precedence regardless of this flag; confirm intent.
   * @returns The final checkpoint (also persisted).
   * @throws Error when the session or a task's handler is missing; rethrows any error
   *         raised while executing a task after recording it.
   */
  async run(params: {
    sessionId: string;
    runId: string;
    initialState: TState;
    initialTasks: KernelTask[];
    continueExisting?: boolean;
  }): Promise<KernelCheckpoint<TState>> {
    const session = kernelSessionRepository.getSessionById(params.sessionId);
    if (!session) {
      throw new Error(`Kernel session not found: ${params.sessionId}`);
    }
    // Resume from the stored checkpoint when there is one; otherwise start from the params.
    const persisted = kernelSessionRepository.loadCheckpoint<TState>(params.sessionId);
    let state = persisted?.state ?? params.initialState;
    const pendingTasks = [...(persisted?.pendingTasks ?? params.initialTasks)];
    let stopReason: string | undefined;
    while (!stopReason) {
      // Queue drained: ask the planner for more work; an empty plan ends the run.
      if (pendingTasks.length === 0) {
        const plannedTasks = this.planner.plan({
          session,
          runId: params.runId,
          state,
          pendingTasks: [...pendingTasks],
        });
        if (plannedTasks.length === 0) {
          stopReason = 'completed';
          break;
        }
        pendingTasks.push(...plannedTasks);
      }
      // Non-empty is guaranteed by the branch above, hence the cast.
      const task = pendingTasks.shift() as KernelTask;
      // Handler lookups fail fast, before any 'task_started' event is written.
      if (task.kind === 'subagent' && !this.subagentInvoker.get(task.name)) {
        throw new Error(`Kernel subagent handler not found: ${task.name}`);
      }
      if (task.kind === 'skill' && !this.skillRegistry.get(task.name)) {
        throw new Error(`Kernel skill handler not found: ${task.name}`);
      }
      kernelSessionRepository.appendEvent(params.sessionId, 'task_started', {
        kind: task.kind,
        name: task.name,
        input: task.input ?? {},
        runId: params.runId,
      });
      const context: KernelExecutionContext<TState> = {
        session,
        runId: params.runId,
        state,
      };
      let result;
      let invocation;
      try {
        if (task.kind === 'skill') {
          result = await this.skillRegistry.get(task.name)?.execute(task, context);
        } else {
          // Any kind other than 'skill' is routed to the subagent invoker.
          const invocationOutput = await this.subagentInvoker.invoke(task, context);
          result = invocationOutput.result;
          invocation = invocationOutput.invocation;
        }
      } catch (error) {
        // NOTE(review): `invocation` is only assigned after invoke() resolves, so it is
        // still undefined here and invocationId/agentId are always recorded as undefined.
        kernelSessionRepository.appendEvent(params.sessionId, 'task_failed', {
          kind: task.kind,
          name: task.name,
          runId: params.runId,
          invocationId: invocation?.id,
          agentId: invocation?.agentId,
          error: error instanceof Error ? error.message : String(error),
        });
        // Put the failed task back at the head of the queue so the checkpoint still contains it.
        kernelSessionRepository.saveCheckpoint(params.sessionId, {
          state,
          pendingTasks: [task, ...pendingTasks],
          stopReason: 'failed',
        });
        throw error;
      }
      // Apply the handler result: new state, tasks to run next (prepend), tasks to run
      // later (enqueue), and an optional stop reason that ends the loop.
      if (result?.state !== undefined) {
        state = result.state;
      }
      if (result?.prepend?.length) {
        pendingTasks.unshift(...result.prepend);
      }
      if (result?.enqueue?.length) {
        pendingTasks.push(...result.enqueue);
      }
      if (result?.stopReason) {
        stopReason = result.stopReason;
      }
      // For subagents the stored invocation result's summary/artifacts win over the raw handler result.
      kernelSessionRepository.appendEvent(params.sessionId, 'task_completed', {
        kind: task.kind,
        name: task.name,
        runId: params.runId,
        invocationId: invocation?.id,
        agentId: invocation?.agentId,
        summary: invocation?.result?.summary ?? result?.summary,
        artifacts: invocation?.result?.artifacts ?? result?.artifacts,
        stopReason: result?.stopReason,
      });
      kernelSessionRepository.saveCheckpoint(params.sessionId, {
        state,
        pendingTasks,
        stopReason,
      });
    }
    // Final checkpoint; `stopReason` is always set once the loop exits, so the fallback is defensive.
    const checkpoint = {
      state,
      pendingTasks,
      stopReason: stopReason ?? 'completed',
    };
    kernelSessionRepository.saveCheckpoint(params.sessionId, checkpoint);
    return checkpoint;
  }
}

View File

@@ -0,0 +1,335 @@
import { randomUUID } from 'node:crypto';
import { getDatabase } from '../../db/database';
import type {
KernelCheckpoint,
KernelDelegationPacket,
KernelSessionEventRecord,
KernelSessionRecord,
KernelSubagentInvocationRecord,
KernelSubagentInvocationResult,
} from '../types';
/**
 * Raw row shape selected from `agent_kernel_sessions`.
 * NOTE(review): optional fields on these row types are presumably NULL-able columns; the
 * SQLite driver may return `null` rather than `undefined` for them — confirm.
 */
interface SessionRow {
  id: string;
  scope_type: 'pull_request' | 'commit';
  scope_key: string;
  /** JSON-encoded metadata object. */
  metadata_json: string;
  created_at: string;
  updated_at: string;
  last_run_id?: string;
}
/** Raw row shape selected from `agent_kernel_session_events`. */
interface EventRow {
  id: string;
  session_id: string;
  event_type: string;
  /** JSON-encoded event payload. */
  payload_json: string;
  created_at: string;
}
/** Raw row shape selected from `agent_kernel_session_checkpoints`. */
interface CheckpointRow {
  session_id: string;
  /** JSON-encoded kernel state. */
  state_json: string;
  /** JSON-encoded array of pending tasks. */
  pending_tasks_json: string;
  stop_reason?: string;
  updated_at: string;
  state_version: number;
}
/** Raw row shape selected from `agent_kernel_subagent_invocations`. */
interface SubagentInvocationRow {
  id: string;
  parent_session_id: string;
  parent_run_id: string;
  parent_task_name: string;
  subagent_name: string;
  agent_id: string;
  status: 'running' | 'completed' | 'failed';
  /** JSON-encoded delegation packet. */
  input_json: string;
  /** JSON-encoded invocation result; absent until the invocation is completed. */
  result_json?: string;
  started_at: string;
  finished_at?: string;
}
/**
 * Converts a raw `agent_kernel_sessions` row into the camelCase session record.
 *
 * `metadata_json` is decoded with `JSON.parse`, so a malformed value throws.
 * A missing run id is always reported as `undefined`: the column can come back from the
 * driver as `null`, which would violate the `lastRunId?: string` contract of the record.
 *
 * @param row Row as selected from the sessions table.
 * @returns The session record exposed to callers.
 */
function toSessionRecord(row: SessionRow): KernelSessionRecord {
  return {
    id: row.id,
    scopeType: row.scope_type,
    scopeKey: row.scope_key,
    metadata: JSON.parse(row.metadata_json) as Record<string, unknown>,
    createdAt: row.created_at,
    updatedAt: row.updated_at,
    // Normalise SQL NULL (null) to undefined so the value matches the declared type.
    lastRunId: row.last_run_id ?? undefined,
  };
}
/**
 * SQLite-backed persistence for kernel sessions, their event log, checkpoints and
 * subagent invocations. Every method obtains its connection through `getDatabase()`.
 *
 * Optional columns that the driver returns as `null` are normalised to `undefined`
 * in the records produced by this class, matching the optional fields of the record types.
 */
export class KernelSessionRepository {
  /**
   * Returns the session for `input.scopeKey`, creating it when none exists.
   *
   * For an existing session the metadata is overwritten, `updated_at` is refreshed and
   * `last_run_id` is set to `input.runId` (the stored value is kept when `runId` is
   * omitted). The stored `scope_type` of an existing session is left unchanged.
   */
  ensureSession(input: {
    scopeType: 'pull_request' | 'commit';
    scopeKey: string;
    metadata: Record<string, unknown>;
    runId?: string;
  }): KernelSessionRecord {
    const db = getDatabase();
    const existing = db
      .query(
        `SELECT id, scope_type, scope_key, metadata_json, created_at, updated_at, last_run_id
         FROM agent_kernel_sessions
         WHERE scope_key = ?`
      )
      .get(input.scopeKey) as SessionRow | null;
    if (existing) {
      db.query(
        `UPDATE agent_kernel_sessions
         SET metadata_json = ?, updated_at = datetime('now'), last_run_id = ?
         WHERE id = ?`
      ).run(
        JSON.stringify(input.metadata),
        input.runId ?? existing.last_run_id ?? null,
        existing.id
      );
      return this.getSessionById(existing.id) as KernelSessionRecord;
    }
    const id = randomUUID();
    db.query(
      `INSERT INTO agent_kernel_sessions (
         id, scope_type, scope_key, metadata_json, last_run_id
       ) VALUES (?, ?, ?, ?, ?)`
    ).run(id, input.scopeType, input.scopeKey, JSON.stringify(input.metadata), input.runId ?? null);
    return this.getSessionById(id) as KernelSessionRecord;
  }

  /** Fetches a session by primary key; `null` when it does not exist. */
  getSessionById(sessionId: string): KernelSessionRecord | null {
    const db = getDatabase();
    const row = db
      .query(
        `SELECT id, scope_type, scope_key, metadata_json, created_at, updated_at, last_run_id
         FROM agent_kernel_sessions
         WHERE id = ?`
      )
      .get(sessionId) as SessionRow | null;
    return row ? toSessionRecord(row) : null;
  }

  /** Fetches a session by its scope key; `null` when it does not exist. */
  getSessionByScopeKey(scopeKey: string): KernelSessionRecord | null {
    const db = getDatabase();
    const row = db
      .query(
        `SELECT id, scope_type, scope_key, metadata_json, created_at, updated_at, last_run_id
         FROM agent_kernel_sessions
         WHERE scope_key = ?`
      )
      .get(scopeKey) as SessionRow | null;
    return row ? toSessionRecord(row) : null;
  }

  /** Lists up to `limit` sessions, most recently updated first. */
  listSessions(limit = 50): KernelSessionRecord[] {
    const db = getDatabase();
    const rows = db
      .query(
        `SELECT id, scope_type, scope_key, metadata_json, created_at, updated_at, last_run_id
         FROM agent_kernel_sessions
         ORDER BY updated_at DESC, created_at DESC
         LIMIT ?`
      )
      .all(limit) as SessionRow[];
    return rows.map(toSessionRecord);
  }

  /** Inserts an event with a fresh UUID and returns it as read back from the table. */
  appendEvent(
    sessionId: string,
    eventType: string,
    payload: Record<string, unknown>
  ): KernelSessionEventRecord {
    const db = getDatabase();
    const id = randomUUID();
    db.query(
      `INSERT INTO agent_kernel_session_events (id, session_id, event_type, payload_json)
       VALUES (?, ?, ?, ?)`
    ).run(id, sessionId, eventType, JSON.stringify(payload));
    // Read the row back so the record carries the database-assigned created_at.
    const row = db
      .query(
        `SELECT id, session_id, event_type, payload_json, created_at
         FROM agent_kernel_session_events
         WHERE id = ?`
      )
      .get(id) as EventRow;
    return {
      id: row.id,
      sessionId: row.session_id,
      eventType: row.event_type,
      payload: JSON.parse(row.payload_json) as Record<string, unknown>,
      createdAt: row.created_at,
    };
  }

  /**
   * Lists a session's events ordered by `created_at`, then `id`.
   * NOTE(review): ids are random UUIDs, so events sharing a `created_at` value come back
   * in an arbitrary relative order — confirm the timestamp resolution of the column.
   */
  listEvents(sessionId: string): KernelSessionEventRecord[] {
    const db = getDatabase();
    const rows = db
      .query(
        `SELECT id, session_id, event_type, payload_json, created_at
         FROM agent_kernel_session_events
         WHERE session_id = ?
         ORDER BY created_at ASC, id ASC`
      )
      .all(sessionId) as EventRow[];
    return rows.map((row) => ({
      id: row.id,
      sessionId: row.session_id,
      eventType: row.event_type,
      payload: JSON.parse(row.payload_json) as Record<string, unknown>,
      createdAt: row.created_at,
    }));
  }

  /** Upserts the single checkpoint row of a session (keyed by `session_id`). */
  saveCheckpoint<TState>(
    sessionId: string,
    checkpoint: KernelCheckpoint<TState>,
    stateVersion = 1
  ): void {
    const db = getDatabase();
    db.query(
      `INSERT INTO agent_kernel_session_checkpoints (
         session_id, state_json, pending_tasks_json, stop_reason, state_version, updated_at
       ) VALUES (?, ?, ?, ?, ?, datetime('now'))
       ON CONFLICT(session_id) DO UPDATE SET
         state_json = excluded.state_json,
         pending_tasks_json = excluded.pending_tasks_json,
         stop_reason = excluded.stop_reason,
         state_version = excluded.state_version,
         updated_at = datetime('now')`
    ).run(
      sessionId,
      JSON.stringify(checkpoint.state),
      JSON.stringify(checkpoint.pendingTasks),
      checkpoint.stopReason ?? null,
      stateVersion
    );
  }

  /**
   * Loads a session's checkpoint; `null` when none is stored.
   * The stored state and task list are JSON-decoded and cast without validation.
   */
  loadCheckpoint<TState>(sessionId: string): KernelCheckpoint<TState> | null {
    const db = getDatabase();
    const row = db
      .query(
        `SELECT session_id, state_json, pending_tasks_json, stop_reason, updated_at, state_version
         FROM agent_kernel_session_checkpoints
         WHERE session_id = ?`
      )
      .get(sessionId) as CheckpointRow | null;
    if (!row) {
      return null;
    }
    return {
      state: JSON.parse(row.state_json) as TState,
      pendingTasks: JSON.parse(row.pending_tasks_json) as KernelCheckpoint<TState>['pendingTasks'],
      // saveCheckpoint stores a missing stop reason as NULL; surface it as undefined again.
      stopReason: row.stop_reason ?? undefined,
    };
  }

  /** Removes a session's checkpoint, if any. */
  deleteCheckpoint(sessionId: string): void {
    const db = getDatabase();
    db.query('DELETE FROM agent_kernel_session_checkpoints WHERE session_id = ?').run(sessionId);
  }

  /** Records a new subagent invocation in status 'running' and returns the stored record. */
  createSubagentInvocation(input: {
    parentSessionId: string;
    parentRunId: string;
    parentTaskName: string;
    subagentName: string;
    agentId: string;
    packet: KernelDelegationPacket;
  }): KernelSubagentInvocationRecord {
    const db = getDatabase();
    const id = randomUUID();
    db.query(
      `INSERT INTO agent_kernel_subagent_invocations (
         id, parent_session_id, parent_run_id, parent_task_name, subagent_name, agent_id, status, input_json
       ) VALUES (?, ?, ?, ?, ?, ?, 'running', ?)`
    ).run(
      id,
      input.parentSessionId,
      input.parentRunId,
      input.parentTaskName,
      input.subagentName,
      input.agentId,
      JSON.stringify(input.packet)
    );
    return this.getSubagentInvocationById(id) as KernelSubagentInvocationRecord;
  }

  /**
   * Marks an invocation finished with the given status and result, stamping `finished_at`.
   * NOTE(review): for an unknown `invocationId` nothing is updated and the lookup result
   * (`null`) is returned through the cast — callers should only pass ids they created.
   */
  completeSubagentInvocation(
    invocationId: string,
    status: 'completed' | 'failed',
    result: KernelSubagentInvocationResult
  ): KernelSubagentInvocationRecord {
    const db = getDatabase();
    db.query(
      `UPDATE agent_kernel_subagent_invocations
       SET status = ?, result_json = ?, finished_at = datetime('now')
       WHERE id = ?`
    ).run(status, JSON.stringify(result), invocationId);
    return this.getSubagentInvocationById(invocationId) as KernelSubagentInvocationRecord;
  }

  /** Lists all invocations spawned from a session, ordered by `started_at`, then `id`. */
  listSubagentInvocations(parentSessionId: string): KernelSubagentInvocationRecord[] {
    const db = getDatabase();
    const rows = db
      .query(
        `SELECT id, parent_session_id, parent_run_id, parent_task_name, subagent_name, agent_id,
                status, input_json, result_json, started_at, finished_at
         FROM agent_kernel_subagent_invocations
         WHERE parent_session_id = ?
         ORDER BY started_at ASC, id ASC`
      )
      .all(parentSessionId) as SubagentInvocationRow[];
    return rows.map((row) => this.toSubagentInvocationRecord(row));
  }

  /** Fetches one invocation by primary key; `null` when it does not exist. */
  private getSubagentInvocationById(invocationId: string): KernelSubagentInvocationRecord | null {
    const db = getDatabase();
    const row = db
      .query(
        `SELECT id, parent_session_id, parent_run_id, parent_task_name, subagent_name, agent_id,
                status, input_json, result_json, started_at, finished_at
         FROM agent_kernel_subagent_invocations
         WHERE id = ?`
      )
      .get(invocationId) as SubagentInvocationRow | null;
    return row ? this.toSubagentInvocationRecord(row) : null;
  }

  /** Converts a raw invocation row to its record, decoding the JSON columns. */
  private toSubagentInvocationRecord(row: SubagentInvocationRow): KernelSubagentInvocationRecord {
    return {
      id: row.id,
      parentSessionId: row.parent_session_id,
      parentRunId: row.parent_run_id,
      parentTaskName: row.parent_task_name,
      subagentName: row.subagent_name,
      agentId: row.agent_id,
      status: row.status,
      input: JSON.parse(row.input_json) as KernelDelegationPacket,
      result: row.result_json
        ? (JSON.parse(row.result_json) as KernelSubagentInvocationResult)
        : undefined,
      startedAt: row.started_at,
      // A still-running invocation has no finished_at; report it as undefined, not null.
      finishedAt: row.finished_at ?? undefined,
    };
  }
}

/** Shared repository instance used by the kernel runner and invoker. */
export const kernelSessionRepository = new KernelSessionRepository();

132
src/agent-kernel/types.ts Normal file
View File

@@ -0,0 +1,132 @@
/** The two ways a kernel task can be executed. */
export type KernelTaskKind = 'skill' | 'subagent';
/** A unit of work in the kernel queue: which handler to run (`kind` + `name`) and its input. */
export interface KernelTask {
  kind: KernelTaskKind;
  name: string;
  input?: Record<string, unknown>;
}
/** Context handed to a subagent when a task is delegated to it. */
export interface KernelDelegationPacket {
  goal: string;
  parentTaskName: string;
  input: Record<string, unknown>;
  parentSessionId: string;
  parentRunId: string;
  /** Optional condensed context from the parent; absent when none is available. */
  contextSummary?: string;
}
/** Descriptive fields shared by skill handlers and subagent definitions. */
export interface KernelTaskDefinition {
  kind: KernelTaskKind;
  name: string;
  description: string;
  resumable?: boolean;
}
/** Where a subagent definition originates from. */
export type KernelAgentSource = 'built-in' | 'custom' | 'plugin';
/** A registrable subagent: metadata plus the `execute` implementation. */
export interface KernelSubagentDefinition<TState> extends KernelTaskDefinition {
  kind: 'subagent';
  name: string;
  source: KernelAgentSource;
  whenToUse: string;
  /** Free-form labels; registries can filter definitions by tag. */
  tags?: string[];
  modelRole?: string;
  maxTurns?: number;
  background?: boolean;
  /** Runs the subagent for `task`; may resolve to `undefined` when there is nothing to report. */
  execute(
    task: KernelTask,
    context: KernelAgentExecutionContext<TState>
  ): Promise<KernelHandlerResult<TState> | undefined>;
}
/** Persisted snapshot of a run: current state, remaining queue, and why it stopped (if it did). */
export interface KernelCheckpoint<TState> {
  state: TState;
  pendingTasks: KernelTask[];
  stopReason?: string;
}
/** A kernel session, scoped to a pull request or a commit. */
export interface KernelSessionRecord {
  id: string;
  scopeType: 'pull_request' | 'commit';
  scopeKey: string;
  metadata: Record<string, unknown>;
  createdAt: string;
  updatedAt: string;
  lastRunId?: string;
}
/** One entry in a session's event log. */
export interface KernelSessionEventRecord {
  id: string;
  sessionId: string;
  eventType: string;
  payload: Record<string, unknown>;
  createdAt: string;
}
/** Ambient identity of a running subagent. */
export interface KernelSubagentContextRecord {
  agentId: string;
  parentSessionId: string;
  agentType: 'subagent';
  subagentName: string;
  source: KernelAgentSource;
  invocationKind: 'spawn' | 'resume';
}
/** Stored record of one subagent invocation, from start to completion. */
export interface KernelSubagentInvocationRecord {
  id: string;
  parentSessionId: string;
  parentRunId: string;
  parentTaskName: string;
  subagentName: string;
  agentId: string;
  status: 'running' | 'completed' | 'failed';
  /** The delegation packet the subagent was started with. */
  input: KernelDelegationPacket;
  /** Present once the invocation has completed or failed. */
  result?: KernelSubagentInvocationResult;
  startedAt: string;
  finishedAt?: string;
}
/** Summary and usage figures recorded when a subagent invocation ends. */
export interface KernelSubagentInvocationResult {
  agentId: string;
  agentType: string;
  summary: string;
  totalDurationMs: number;
  totalToolUseCount: number;
  totalTokens: number;
  artifacts?: Record<string, unknown>;
}
/** What every handler receives: the session, the current run id, and the current state. */
export interface KernelExecutionContext<TState> {
  session: KernelSessionRecord;
  runId: string;
  state: TState;
}
/** Execution context for subagents: the base context plus the agent definition and delegation. */
export interface KernelAgentExecutionContext<TState> extends KernelExecutionContext<TState> {
  agent: KernelSubagentDefinition<TState>;
  delegation: KernelDelegationPacket;
}
/** Context given to the planner: the base context plus the tasks currently queued. */
export interface KernelPlanningContext<TState> extends KernelExecutionContext<TState> {
  pendingTasks: KernelTask[];
}
/**
 * What a handler may return to influence the run. All fields are optional:
 * `state` replaces the current state, `prepend` goes to the front of the queue,
 * `enqueue` goes to the back, and `stopReason` ends the run.
 */
export interface KernelHandlerResult<TState> {
  state?: TState;
  enqueue?: KernelTask[];
  prepend?: KernelTask[];
  stopReason?: string;
  summary?: string;
  artifacts?: Record<string, unknown>;
}
/** A registrable skill handler: metadata plus the `execute` implementation. */
export interface KernelTaskHandler<TState> extends KernelTaskDefinition {
  execute(
    task: KernelTask,
    context: KernelExecutionContext<TState>
  ): Promise<KernelHandlerResult<TState> | undefined>;
}
/** Synchronously decides which tasks to run next; an empty array means there is nothing left to do. */
export interface KernelTurnPlanner<TState> {
  plan(context: KernelPlanningContext<TState>): KernelTask[];
}

View File

@@ -66,7 +66,7 @@ describe('ConfigManager (DB backend)', () => {
describe('getCurrent() defaults', () => {
test('returns default engine when DB is empty', () => {
expect(configManager.getCurrent().review.engine).toBe('agent');
expect(configManager.getCurrent().review.engine).toBe('kernel');
});
test('reads port from process.env.PORT, defaults to 5174', () => {
@@ -105,18 +105,18 @@ describe('ConfigManager (DB backend)', () => {
describe('setOverrides() and getSource()', () => {
test('setOverrides writes to DB, getCurrent reflects the change', async () => {
await configManager.setOverrides({ REVIEW_ENGINE: 'agent' });
expect(configManager.getCurrent().review.engine).toBe('agent');
await configManager.setOverrides({ REVIEW_ENGINE: 'kernel' });
expect(configManager.getCurrent().review.engine).toBe('kernel');
});
test('setOverrides with empty string deletes the key (resets to default)', async () => {
await configManager.setOverrides({ REVIEW_ENGINE: 'agent' });
await configManager.setOverrides({ REVIEW_ENGINE: 'kernel' });
await configManager.setOverrides({ REVIEW_ENGINE: '' });
expect(configManager.getCurrent().review.engine).toBe('agent');
expect(configManager.getCurrent().review.engine).toBe('kernel');
});
test('getSource returns "db" when value is stored', async () => {
await configManager.setOverrides({ REVIEW_ENGINE: 'agent' });
await configManager.setOverrides({ REVIEW_ENGINE: 'kernel' });
expect(configManager.getSource('REVIEW_ENGINE')).toBe('db');
});
@@ -131,7 +131,7 @@ describe('ConfigManager (DB backend)', () => {
test('unknown keys are silently ignored', async () => {
await configManager.setOverrides({ UNKNOWN_KEY_XYZ: 'value' });
expect(configManager.getCurrent().review.engine).toBe('agent');
expect(configManager.getCurrent().review.engine).toBe('kernel');
});
});
@@ -139,15 +139,15 @@ describe('ConfigManager (DB backend)', () => {
describe('resetKeys()', () => {
test('resetKeys deletes key from DB, value reverts to default', async () => {
await configManager.setOverrides({ REVIEW_ENGINE: 'agent' });
await configManager.setOverrides({ REVIEW_ENGINE: 'kernel' });
await configManager.resetKeys(['REVIEW_ENGINE']);
expect(configManager.getCurrent().review.engine).toBe('agent');
expect(configManager.getCurrent().review.engine).toBe('kernel');
expect(configManager.getSource('REVIEW_ENGINE')).toBe('default');
});
test('resetKeys on non-existent key does not throw', async () => {
await configManager.resetKeys(['REVIEW_ENGINE']);
expect(configManager.getCurrent().review.engine).toBe('agent');
expect(configManager.getCurrent().review.engine).toBe('kernel');
});
});
@@ -171,9 +171,9 @@ describe('ConfigManager (DB backend)', () => {
});
test('seedDefaults is idempotent — no-op when DB already has entries', async () => {
await configManager.setOverrides({ REVIEW_ENGINE: 'agent' });
await configManager.setOverrides({ REVIEW_ENGINE: 'kernel' });
configManager.seedDefaults();
expect(configManager.getCurrent().review.engine).toBe('agent');
expect(configManager.getCurrent().review.engine).toBe('kernel');
});
test('ADMIN_PASSWORD defaults to "password"', () => {
@@ -196,13 +196,13 @@ describe('ConfigManager (DB backend)', () => {
describe('type conversions in getCurrent()', () => {
test('boolean field "true" → true', async () => {
await configManager.setOverrides({ REVIEW_ENABLE_HUMAN_GATE: 'true' });
expect(configManager.getCurrent().review.enableHumanGate).toBe(true);
await configManager.setOverrides({ ENABLE_TRIAGE: 'true' });
expect(configManager.getCurrent().review.enableTriage).toBe(true);
});
test('boolean field "false" → false', async () => {
await configManager.setOverrides({ REVIEW_ENABLE_HUMAN_GATE: 'false' });
expect(configManager.getCurrent().review.enableHumanGate).toBe(false);
await configManager.setOverrides({ ENABLE_TRIAGE: 'false' });
expect(configManager.getCurrent().review.enableTriage).toBe(false);
});
test('number field is parsed correctly', async () => {

View File

@@ -32,14 +32,12 @@ export interface AppConfig {
giteaAdminToken: string | undefined;
};
review: {
engine: 'agent' | 'codex';
engine: 'codex' | 'kernel';
workdir: string;
globalPrompt: string | undefined;
maxParallelRuns: number;
maxFilesPerRun: number;
maxFileContentChars: number;
autoPublishMinConfidence: number;
enableHumanGate: boolean;
allowedCommands: string[];
commandTimeoutMs: number;
llmMaxConcurrentCalls: number;
@@ -58,13 +56,6 @@ export interface AppConfig {
codexModel: string;
codexTimeoutMs: number;
codexReviewPrompt: string | undefined;
qdrantUrl: string | undefined;
enableMemory: boolean;
fewShotExamplesCount: number;
enableReflection: boolean;
maxReflectionRounds: number;
enableDebate: boolean;
debateThreshold: string;
};
}
@@ -139,8 +130,8 @@ class ConfigManager {
return {
gitea: {
apiUrl: values.GITEA_API_URL ?? 'http://localhost:5174/api/v1',
accessToken: values.GITEA_ACCESS_TOKEN ?? 'test_token',
apiUrl: values.GITEA_API_URL ?? '',
accessToken: values.GITEA_ACCESS_TOKEN ?? '',
},
notification: {
feishu: {
@@ -163,14 +154,12 @@ class ConfigManager {
giteaAdminToken: values.GITEA_ADMIN_TOKEN,
},
review: {
engine: values.REVIEW_ENGINE === 'codex' ? 'codex' : 'agent',
engine: values.REVIEW_ENGINE === 'codex' ? 'codex' : 'kernel',
workdir: values.REVIEW_WORKDIR ?? '/tmp/gitea-assistant',
globalPrompt: values.GLOBAL_PROMPT,
maxParallelRuns: toNumber('REVIEW_MAX_PARALLEL_RUNS', 2),
maxFilesPerRun: toNumber('REVIEW_MAX_FILES_PER_RUN', 200),
maxFileContentChars: toNumber('REVIEW_MAX_FILE_CONTENT_CHARS', 40000),
autoPublishMinConfidence: toNumber('REVIEW_AUTO_PUBLISH_MIN_CONFIDENCE', 0.8),
enableHumanGate: toBoolean('REVIEW_ENABLE_HUMAN_GATE', true),
allowedCommands: toStringArray('REVIEW_ALLOWED_COMMANDS', [
'git',
'rg',
@@ -195,13 +184,6 @@ class ConfigManager {
codexModel: values.CODEX_MODEL ?? 'o3',
codexTimeoutMs: toNumber('CODEX_TIMEOUT_MS', 300000),
codexReviewPrompt: values.CODEX_REVIEW_PROMPT,
qdrantUrl: values.QDRANT_URL,
enableMemory: toBoolean('ENABLE_MEMORY', false),
fewShotExamplesCount: toNumber('FEW_SHOT_EXAMPLES_COUNT', 10),
enableReflection: toBoolean('ENABLE_REFLECTION', false),
maxReflectionRounds: toNumber('MAX_REFLECTION_ROUNDS', 2),
enableDebate: toBoolean('ENABLE_DEBATE', false),
debateThreshold: values.DEBATE_THRESHOLD ?? 'high',
},
};
}

View File

@@ -7,7 +7,7 @@
// Types
// ---------------------------------------------------------------------------
export type ConfigGroup = 'gitea' | 'notification' | 'security' | 'review' | 'memory';
export type ConfigGroup = 'gitea' | 'notification' | 'security' | 'review';
export type ConfigFieldType = 'string' | 'number' | 'boolean' | 'url' | 'text' | 'enum';
@@ -57,15 +57,9 @@ export const CONFIG_GROUPS: ConfigGroupMeta[] = [
{
key: 'review',
label: '审查引擎',
description: 'Agent 审查模式、并发与沙箱设置',
description: 'Kernel/Codex 审查模式、并发与沙箱设置',
icon: 'file-check',
},
{
key: 'memory',
label: '记忆与学习',
description: '向量记忆、反思与辩论系统',
icon: 'brain',
},
];
// ---------------------------------------------------------------------------
@@ -188,17 +182,17 @@ export const CONFIG_FIELDS: ConfigFieldMeta[] = [
envKey: 'REVIEW_ENGINE',
group: 'review',
label: '审查引擎',
description: '代码审查模式:agent任务化分级编排codexCodex CLI',
description: '代码审查模式codexCodex CLI)或 kernelsession 驱动 agentic loop',
type: 'enum',
sensitive: false,
enumValues: ['agent', 'codex'],
defaultValue: 'agent',
enumValues: ['codex', 'kernel'],
defaultValue: 'kernel',
},
{
envKey: 'REVIEW_WORKDIR',
group: 'review',
label: '工作目录',
description: 'Agent 模式下本地仓库 mirror/worktree 的工作目录',
description: 'Kernel 审查模式下本地仓库 mirror/worktree 的工作目录',
type: 'string',
sensitive: false,
defaultValue: '/tmp/gitea-assistant',
@@ -236,26 +230,6 @@ export const CONFIG_FIELDS: ConfigFieldMeta[] = [
max: 1000000,
defaultValue: 40000,
},
{
envKey: 'REVIEW_AUTO_PUBLISH_MIN_CONFIDENCE',
group: 'review',
label: '自动发布置信度',
description: '自动发布评论所需的最小置信度0~1',
type: 'number',
sensitive: false,
min: 0,
max: 1,
defaultValue: 0.8,
},
{
envKey: 'REVIEW_ENABLE_HUMAN_GATE',
group: 'review',
label: '人工审批',
description: '是否启用人工审批队列(低置信度评论需人工确认后发布)',
type: 'boolean',
sensitive: false,
defaultValue: true,
},
{
envKey: 'REVIEW_ALLOWED_COMMANDS',
group: 'review',
@@ -442,75 +416,6 @@ export const CONFIG_FIELDS: ConfigFieldMeta[] = [
type: 'text',
sensitive: false,
},
// ── 记忆与学习 ──────────────────────────────────────────────────────────
{
envKey: 'QDRANT_URL',
group: 'memory',
label: 'Qdrant 地址',
description: 'Qdrant 向量数据库的连接 URL',
type: 'url',
sensitive: false,
},
{
envKey: 'ENABLE_MEMORY',
group: 'memory',
label: '启用记忆',
description: '是否启用向量记忆系统(需配置 Qdrant',
type: 'boolean',
sensitive: false,
defaultValue: false,
},
{
envKey: 'FEW_SHOT_EXAMPLES_COUNT',
group: 'memory',
label: 'Few-shot 示例数',
description: '检索的 few-shot 示例数量',
type: 'number',
sensitive: false,
min: 0,
max: 20,
defaultValue: 10,
},
{
envKey: 'ENABLE_REFLECTION',
group: 'memory',
label: '启用反思',
description: '是否启用审查结果自我反思机制',
type: 'boolean',
sensitive: false,
defaultValue: false,
},
{
envKey: 'MAX_REFLECTION_ROUNDS',
group: 'memory',
label: '最大反思轮数',
description: '反思迭代的最大轮数',
type: 'number',
sensitive: false,
min: 1,
max: 5,
defaultValue: 2,
},
{
envKey: 'ENABLE_DEBATE',
group: 'memory',
label: '启用辩论',
description: '是否启用多视角辩论机制',
type: 'boolean',
sensitive: false,
defaultValue: false,
},
{
envKey: 'DEBATE_THRESHOLD',
group: 'memory',
label: '辩论阈值',
description: '触发辩论的严重程度阈值',
type: 'enum',
sensitive: false,
enumValues: ['high', 'medium'],
defaultValue: 'high',
},
];
// ---------------------------------------------------------------------------

View File

@@ -0,0 +1,434 @@
import { afterEach, beforeEach, describe, expect, test } from 'bun:test';
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import path from 'node:path';
import { Hono } from 'hono';
import { kernelSessionRepository } from '../../agent-kernel/session/session-repository';
import type { KernelSessionRecord } from '../../agent-kernel/types';
import { closeDatabase, initDatabase } from '../../db/database';
import { kernelReviewEngine } from '../../review/kernel/kernel-review-engine';
import {
REVIEW_FULL_REVIEW_SUBAGENT,
REVIEW_TRIAGE_SUBAGENT,
} from '../../review/kernel/review-subagent-ids';
import { adminController } from '../admin';
/**
 * Creates a standalone Hono app with the admin controller's protected routes
 * mounted under `/admin/api`.
 */
function createTestApp(): Hono {
  const testApp = new Hono();
  testApp.route('/admin/api', adminController.protectedRoutes);
  return testApp;
}
/**
 * Builds the canned run-details fixture used to stub
 * `kernelReviewEngine.getRunDetails` in these tests.
 *
 * The fixture describes an in-progress pull-request run with two steps
 * (a finished `prepare_workspace` step and a started triage step), one
 * unpublished finding and one published comment sharing fingerprint `fp-1`.
 *
 * @param runId Identifier copied into the run, its steps, findings and comments.
 * @returns A plain object with `run`, `steps`, `findings` and `comments`.
 */
function createRunDetails(runId: string) {
  const timestamp = '2026-04-13T10:00:00.000Z';
  return {
    run: {
      id: runId,
      idempotencyKey: 'pr:acme/repo:42:sha-123',
      eventType: 'pull_request' as const,
      status: 'in_progress' as const,
      owner: 'acme',
      repo: 'repo',
      cloneUrl: 'https://example.com/acme/repo.git',
      prNumber: 42,
      baseSha: 'base-sha',
      headSha: 'sha-123',
      commitSha: 'sha-123',
      attempts: 0,
      maxAttempts: 2,
      createdAt: timestamp,
      updatedAt: timestamp,
      startedAt: timestamp,
    },
    steps: [
      {
        id: 'step-1',
        runId,
        stepName: 'prepare_workspace',
        status: 'succeeded' as const,
        startedAt: timestamp,
        finishedAt: '2026-04-13T10:00:10.000Z',
        // Kept in sync with finishedAt - startedAt (10 seconds), expressed in milliseconds.
        latencyMs: 10000,
      },
      {
        id: 'step-2',
        runId,
        stepName: REVIEW_TRIAGE_SUBAGENT,
        agentName: REVIEW_TRIAGE_SUBAGENT,
        status: 'started' as const,
        startedAt: '2026-04-13T10:00:11.000Z',
      },
    ],
    findings: [
      {
        id: 'finding-1',
        runId,
        fingerprint: 'fp-1',
        category: 'correctness' as const,
        severity: 'high' as const,
        confidence: 0.98,
        path: 'src/index.ts',
        line: 12,
        title: 'Potential null dereference',
        detail: 'Value can be undefined before access.',
        evidence: 'line 12 reads target.value without guard',
        suggestion: 'Guard the value before use.',
        published: false,
      },
    ],
    comments: [
      {
        id: 'comment-1',
        runId,
        path: 'src/index.ts',
        line: 12,
        body: 'Please add a null guard here.',
        status: 'published' as const,
        createdAt: timestamp,
        fingerprint: 'fp-1',
      },
    ],
  };
}
/**
 * Persists a pull-request kernel session for `acme/repo#42` together with its
 * event history, a checkpoint and one completed subagent invocation.
 *
 * Events are appended in this order: started/completed pairs for
 * `prepare_workspace`, `build_context`, the triage subagent, the full-review
 * subagent and `aggregate_findings`, followed by one
 * `human_feedback_processed` event. The checkpoint holds two findings and a
 * single pending `publish_review` task.
 *
 * @returns The session record returned by `ensureSession` and the run id used for seeding.
 */
function seedReviewSession(): { session: KernelSessionRecord; runId: string } {
  const runId = 'run-42';
  const fullReviewAgentId = 'agent-full-abcdef';

  const session = kernelSessionRepository.ensureSession({
    scopeType: 'pull_request',
    scopeKey: 'acme/repo#42',
    metadata: {
      owner: 'acme',
      repo: 'repo',
      prNumber: 42,
      headSha: 'sha-123',
    },
    runId,
  });
  const sessionId = session.id;

  type EventPayload = Parameters<typeof kernelSessionRepository.appendEvent>[2];
  const taskStarted = (payload: EventPayload) =>
    kernelSessionRepository.appendEvent(sessionId, 'task_started', payload);
  const taskCompleted = (payload: EventPayload) =>
    kernelSessionRepository.appendEvent(sessionId, 'task_completed', payload);

  // Workspace preparation.
  taskStarted({ name: 'prepare_workspace', kind: 'skill', agentId: 'agent-prepare-123456' });
  taskCompleted({ name: 'prepare_workspace', kind: 'skill', summary: 'workspace ready' });

  // Context building.
  taskStarted({ name: 'build_context', kind: 'skill' });
  taskCompleted({ name: 'build_context', kind: 'skill', summary: 'diff captured' });

  // Triage subagent.
  taskStarted({ name: REVIEW_TRIAGE_SUBAGENT, kind: 'subagent', agentId: 'agent-triage-abcdef' });
  taskCompleted({
    name: REVIEW_TRIAGE_SUBAGENT,
    kind: 'subagent',
    summary: 'review hints created',
  });

  // Full-review subagent.
  taskStarted({ name: REVIEW_FULL_REVIEW_SUBAGENT, kind: 'subagent', agentId: fullReviewAgentId });
  taskCompleted({
    name: REVIEW_FULL_REVIEW_SUBAGENT,
    kind: 'subagent',
    summary: 'full review done',
  });

  // Aggregation.
  taskStarted({ name: 'aggregate_findings', kind: 'skill' });
  taskCompleted({
    name: 'aggregate_findings',
    kind: 'skill',
    summary: '2 findings triaged',
    stopReason: 'completed',
  });

  kernelSessionRepository.appendEvent(sessionId, 'human_feedback_processed', {
    approved: true,
    fingerprint: 'fp-1',
  });

  // Checkpoint: review finished, nothing published yet, publish task still pending.
  kernelSessionRepository.saveCheckpoint(sessionId, {
    state: {
      targetSha: 'sha-123',
      reviewCompleted: true,
      findings: [{ fingerprint: 'fp-1' }, { fingerprint: 'fp-2' }],
      published: false,
    },
    pendingTasks: [{ kind: 'skill', name: 'publish_review' }],
  });

  const invocation = kernelSessionRepository.createSubagentInvocation({
    parentSessionId: sessionId,
    parentRunId: runId,
    parentTaskName: REVIEW_FULL_REVIEW_SUBAGENT,
    subagentName: REVIEW_FULL_REVIEW_SUBAGENT,
    agentId: fullReviewAgentId,
    packet: {
      goal: 'Run a full autonomous review for changed files',
      parentTaskName: REVIEW_FULL_REVIEW_SUBAGENT,
      input: { mode: 'light', suspectedEntrypoints: ['src/index.ts'] },
      parentSessionId: sessionId,
      parentRunId: runId,
      contextSummary: 'Focus on nullable flow and async boundaries.',
    },
  });
  kernelSessionRepository.completeSubagentInvocation(invocation.id, 'completed', {
    agentId: fullReviewAgentId,
    agentType: REVIEW_FULL_REVIEW_SUBAGENT,
    summary: 'Found 2 review concerns',
    totalDurationMs: 25,
    totalToolUseCount: 3,
    totalTokens: 1200,
    artifacts: { findings: ['fp-1', 'fp-2'] },
  });

  return { session, runId };
}
// Route-level tests for the admin review-session and kernel-catalog endpoints.
// Every test points DATABASE_PATH at a file inside a new temporary directory
// before calling initDatabase(), and replaces kernelReviewEngine.getRunDetails
// with the createRunDetails fixture.
describe('admin review session routes', () => {
let tempDir: string;
let savedDbPath: string | undefined;
// Captured once so afterEach can undo the getRunDetails stub.
const originalGetRunDetails = kernelReviewEngine.getRunDetails;
// Per-test setup: temp directory, DATABASE_PATH override, database init, getRunDetails stub.
beforeEach(async () => {
tempDir = await mkdtemp(path.join(tmpdir(), 'admin-review-sessions-db-'));
savedDbPath = process.env.DATABASE_PATH;
process.env.DATABASE_PATH = path.join(tempDir, 'assistant.db');
initDatabase();
kernelReviewEngine.getRunDetails = async (runId: string) => createRunDetails(runId);
});
// Per-test teardown: restore the stub, close the database, put DATABASE_PATH back
// (deleting it when it was originally unset) and remove the temp directory.
afterEach(async () => {
kernelReviewEngine.getRunDetails = originalGetRunDetails;
closeDatabase();
if (savedDbPath === undefined) {
Reflect.deleteProperty(process.env, 'DATABASE_PATH');
} else {
process.env.DATABASE_PATH = savedDbPath;
}
await rm(tempDir, { recursive: true, force: true });
});
// List endpoint: one seeded session must come back with a summary whose status,
// current step, counts and PR metadata match the seeded checkpoint and metadata.
test('GET /admin/api/review/sessions returns projected summaries from persisted history', async () => {
seedReviewSession();
const app = createTestApp();
const response = await app.request('http://localhost/admin/api/review/sessions?limit=10');
const payload = (await response.json()) as {
data: Array<{
session: KernelSessionRecord;
summary: {
status: string;
currentStep?: string;
findingCount: number;
pendingTaskCount: number;
owner?: string;
repo?: string;
prNumber?: number;
headSha?: string;
};
}>;
};
expect(response.status).toBe(200);
expect(payload.data).toHaveLength(1);
expect(payload.data[0]?.session.scopeKey).toBe('acme/repo#42');
// NOTE(review): 'executing' and the publish step label presumably follow from the
// pending publish_review task in the seeded checkpoint — confirm against the read model.
expect(payload.data[0]?.summary.status).toBe('executing');
expect(payload.data[0]?.summary.currentStep).toBe('发布结果');
expect(payload.data[0]?.summary.findingCount).toBe(2);
expect(payload.data[0]?.summary.pendingTaskCount).toBe(1);
expect(payload.data[0]?.summary.owner).toBe('acme');
expect(payload.data[0]?.summary.repo).toBe('repo');
expect(payload.data[0]?.summary.prNumber).toBe(42);
expect(payload.data[0]?.summary.headSha).toBe('sha-123');
});
// Detail endpoint: checks every section of the response (session, summary, checkpoint,
// plan, timeline, subagent invocations and the stubbed run details).
test('GET /admin/api/review/sessions/:sessionId returns checkpoint plan timeline subagent invocations and run details', async () => {
const { session, runId } = seedReviewSession();
const app = createTestApp();
const response = await app.request(`http://localhost/admin/api/review/sessions/${session.id}`);
const payload = (await response.json()) as {
session: KernelSessionRecord;
summary: {
status: string;
currentStep?: string;
findingCount: number;
pendingTaskCount: number;
};
checkpoint: {
state: {
targetSha: string;
reviewCompleted: boolean;
findings: Array<{ fingerprint: string }>;
published: boolean;
};
pendingTasks: Array<{ kind: string; name: string }>;
};
plan: Array<{ key: string; label: string; status: string; progressText?: string }>;
timeline: Array<{ title: string; detail: string; tone: string }>;
subagentInvocations: Array<{
parentRunId: string;
subagentName: string;
status: string;
input: { goal: string; contextSummary?: string; input: Record<string, unknown> };
result?: { summary: string; totalDurationMs: number; totalToolUseCount: number };
}>;
runDetails: ReturnType<typeof createRunDetails>;
};
expect(response.status).toBe(200);
expect(payload.session.id).toBe(session.id);
expect(payload.summary.status).toBe('executing');
expect(payload.summary.currentStep).toBe('发布结果');
expect(payload.summary.findingCount).toBe(2);
expect(payload.summary.pendingTaskCount).toBe(1);
// The checkpoint must round-trip exactly as it was saved by seedReviewSession.
expect(payload.checkpoint.state.targetSha).toBe('sha-123');
expect(payload.checkpoint.state.reviewCompleted).toBe(true);
expect(payload.checkpoint.state.findings.map((finding) => finding.fingerprint)).toEqual([
'fp-1',
'fp-2',
]);
expect(payload.checkpoint.state.published).toBe(false);
expect(payload.checkpoint.pendingTasks).toEqual([{ kind: 'skill', name: 'publish_review' }]);
// Index plan steps by key so each step can be asserted independently of ordering.
const planByKey = new Map(payload.plan.map((step) => [step.key, step]));
expect(planByKey.get('prepare_workspace')).toMatchObject({
label: '准备工作区',
status: 'completed',
});
expect(planByKey.get('build_context')).toMatchObject({
label: '构建上下文',
status: 'completed',
});
expect(planByKey.get(REVIEW_TRIAGE_SUBAGENT)).toMatchObject({
label: '生成审查提示',
status: 'completed',
});
// NOTE(review): the expected progressText differs from the seeded task summary
// ('full review done'); presumably it is produced by the read model — confirm.
expect(planByKey.get(REVIEW_FULL_REVIEW_SUBAGENT)).toMatchObject({
label: '完整自主审查',
status: 'completed',
progressText: 'full review completed',
});
expect(planByKey.get('aggregate_findings')).toMatchObject({
label: '聚合与筛选',
status: 'completed',
});
expect(planByKey.get('publish_review')).toMatchObject({
label: '发布结果',
status: 'queued',
});
// Timeline: spot-check a start entry, a completion summary and the human-feedback entry.
expect(payload.timeline.length).toBeGreaterThanOrEqual(6);
expect(
payload.timeline.some(
(entry) => entry.title === '开始执行 prepare_workspace' && entry.tone === 'neutral'
)
).toBe(true);
expect(payload.timeline.some((entry) => entry.detail.includes('workspace ready'))).toBe(true);
expect(payload.timeline.some((entry) => entry.title === '人工反馈已写回')).toBe(true);
expect(payload.timeline.some((entry) => entry.detail.includes('finding 已确认发布'))).toBe(
true
);
// Exactly one subagent invocation was seeded; its packet and result must be exposed.
expect(payload.subagentInvocations).toHaveLength(1);
expect(payload.subagentInvocations[0]).toMatchObject({
parentRunId: runId,
subagentName: REVIEW_FULL_REVIEW_SUBAGENT,
status: 'completed',
input: {
goal: 'Run a full autonomous review for changed files',
contextSummary: 'Focus on nullable flow and async boundaries.',
},
});
expect(payload.subagentInvocations[0]?.input.input).toEqual({
mode: 'light',
suspectedEntrypoints: ['src/index.ts'],
});
expect(payload.subagentInvocations[0]?.result).toMatchObject({
summary: 'Found 2 review concerns',
totalDurationMs: 25,
totalToolUseCount: 3,
});
// Run details come from the createRunDetails stub installed in beforeEach.
expect(payload.runDetails.run).toMatchObject({
id: runId,
status: 'in_progress',
owner: 'acme',
repo: 'repo',
prNumber: 42,
headSha: 'sha-123',
});
expect(payload.runDetails.steps).toHaveLength(2);
expect(payload.runDetails.steps[0]).toMatchObject({
stepName: 'prepare_workspace',
status: 'succeeded',
});
expect(payload.runDetails.findings[0]).toMatchObject({
fingerprint: 'fp-1',
category: 'correctness',
severity: 'high',
});
expect(payload.runDetails.comments[0]).toMatchObject({
fingerprint: 'fp-1',
status: 'published',
});
});
// Catalog endpoints: each must return a non-empty array; the subagent catalog must list
// the triage and full-review subagents and no specialist / domain-review entries.
test('GET /admin/api/review/kernel catalog routes return arrays', async () => {
const app = createTestApp();
const tasksResponse = await app.request('http://localhost/admin/api/review/kernel/tasks');
const subagentsResponse = await app.request(
'http://localhost/admin/api/review/kernel/subagents'
);
const hooksResponse = await app.request('http://localhost/admin/api/review/kernel/hooks');
const tasksPayload = (await tasksResponse.json()) as { data: unknown[] };
const subagentsPayload = (await subagentsResponse.json()) as { data: unknown[] };
const hooksPayload = (await hooksResponse.json()) as { data: unknown[] };
expect(tasksResponse.status).toBe(200);
expect(subagentsResponse.status).toBe(200);
expect(hooksResponse.status).toBe(200);
expect(Array.isArray(tasksPayload.data)).toBe(true);
expect(Array.isArray(subagentsPayload.data)).toBe(true);
expect(Array.isArray(hooksPayload.data)).toBe(true);
expect(tasksPayload.data.length).toBeGreaterThan(0);
expect(subagentsPayload.data.length).toBeGreaterThan(0);
expect(hooksPayload.data.length).toBeGreaterThan(0);
const subagents = subagentsPayload.data as Array<{ name?: string; tags?: string[] }>;
const fullReviewSubagent = subagents.find(
(subagent) => subagent.name === REVIEW_FULL_REVIEW_SUBAGENT
);
expect(subagents).toEqual(
expect.arrayContaining([
expect.objectContaining({ name: REVIEW_TRIAGE_SUBAGENT }),
expect.objectContaining({ name: REVIEW_FULL_REVIEW_SUBAGENT }),
])
);
expect(fullReviewSubagent?.tags).toEqual(
expect.arrayContaining(['full-review', 'autonomous-review'])
);
// Guard against entries named 'review:specialist:*' or tagged 'domain-review'.
expect(subagents.some((subagent) => subagent.name?.startsWith('review:specialist:'))).toBe(
false
);
expect(subagents.some((subagent) => subagent.tags?.includes('domain-review'))).toBe(false);
});
});

View File

@@ -0,0 +1,366 @@
import { afterEach, beforeEach, describe, expect, test } from 'bun:test';
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import path from 'node:path';
import { Hono } from 'hono';
import { kernelSessionRepository } from '../../agent-kernel/session/session-repository';
import type { KernelSessionEventRecord, KernelSessionRecord } from '../../agent-kernel/types';
import { closeDatabase, initDatabase } from '../../db/database';
import { kernelReviewEngine } from '../../review/kernel/kernel-review-engine';
import { getReviewSessionScope } from '../../review/kernel/session-scope';
import { FileReviewStore } from '../../review/store/file-review-store';
import type {
Finding,
PullRequestReviewPayload,
ReviewCommentRecord,
ReviewRun,
} from '../../review/types';
import { giteaService } from '../../services/gitea';
import { feedbackRouter, initializeFeedbackSystem } from '../feedback';
/**
 * Creates a standalone Hono app with the feedback router mounted under `/feedback`.
 */
function createTestApp(): Hono {
  const testApp = new Hono();
  testApp.route('/feedback', feedbackRouter);
  return testApp;
}
/**
 * POSTs a feedback decision for one finding to `/feedback/finding/:findingId`
 * and returns both the raw response and its parsed JSON body.
 *
 * @param app Hono app the request is dispatched to.
 * @param findingId Finding identifier interpolated into the request path.
 * @param approved Decision sent in the JSON body.
 * @param reason Optional free-text reason sent in the JSON body.
 * @returns `{ response, payload }`, where `payload` is the response body parsed as JSON.
 */
async function jsonRequest(app: Hono, findingId: string, approved: boolean, reason?: string) {
  const endpoint = `http://localhost/feedback/finding/${findingId}`;
  const init = {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({ approved, reason }),
  };
  const response = await app.request(endpoint, init);
  const payload = (await response.json()) as Record<string, unknown>;
  return { response, payload };
}
/**
 * Builds a pull-request review payload for `acme/repo` PR #42.
 *
 * @param keySuffix Appended to the idempotency key and to the `head-` SHA so
 *   different suffixes yield distinct keys and head SHAs.
 * @returns The payload object.
 */
function createPullRequestPayload(keySuffix: string): PullRequestReviewPayload {
  const idempotencyKey = `pr:acme/repo:42:${keySuffix}`;
  const headSha = `head-${keySuffix}`;
  const payload: PullRequestReviewPayload = {
    idempotencyKey,
    eventType: 'pull_request',
    owner: 'acme',
    repo: 'repo',
    cloneUrl: 'https://example.com/acme/repo.git',
    prNumber: 42,
    baseSha: 'base-sha',
    headSha,
    maxAttempts: 2,
  };
  return payload;
}
/**
 * Builds an unpublished `correctness` finding fixture for `src/index.ts`.
 *
 * @param runId Run the finding belongs to.
 * @param index Ordinal used in the id, fingerprint, text fields and line number
 *   (`10 + index`). Index 1 is `high` severity with confidence 0.99; every other
 *   index is `medium` with confidence 0.91.
 * @returns The finding object.
 */
function createFinding(runId: string, index: number): Finding {
  const isFirst = index === 1;
  const severity = isFirst ? 'high' : 'medium';
  const confidence = isFirst ? 0.99 : 0.91;
  const finding: Finding = {
    id: `finding-${index}`,
    runId,
    fingerprint: `fp-${index}`,
    category: 'correctness',
    severity,
    confidence,
    path: 'src/index.ts',
    line: 10 + index,
    title: `Potential issue ${index}`,
    detail: `Detail for finding ${index}`,
    evidence: `Evidence for finding ${index}`,
    suggestion: `Suggestion for finding ${index}`,
    published: false,
  };
  return finding;
}
/**
 * Creates (or reuses) a review run in the given store, attaches generated
 * findings to it and ensures a kernel session exists for the run's scope.
 *
 * @param store Review store that receives the run and its findings.
 * @param options `keySuffix` is forwarded to `createPullRequestPayload`;
 *   `findingCount` is the number of findings to generate (defaults to 1).
 * @returns The run, the generated findings and the ensured session record.
 */
async function seedRunWithSession(
  store: FileReviewStore,
  options: { keySuffix: string; findingCount?: number }
): Promise<{
  run: ReviewRun;
  findings: Finding[];
  session: KernelSessionRecord;
}> {
  const { run } = await store.createOrReuseRun(createPullRequestPayload(options.keySuffix));

  // Findings are numbered from 1 so that the first one is the high-severity fixture.
  const findingCount = options.findingCount ?? 1;
  const findings: Finding[] = [];
  for (let ordinal = 1; ordinal <= findingCount; ordinal += 1) {
    findings.push(createFinding(run.id, ordinal));
  }
  await store.addFindings(run.id, findings);

  const scope = getReviewSessionScope(run);
  const session = kernelSessionRepository.ensureSession({
    scopeType: scope.scopeType,
    scopeKey: scope.scopeKey,
    metadata: {
      owner: run.owner,
      repo: run.repo,
      prNumber: run.prNumber,
      eventType: run.eventType,
      headSha: run.headSha,
    },
    runId: run.id,
  });

  return { run, findings, session };
}
/**
 * Returns the session's events restricted to the three types these tests
 * assert on: `human_feedback_processed`, `session_continue_requested` and
 * `session_continue_completed`. The order returned by `listEvents` is kept.
 *
 * @param sessionId Session whose events are listed.
 */
function getRelevantEvents(sessionId: string): KernelSessionEventRecord[] {
  const trackedEventTypes = new Set<string>([
    'human_feedback_processed',
    'session_continue_requested',
    'session_continue_completed',
  ]);
  const sessionEvents = kernelSessionRepository.listEvents(sessionId);
  return sessionEvents.filter((event) => trackedEventTypes.has(event.eventType));
}
/**
 * Finds the first event with the given type.
 *
 * @param events Events to search, in order.
 * @param eventType Event type to match exactly.
 * @returns The first matching event, or `undefined` when none matches.
 */
function getEventByType(
  events: KernelSessionEventRecord[],
  eventType: KernelSessionEventRecord['eventType']
): KernelSessionEventRecord | undefined {
  for (const candidate of events) {
    if (candidate.eventType === eventType) {
      return candidate;
    }
  }
  return undefined;
}
// Integration tests for POST /feedback/finding/:findingId against a real FileReviewStore
// and kernel session repository. The Gitea comment calls and
// kernelReviewEngine.continueSession are replaced with recording stubs.
describe('feedback kernel session integration', () => {
let tempDir: string;
let app: Hono;
let store: FileReviewStore;
let savedDbPath: string | undefined;
// Recorders filled by the stubs installed in beforeEach.
let pullRequestCommentCalls: string[];
let commitCommentCalls: string[];
let continueSessionCalls: string[];
// Real implementations, kept so afterEach can restore them.
const originalAddPullRequestComment = giteaService.addPullRequestComment;
const originalAddCommitComment = giteaService.addCommitComment;
const originalContinueSession = kernelReviewEngine.continueSession;
// Per-test setup: temp directory, DATABASE_PATH override, database init, a new review store
// wired into the feedback system, and the recording stubs.
beforeEach(async () => {
tempDir = await mkdtemp(path.join(tmpdir(), 'feedback-kernel-session-'));
savedDbPath = process.env.DATABASE_PATH;
process.env.DATABASE_PATH = path.join(tempDir, 'assistant.db');
initDatabase();
store = new FileReviewStore(path.join(tempDir, 'review-workdir'));
await store.init();
initializeFeedbackSystem(store);
app = createTestApp();
pullRequestCommentCalls = [];
commitCommentCalls = [];
continueSessionCalls = [];
giteaService.addPullRequestComment = async (_owner, _repo, _prNumber, body) => {
pullRequestCommentCalls.push(body);
};
giteaService.addCommitComment = async (_owner, _repo, _commitSha, body) => {
commitCommentCalls.push(body);
};
// Stubbed continuation: records the call, returns false when the session has no
// lastRunId, otherwise appends the requested/completed event pair and returns true.
kernelReviewEngine.continueSession = async (sessionId: string) => {
continueSessionCalls.push(sessionId);
const session = kernelSessionRepository.getSessionById(sessionId);
if (!session?.lastRunId) {
return false;
}
kernelSessionRepository.appendEvent(sessionId, 'session_continue_requested', {
runId: session.lastRunId,
});
kernelSessionRepository.appendEvent(sessionId, 'session_continue_completed', {
runId: session.lastRunId,
});
return true;
};
});
// Per-test teardown: restore the stubs, close the database, put DATABASE_PATH back
// (deleting it when it was originally unset) and remove the temp directory.
afterEach(async () => {
giteaService.addPullRequestComment = originalAddPullRequestComment;
giteaService.addCommitComment = originalAddCommitComment;
kernelReviewEngine.continueSession = originalContinueSession;
closeDatabase();
if (savedDbPath === undefined) {
Reflect.deleteProperty(process.env, 'DATABASE_PATH');
} else {
process.env.DATABASE_PATH = savedDbPath;
}
await rm(tempDir, { recursive: true, force: true });
});
// Approving the only finding: expects a PR comment, a local comment record, the finding
// marked published, one feedback event and one continuation request/complete pair.
test('approve success publishes comment records events and triggers continuation when last pending finding is processed', async () => {
const { run, findings, session } = await seedRunWithSession(store, {
keySuffix: 'approve-success',
});
const { response, payload } = await jsonRequest(app, findings[0].id, true, 'needs fix');
const runDetails = await store.getRunDetails(run.id);
const persistedFinding = await store.getFinding(findings[0].id);
const events = getRelevantEvents(session.id);
expect(response.status).toBe(200);
expect(payload).toMatchObject({
success: true,
message: '已标记为有效问题并发布到Gitea',
published: true,
});
expect(persistedFinding?.published).toBe(true);
expect(runDetails?.comments).toHaveLength(1);
expect(runDetails?.comments[0]).toMatchObject({
runId: run.id,
status: 'published',
path: findings[0].path,
line: findings[0].line,
fingerprint: findings[0].fingerprint,
} satisfies Partial<ReviewCommentRecord>);
expect(runDetails?.comments[0]?.body).toContain('人工审批确认');
expect(runDetails?.comments[0]?.body).toContain(findings[0].title);
expect(pullRequestCommentCalls).toHaveLength(1);
expect(commitCommentCalls).toHaveLength(0);
expect(continueSessionCalls).toEqual([session.id]);
// Event types are sorted before comparison, so relative event order is not asserted here.
expect(events.map((event) => event.eventType).sort()).toEqual([
'human_feedback_processed',
'session_continue_completed',
'session_continue_requested',
]);
expect(getEventByType(events, 'human_feedback_processed')?.payload).toEqual({
runId: run.id,
findingId: findings[0].id,
approved: true,
reason: 'needs fix',
published: true,
});
expect(getEventByType(events, 'session_continue_requested')?.payload).toEqual({
runId: run.id,
});
expect(getEventByType(events, 'session_continue_completed')?.payload).toEqual({
runId: run.id,
});
});
// Rejecting the only finding: no Gitea calls, but a local 'REJECTED: ...' comment record
// is stored, the finding's published flag is set, and continuation still runs.
test('reject success records local handling without Gitea publish and still triggers continuation on final pending finding', async () => {
const { run, findings, session } = await seedRunWithSession(store, {
keySuffix: 'reject-success',
});
const { response, payload } = await jsonRequest(app, findings[0].id, false, 'false positive');
const runDetails = await store.getRunDetails(run.id);
const persistedFinding = await store.getFinding(findings[0].id);
const events = getRelevantEvents(session.id);
expect(response.status).toBe(200);
expect(payload).toMatchObject({
success: true,
message: '已标记为误报',
published: false,
});
expect(persistedFinding?.published).toBe(true);
expect(runDetails?.comments).toHaveLength(1);
expect(runDetails?.comments[0]).toMatchObject({
runId: run.id,
status: 'published',
fingerprint: findings[0].fingerprint,
} satisfies Partial<ReviewCommentRecord>);
expect(runDetails?.comments[0]?.body).toBe(`REJECTED: ${findings[0].title} - false positive`);
expect(pullRequestCommentCalls).toHaveLength(0);
expect(commitCommentCalls).toHaveLength(0);
expect(continueSessionCalls).toEqual([session.id]);
expect(events.map((event) => event.eventType).sort()).toEqual([
'human_feedback_processed',
'session_continue_completed',
'session_continue_requested',
]);
expect(getEventByType(events, 'human_feedback_processed')?.payload).toEqual({
runId: run.id,
findingId: findings[0].id,
approved: false,
reason: 'false positive',
published: false,
});
});
// Sending the same approval twice: the second response reports alreadyProcessed, and
// comments, Gitea calls, continuation calls and events all stay at a single occurrence.
test('duplicate approve returns idempotent success without duplicating comment records or continuation events', async () => {
const { run, findings, session } = await seedRunWithSession(store, {
keySuffix: 'duplicate-approve',
});
const first = await jsonRequest(app, findings[0].id, true, 'needs fix');
const second = await jsonRequest(app, findings[0].id, true, 'retry request');
const runDetails = await store.getRunDetails(run.id);
const events = getRelevantEvents(session.id);
expect(first.response.status).toBe(200);
expect(second.response.status).toBe(200);
expect(second.payload).toMatchObject({
success: true,
message: '该finding已处理过',
alreadyProcessed: true,
published: true,
});
expect(runDetails?.comments).toHaveLength(1);
expect(runDetails?.comments[0]?.fingerprint).toBe(findings[0].fingerprint);
expect(pullRequestCommentCalls).toHaveLength(1);
expect(continueSessionCalls).toEqual([session.id]);
expect(events.map((event) => event.eventType).sort()).toEqual([
'human_feedback_processed',
'session_continue_completed',
'session_continue_requested',
]);
});
// Gitea failure: the request must answer 500 and leave the finding unpublished, with no
// local comment record, no session events and no continuation call.
test('Gitea publish failure rolls back published flag and leaves no local comment record or session events', async () => {
const { run, findings, session } = await seedRunWithSession(store, {
keySuffix: 'gitea-fail',
});
// Overrides the beforeEach stub for this test only; afterEach restores the original.
giteaService.addPullRequestComment = async () => {
throw new Error('gitea unavailable');
};
const { response, payload } = await jsonRequest(app, findings[0].id, true, 'needs fix');
const runDetails = await store.getRunDetails(run.id);
const persistedFinding = await store.getFinding(findings[0].id);
const events = getRelevantEvents(session.id);
expect(response.status).toBe(500);
expect(payload).toMatchObject({
error: 'Failed to process feedback',
details: 'gitea unavailable',
});
expect(persistedFinding?.published).toBe(false);
expect(runDetails?.comments).toEqual([]);
expect(events).toEqual([]);
expect(continueSessionCalls).toEqual([]);
});
// Local store failure after a successful Gitea publish: the request must answer 500 with
// the rollback message, the finding stays unpublished, and the Gitea call is still counted.
test('local comment record failure rolls back published flag after Gitea publish and leaves no session events', async () => {
const { run, findings, session } = await seedRunWithSession(store, {
keySuffix: 'local-record-fail',
});
// Make the local write fail; the bound original is put back right after the state is read.
const originalAddCommentRecord = store.addCommentRecord.bind(store);
store.addCommentRecord = async () => {
throw new Error('local store write failed');
};
const { response, payload } = await jsonRequest(app, findings[0].id, true, 'needs fix');
const runDetails = await store.getRunDetails(run.id);
const persistedFinding = await store.getFinding(findings[0].id);
const events = getRelevantEvents(session.id);
store.addCommentRecord = originalAddCommentRecord;
expect(response.status).toBe(500);
expect(payload).toMatchObject({
error: 'Failed to process feedback',
details:
'Comment published to Gitea but failed to save locally. State rolled back, you may retry. Note: immediate retry may create duplicate comments.',
});
expect(persistedFinding?.published).toBe(false);
expect(runDetails?.comments).toEqual([]);
expect(pullRequestCommentCalls).toHaveLength(1);
expect(events).toEqual([]);
expect(continueSessionCalls).toEqual([]);
});
});

View File

@@ -160,7 +160,7 @@ describe('llm-config controller', () => {
const { data: roles } = await jsonRequest(app, 'GET', '/roles');
const assignedRoles = roles.filter((r: any) => r.providerId !== null);
expect(assignedRoles).toHaveLength(4);
expect(assignedRoles).toHaveLength(2);
});
test('rejects missing required fields', async () => {
@@ -326,7 +326,7 @@ describe('llm-config controller', () => {
test('returns all MODEL_ROLES with null assignments when unassigned', async () => {
const { status, data } = await jsonRequest(app, 'GET', '/roles');
expect(status).toBe(200);
expect(data).toHaveLength(4);
expect(data).toHaveLength(2);
expect(data[0]).toHaveProperty('role');
expect(data[0]).toHaveProperty('providerId');
});

View File

@@ -1,8 +1,16 @@
import { Hono } from 'hono';
import { sign } from 'hono/jwt';
import { kernelSessionRepository } from '../agent-kernel/session/session-repository';
import config from '../config';
import { repositoryReviewPromptRepo } from '../db/repositories/repository-review-prompt-repo';
import { reviewEngine } from '../review/engine';
import { kernelReviewEngine } from '../review/kernel/kernel-review-engine';
import {
buildReviewPlanSnapshot,
buildReviewSessionSummary,
buildReviewTimeline,
filterEventsByLatestHeadSha,
} from '../review/kernel/session-read-model';
import { getActiveReviewEngine } from '../review/review-engine-provider';
import { giteaService } from '../services/gitea';
import { toErrorLogMeta } from '../utils/error-log';
import { logger } from '../utils/logger';
@@ -173,7 +181,7 @@ protectedRoutes.delete('/repositories/:owner/:repo/webhook/:hookId', async (c) =
protectedRoutes.get('/review/runs', async (c) => {
try {
const limit = Number.parseInt(c.req.query('limit') || '50', 10);
const runs = await reviewEngine.listRuns(limit);
const runs = await getActiveReviewEngine().listRuns(limit);
return c.json({ data: runs });
} catch (error: any) {
logger.error('获取审查任务列表失败:', error);
@@ -185,7 +193,7 @@ protectedRoutes.get('/review/runs', async (c) => {
protectedRoutes.get('/review/runs/:runId', async (c) => {
try {
const { runId } = c.req.param();
const result = await reviewEngine.getRunDetails(runId);
const result = await getActiveReviewEngine().getRunDetails(runId);
if (!result) {
return c.json({ message: 'Run not found' }, 404);
}
@@ -196,6 +204,92 @@ protectedRoutes.get('/review/runs/:runId', async (c) => {
}
});
// GET /review/sessions — lists kernel review sessions, each paired with a summary
// projected from its checkpoint, event history and plan snapshot.
// Query: `limit` (optional, positive integer; defaults to 50).
// Responds 200 with `{ data: [{ session, summary }] }`, or 500 with the error message.
protectedRoutes.get('/review/sessions', (c) => {
  try {
    // Number.parseInt yields NaN for non-numeric input and accepts negative values;
    // fall back to the default so only a positive integer reaches the repository.
    const requestedLimit = Number.parseInt(c.req.query('limit') || '50', 10);
    const limit = Number.isInteger(requestedLimit) && requestedLimit > 0 ? requestedLimit : 50;
    const catalog = kernelReviewEngine.listTaskCatalog();
    // NOTE(review): the session detail route narrows events with
    // filterEventsByLatestHeadSha before building the plan, while this listing uses
    // the full event history — confirm the difference is intended.
    const sessions = kernelSessionRepository.listSessions(limit).map((session) => {
      const checkpoint = kernelSessionRepository.loadCheckpoint(session.id);
      const events = kernelSessionRepository.listEvents(session.id);
      const plan = buildReviewPlanSnapshot(catalog, checkpoint, events);
      return {
        session,
        summary: buildReviewSessionSummary(session, checkpoint, events, plan),
      };
    });
    return c.json({ data: sessions });
  } catch (error: any) {
    logger.error('获取审查会话列表失败:', error);
    return c.json({ message: 'Failed to fetch review sessions', error: error.message }, 500);
  }
});
// GET /review/sessions/:sessionId — returns one kernel review session together with its
// summary, checkpoint, plan, timeline, filtered events, subagent invocations and the
// details of its last run (null when the session has no lastRunId).
// Responds 404 when the session does not exist and 500 with the error message on failure.
protectedRoutes.get('/review/sessions/:sessionId', async (c) => {
  try {
    const { sessionId } = c.req.param();
    const session = kernelSessionRepository.getSessionById(sessionId);
    if (!session) {
      return c.json({ message: 'Session not found' }, 404);
    }

    const checkpoint = kernelSessionRepository.loadCheckpoint(sessionId);
    // Plan, summary and timeline are all derived from the filtered events, not the full history.
    const events = filterEventsByLatestHeadSha(
      kernelSessionRepository.listEvents(sessionId),
      checkpoint
    );
    const plan = buildReviewPlanSnapshot(kernelReviewEngine.listTaskCatalog(), checkpoint, events);
    const summary = buildReviewSessionSummary(session, checkpoint, events, plan);

    const lastRunId = session.lastRunId;
    const runDetails = lastRunId ? await getActiveReviewEngine().getRunDetails(lastRunId) : null;
    const subagentInvocations = kernelSessionRepository.listSubagentInvocations(sessionId);
    const timeline = buildReviewTimeline(events);

    return c.json({
      session,
      summary,
      checkpoint,
      plan,
      timeline,
      events,
      subagentInvocations,
      runDetails,
    });
  } catch (error: any) {
    logger.error('获取审查会话详情失败:', error);
    return c.json({ message: 'Failed to fetch review session details', error: error.message }, 500);
  }
});
/**
 * GET /review/kernel/tasks
 *
 * Returns the kernel review engine's task catalog as `{ data }`.
 */
protectedRoutes.get('/review/kernel/tasks', (c) => {
  try {
    const taskCatalog = kernelReviewEngine.listTaskCatalog();
    return c.json({ data: taskCatalog });
  } catch (error: any) {
    logger.error('获取 kernel 任务目录失败:', error);
    const body = { message: 'Failed to fetch kernel task catalog', error: error.message };
    return c.json(body, 500);
  }
});
/**
 * GET /review/kernel/subagents
 *
 * Returns the kernel review engine's subagent catalog as `{ data }`.
 */
protectedRoutes.get('/review/kernel/subagents', (c) => {
  try {
    const subagentCatalog = kernelReviewEngine.listSubagentCatalog();
    return c.json({ data: subagentCatalog });
  } catch (error: any) {
    logger.error('获取 kernel subagent 目录失败:', error);
    const body = { message: 'Failed to fetch kernel subagent catalog', error: error.message };
    return c.json(body, 500);
  }
});
/**
 * GET /review/kernel/hooks
 *
 * Returns the kernel review engine's hook catalog as `{ data }`.
 */
protectedRoutes.get('/review/kernel/hooks', (c) => {
  try {
    const hookCatalog = kernelReviewEngine.listHookCatalog();
    return c.json({ data: hookCatalog });
  } catch (error: any) {
    logger.error('获取 kernel hook 目录失败:', error);
    const body = { message: 'Failed to fetch kernel hook catalog', error: error.message };
    return c.json(body, 500);
  }
});
export const adminController = {
publicRoutes,
protectedRoutes,

View File

@@ -1,33 +1,22 @@
import { zValidator } from '@hono/zod-validator';
import { Hono } from 'hono';
import { z } from 'zod';
import { kernelSessionRepository } from '../agent-kernel/session/session-repository';
import config from '../config';
import { LearningSystem } from '../review/learning/learning-system';
import { VectorMemoryStore } from '../review/memory/vector-store';
import { kernelReviewEngine } from '../review/kernel/kernel-review-engine';
import { getReviewSessionScope } from '../review/kernel/session-scope';
import { FileReviewStore } from '../review/store/file-review-store';
import { giteaService } from '../services/gitea';
const feedbackRouter = new Hono();
// 全局实例
let memoryStore: VectorMemoryStore | null = null;
let learningSystem: LearningSystem | null = null;
let reviewStore: FileReviewStore | null = null;
/**
 * Initializes the feedback system (the memory system is optional).
 *
 * Stores the given review store in the module-level `reviewStore` and, when
 * both `config.review.qdrantUrl` and `config.review.enableMemory` are set,
 * creates the vector memory store and the learning system on top of it.
 *
 * @param store Review store instance shared with the feedback handlers.
 */
export function initializeFeedbackSystem(store: FileReviewStore): void {
// Keep the store instance so handlers reuse it instead of creating their own,
// which would let state drift between multiple instances.
reviewStore = store;
// The memory system is an optional feature.
if (config.review.qdrantUrl && config.review.enableMemory) {
memoryStore = new VectorMemoryStore(config.review.qdrantUrl);
learningSystem = new LearningSystem(memoryStore, reviewStore);
// Initialization is not awaited; a failure is only logged here.
memoryStore.initialize().catch((err) => {
console.error('Failed to initialize memory store:', err);
});
}
}
// 提交人工反馈
@@ -62,6 +51,10 @@ feedbackRouter.post(
return c.json({ error: 'Run not found' }, 404);
}
const session = kernelSessionRepository.getSessionByScopeKey(
getReviewSessionScope(runDetails.run).scopeKey
);
const { owner, repo } = runDetails.run;
// 原子幂等性保护先标记finding为published原子check-and-set
@@ -87,7 +80,6 @@ feedbackRouter.post(
success: true,
message: '该finding已处理过',
alreadyProcessed: true,
learningApplied: false,
published: true,
});
}
@@ -105,32 +97,6 @@ feedbackRouter.post(
// 以下代码只会被第一个请求执行wasUnpublished=true
let learningApplied = false;
// 如果记忆系统启用,尝试执行学习和向量存储(可选功能,失败不阻止审批流程)
if (memoryStore && learningSystem) {
try {
await memoryStore.storeFeedback(findingId, approved, reason || '', owner, repo);
if (approved) {
await learningSystem.learnFromApproval(finding, owner, repo);
} else {
await learningSystem.learnFromFalsePositive(
finding,
reason || '人工标记为误报',
owner,
repo
);
}
learningApplied = true;
} catch (memoryError) {
// 记忆系统故障不应阻止人工审批操作
console.error('Memory system operation failed (non-fatal):', memoryError);
learningApplied = false;
}
}
try {
// 如果批准发布到Gitea人工审批通过的问题应该通知开发者
if (approved) {
@@ -210,11 +176,29 @@ _此问题已通过人工审批确认_`;
}
// finding已在开头原子标记为published处理成功则保持published状态
if (session) {
kernelSessionRepository.appendEvent(session.id, 'human_feedback_processed', {
runId: finding.runId,
findingId,
approved,
reason: reason || null,
published: approved,
});
if (config.review.engine === 'kernel') {
const latestRunDetails = await reviewStore.getRunDetails(finding.runId);
const hasRemainingPendingFindings =
latestRunDetails?.findings.some((item) => !item.published) ?? false;
if (!hasRemainingPendingFindings) {
await kernelReviewEngine.continueSession(session.id);
}
}
}
return c.json({
success: true,
message: approved ? '已标记为有效问题并发布到Gitea' : '已标记为误报',
learningApplied,
published: approved,
});
} catch (error) {

View File

@@ -94,7 +94,7 @@ llmConfigRouter.post('/providers', async (c) => {
const allProviders = providerRepo.list();
if (allProviders.length === 1) {
const modelRolesToBind: ModelRole[] = ['planner', 'specialist', 'judge', 'embedding'];
const modelRolesToBind: ModelRole[] = ['planner', 'specialist'];
for (const role of modelRolesToBind) {
modelRoleRepo.set(role, created.id, body.defaultModel);
}

View File

@@ -2,10 +2,9 @@ import * as crypto from 'node:crypto';
import { Context } from 'hono';
import { map } from 'lodash-es';
import config from '../config';
import { codexEngine } from '../review/codex/codex-engine';
import { LocalRepoManager } from '../review/context/local-repo-manager';
import { SandboxExec } from '../review/context/sandbox-exec';
import { reviewEngine } from '../review/engine';
import { getActiveReviewEngine, getReviewEngineLabel } from '../review/review-engine-provider';
import { PullRequestDetails, giteaService } from '../services/gitea';
import { getNotificationManager } from '../services/notification-manager';
import type { NotificationContext } from '../services/notification/types';
@@ -175,7 +174,7 @@ async function handlePullRequestEvent(c: Context, body: any): Promise<Response>
// 包含baseSha以支持retarget场景相同headSha但baseSha变化时需要重新审查
const idempotencyKey = `${owner}/${repoName}#${prNumber}:${baseSha}...${headSha}`;
const engineInstance = config.review.engine === 'codex' ? codexEngine : reviewEngine;
const engineInstance = getActiveReviewEngine();
const { run, reused } = await engineInstance.enqueuePullRequest({
eventType: 'pull_request',
idempotencyKey,
@@ -188,7 +187,7 @@ async function handlePullRequestEvent(c: Context, body: any): Promise<Response>
headSha,
});
const engineLabel = config.review.engine === 'codex' ? 'Codex' : 'Agent';
const engineLabel = getReviewEngineLabel();
return c.json(
{
status: reused ? 'deduplicated' : 'accepted',
@@ -322,7 +321,7 @@ async function handleCommitStatusEvent(c: Context, body: any): Promise<Response>
}
const idempotencyKey = `${owner}/${repoName}@${commitSha}`;
const engineInstance = config.review.engine === 'codex' ? codexEngine : reviewEngine;
const engineInstance = getActiveReviewEngine();
const { run, reused } = await engineInstance.enqueueCommit({
eventType: 'commit_status',
idempotencyKey,
@@ -334,7 +333,7 @@ async function handleCommitStatusEvent(c: Context, body: any): Promise<Response>
relatedPrNumber: relatedPR?.number,
});
const engineLabel = config.review.engine === 'codex' ? 'Codex' : 'Agent';
const engineLabel = getReviewEngineLabel();
return c.json(
{
status: reused ? 'deduplicated' : 'accepted',

View File

@@ -116,7 +116,7 @@ describe('migration 002 remove legacy review mode', () => {
const engineRow = db
.query('SELECT value FROM system_settings WHERE key = ?')
.get('REVIEW_ENGINE') as { value: string } | null;
expect(engineRow?.value).toBe('agent');
expect(engineRow?.value).toBe('kernel');
const roles = db
.query('SELECT role FROM model_role_assignments ORDER BY role ASC')

View File

@@ -11,12 +11,55 @@ function createInconsistentMigrationState(dbPath: string): void {
db.exec('PRAGMA foreign_keys = ON');
db.exec(`
CREATE TABLE IF NOT EXISTS _migrations (
version INTEGER PRIMARY KEY,
name TEXT NOT NULL,
version INTEGER PRIMARY KEY,
name TEXT NOT NULL,
applied_at TEXT NOT NULL DEFAULT (datetime('now'))
)
`);
db.exec(`
CREATE TABLE llm_providers (
id TEXT PRIMARY KEY,
name TEXT NOT NULL,
type TEXT NOT NULL,
base_url TEXT,
default_model TEXT NOT NULL,
is_enabled INTEGER NOT NULL DEFAULT 1,
extra_config TEXT DEFAULT '{}',
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
)
`);
db.exec(`
CREATE TABLE llm_secrets (
provider_id TEXT PRIMARY KEY REFERENCES llm_providers(id) ON DELETE CASCADE,
ciphertext BLOB NOT NULL,
iv BLOB NOT NULL,
auth_tag BLOB NOT NULL,
key_version INTEGER NOT NULL DEFAULT 1,
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
)
`);
db.exec(`
CREATE TABLE model_role_assignments (
role TEXT PRIMARY KEY CHECK (role IN ('planner','specialist','judge','embedding')),
provider_id TEXT NOT NULL REFERENCES llm_providers(id),
model TEXT NOT NULL,
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
)
`);
db.exec(`
CREATE TABLE system_settings (
key TEXT PRIMARY KEY,
value TEXT NOT NULL,
is_sensitive INTEGER NOT NULL DEFAULT 0,
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
)
`);
db.query('INSERT INTO _migrations (version, name) VALUES (?, ?)').run(
1,
'init_llm_provider_schema'

View File

@@ -78,7 +78,7 @@ describe('model-role-repo', () => {
});
test('can assign different roles', () => {
const roles: ModelRole[] = ['planner', 'specialist', 'judge', 'embedding'];
const roles: ModelRole[] = ['planner', 'specialist'];
for (const role of roles) {
modelRoleRepo.set(role, providerId, `model-for-${role}`);
}
@@ -125,7 +125,6 @@ describe('model-role-repo', () => {
test('results are ordered by role', () => {
modelRoleRepo.set('specialist', providerId, 'model-a');
modelRoleRepo.set('embedding', providerId, 'model-b');
modelRoleRepo.set('planner', providerId, 'model-c');
const all = modelRoleRepo.list();
@@ -158,13 +157,11 @@ describe('model-role-repo', () => {
test('returns all roles assigned to a provider', () => {
modelRoleRepo.set('specialist', providerId, 'gpt-4o-mini');
modelRoleRepo.set('planner', providerId, 'gpt-4o');
modelRoleRepo.set('judge', providerId, 'gpt-4o');
const roles = modelRoleRepo.getRolesByProvider(providerId);
expect(roles).toHaveLength(3);
expect(roles).toHaveLength(2);
expect(roles).toContain('specialist');
expect(roles).toContain('planner');
expect(roles).toContain('judge');
});
test('does not return roles assigned to other providers', () => {

View File

@@ -12,6 +12,9 @@ import { dirname, resolve } from 'node:path';
import { migration001Init } from './migrations/001_init';
import { migration002RemoveLegacyReviewMode } from './migrations/002_remove_legacy_review_mode';
import { migration003RepositoryReviewPrompts } from './migrations/003_repository_review_prompts';
import { migration004AgentKernelSessions } from './migrations/004_agent_kernel_sessions';
import { migration005AgentKernelSubagentInvocations } from './migrations/005_agent_kernel_subagent_invocations';
import { migration006RemoveJudgeEmbeddingRoles } from './migrations/006_remove_judge_embedding_roles';
// ---------------------------------------------------------------------------
// Types
@@ -31,6 +34,9 @@ const MIGRATIONS: Migration[] = [
migration001Init,
migration002RemoveLegacyReviewMode,
migration003RepositoryReviewPrompts,
migration004AgentKernelSessions,
migration005AgentKernelSubagentInvocations,
migration006RemoveJudgeEmbeddingRoles,
];
const REPOSITORY_REVIEW_PROMPTS_TABLE = 'repository_review_prompts';

View File

@@ -9,7 +9,7 @@ export const migration002RemoveLegacyReviewMode: Migration = {
up(db: Database): void {
db.exec(
"UPDATE system_settings SET value = 'agent' WHERE key = 'REVIEW_ENGINE' AND value NOT IN ('agent','codex')"
"UPDATE system_settings SET value = 'kernel' WHERE key = 'REVIEW_ENGINE' AND value NOT IN ('codex','kernel')"
);
db.exec(`

View File

@@ -0,0 +1,46 @@
import type { Database } from 'bun:sqlite';
import type { Migration } from '../database';
/**
 * Migration 4: creates the agent-kernel session tables.
 *
 * - `agent_kernel_sessions`: one row per review scope (`scope_key` is unique).
 * - `agent_kernel_session_events`: events per session, removed with the session.
 * - `agent_kernel_session_checkpoints`: one checkpoint row per session.
 * - an index on events by `(session_id, created_at)`.
 */
export const migration004AgentKernelSessions: Migration = {
  version: 4,
  name: 'agent_kernel_sessions',
  up(db: Database): void {
    // Executed in order: both child tables reference agent_kernel_sessions,
    // and the index needs the events table to exist.
    const statements: string[] = [
      `
CREATE TABLE agent_kernel_sessions (
id TEXT PRIMARY KEY,
scope_type TEXT NOT NULL CHECK (scope_type IN ('pull_request', 'commit')),
scope_key TEXT NOT NULL UNIQUE,
metadata_json TEXT NOT NULL DEFAULT '{}',
last_run_id TEXT,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
)
`,
      `
CREATE TABLE agent_kernel_session_events (
id TEXT PRIMARY KEY,
session_id TEXT NOT NULL REFERENCES agent_kernel_sessions(id) ON DELETE CASCADE,
event_type TEXT NOT NULL,
payload_json TEXT NOT NULL DEFAULT '{}',
created_at TEXT NOT NULL DEFAULT (datetime('now'))
)
`,
      `
CREATE TABLE agent_kernel_session_checkpoints (
session_id TEXT PRIMARY KEY REFERENCES agent_kernel_sessions(id) ON DELETE CASCADE,
state_json TEXT NOT NULL,
pending_tasks_json TEXT NOT NULL,
stop_reason TEXT,
state_version INTEGER NOT NULL DEFAULT 1,
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
)
`,
      'CREATE INDEX idx_agent_kernel_events_session ON agent_kernel_session_events(session_id, created_at)',
    ];

    for (const sql of statements) {
      db.exec(sql);
    }
  },
};

View File

@@ -0,0 +1,29 @@
import type { Database } from 'bun:sqlite';
import type { Migration } from '../database';
/**
 * Migration 5: creates `agent_kernel_subagent_invocations`, which records each
 * subagent invocation under its parent kernel session (rows are removed with
 * the session), plus an index by `(parent_session_id, started_at)`.
 */
export const migration005AgentKernelSubagentInvocations: Migration = {
  version: 5,
  name: 'agent_kernel_subagent_invocations',
  up(db: Database): void {
    const createTable = `
CREATE TABLE agent_kernel_subagent_invocations (
id TEXT PRIMARY KEY,
parent_session_id TEXT NOT NULL REFERENCES agent_kernel_sessions(id) ON DELETE CASCADE,
parent_run_id TEXT NOT NULL,
parent_task_name TEXT NOT NULL,
subagent_name TEXT NOT NULL,
agent_id TEXT NOT NULL,
status TEXT NOT NULL CHECK (status IN ('running', 'completed', 'failed')),
input_json TEXT NOT NULL,
result_json TEXT,
started_at TEXT NOT NULL DEFAULT (datetime('now')),
finished_at TEXT
)
`;
    const createIndex =
      'CREATE INDEX idx_agent_kernel_subagent_invocations_session ON agent_kernel_subagent_invocations(parent_session_id, started_at)';

    // The table must exist before the index on it can be created.
    for (const sql of [createTable, createIndex]) {
      db.exec(sql);
    }
  },
};

View File

@@ -0,0 +1,30 @@
import type { Database } from 'bun:sqlite';
import type { Migration } from '../database';
/** SQL list of the role values that remain valid after this migration. */
const ALLOWED_ROLES = "'planner','specialist'";

/**
 * Migration 6: narrows `model_role_assignments.role` to the allowed roles.
 *
 * The table is rebuilt: a replacement table with the tighter CHECK constraint
 * is created, rows whose role is still allowed are copied over, the old table
 * is dropped and the replacement is renamed into place. Rows with any other
 * role are not copied.
 */
export const migration006RemoveJudgeEmbeddingRoles: Migration = {
  version: 6,
  name: 'remove_judge_embedding_roles',
  up(db: Database): void {
    const createReplacement = `
CREATE TABLE model_role_assignments_new (
role TEXT PRIMARY KEY CHECK (role IN (${ALLOWED_ROLES})),
provider_id TEXT NOT NULL REFERENCES llm_providers(id),
model TEXT NOT NULL,
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
)
`;
    const copyAllowedRows = `
INSERT INTO model_role_assignments_new (role, provider_id, model, updated_at)
SELECT role, provider_id, model, updated_at
FROM model_role_assignments
WHERE role IN (${ALLOWED_ROLES})
`;
    const steps: string[] = [
      createReplacement,
      copyAllowedRows,
      'DROP TABLE model_role_assignments',
      'ALTER TABLE model_role_assignments_new RENAME TO model_role_assignments',
    ];

    for (const sql of steps) {
      db.exec(sql);
    }
  },
};

View File

@@ -1,6 +1,6 @@
/**
* Repository for model_role_assignments table.
* Maps business roles (planner, specialist, judge, embedding)
* Maps business roles (planner, specialist)
* to specific provider + model combinations.
*/
@@ -10,7 +10,7 @@ import { getDatabase } from '../database';
// Types
// ---------------------------------------------------------------------------
export type ModelRole = 'planner' | 'specialist' | 'judge' | 'embedding';
export type ModelRole = 'planner' | 'specialist';
export interface RoleAssignmentRow {
role: ModelRole;

View File

@@ -9,14 +9,21 @@ import { llmConfigRouter } from './controllers/llm-config';
import { handleGiteaWebhook } from './controllers/review';
import { initMasterKey } from './crypto/secrets';
import { initDatabase } from './db/database';
import { installE2EMockLLMGateway } from './llm/e2e-mock';
import { llmGateway } from './llm/gateway';
import { cleanupScheduler } from './review/cleanup-scheduler';
import { codexEngine } from './review/codex/codex-engine';
import { mcpRouter } from './review/codex/mcp-handler';
import { reviewEngine } from './review/engine';
import { getActiveReviewEngine } from './review/review-engine-provider';
initMasterKey();
initDatabase();
configManager.seedDefaults();
installE2EMockLLMGateway();
llmGateway.updateResilienceConfig(config.review.llmMaxConcurrentCalls, {
maxAttempts: config.review.llmRetryMaxAttempts,
baseDelayMs: config.review.llmRetryBaseDelayMs,
});
// 创建Hono应用实例
const app = new Hono();
@@ -77,26 +84,21 @@ app.get('*', serveStatic({ path: './public/index.html' }));
const port = config.app.port;
console.log(`⚡️ 服务启动在 http://localhost:${port}`);
// 启动审查引擎(根据配置选择)
reviewEngine.start().catch((error) => {
console.error('❌ 启动Agent Review Engine失败', error);
});
codexEngine.start().catch((error) => {
console.error('❌ 启动Codex Review Engine失败', error);
});
// 启动当前配置的审查引擎,避免非 active 引擎产生副作用。
getActiveReviewEngine()
.start()
.catch((error) => {
console.error('❌ 启动 Review Engine 失败', error);
});
// 启动清理调度器(定期清理过期 mirror/workspace 目录)
cleanupScheduler.start();
// 初始化反馈系统(总是初始化,记忆系统可选)
const reviewStore = reviewEngine.getStore();
// 初始化反馈系统
const reviewStore = getActiveReviewEngine().getStore();
initializeFeedbackSystem(reviewStore);
if (config.review.enableMemory) {
console.log('✅ 反馈系统已初始化(含向量记忆)');
} else {
console.log('✅ 反馈系统已初始化(不含向量记忆)');
}
console.log('✅ 反馈系统已初始化');
export default {
port,

View File

@@ -157,20 +157,6 @@ describe('LLMGateway', () => {
});
});
// ─── embedForRole: Error Cases ────────────────────────────────────
describe('embedForRole() — error handling', () => {
test('throws LLMNoProviderError when embedding role not assigned', async () => {
try {
await gateway.embedForRole(['text']);
expect(true).toBe(false);
} catch (e: any) {
expect(e.name).toBe('LLMNoProviderError');
expect(e.role).toBe('embedding');
}
});
});
// ─── Cache invalidation ──────────────────────────────────────────
describe('cache management', () => {

View File

@@ -0,0 +1,66 @@
import { describe, expect, test } from 'bun:test';
import { buildOpenAICompatibleChatParams } from '../providers/openai-compatible';
import type { LLMChatRequest, LLMToolDefinition } from '../types';
/**
 * Tool definition fixture shared by the tests below: a `read_file` tool that
 * takes a single required string parameter `file_path`.
 */
const readFileTool: LLMToolDefinition = {
name: 'read_file',
description: 'Read a file from the workspace',
parameters: {
type: 'object',
properties: {
file_path: { type: 'string' },
},
required: ['file_path'],
},
};
/**
 * Builds a chat request fixture: one user message, the `read_file` tool and
 * the given provider options (left `undefined` when none are passed).
 */
function makeRequest(providerOptions?: Record<string, unknown>): LLMChatRequest {
  const request: LLMChatRequest = {
    model: 'ignored-model',
    messages: [{ role: 'user', content: 'Review this change' }],
    tools: [readFileTool],
    providerOptions,
  };
  return request;
}
// Verifies how `providerOptions.tool_choice` is mapped onto Chat Completions params.
describe('OpenAI compatible provider params', () => {
  test('passes scalar tool_choice provider option to Chat Completions', () => {
    const request = makeRequest({ tool_choice: 'required' });
    const params = buildOpenAICompatibleChatParams(request, 'gpt-4o');

    // The tool definition is expected in the Chat Completions function format.
    const expectedTools = [
      {
        type: 'function',
        function: {
          name: 'read_file',
          description: 'Read a file from the workspace',
          parameters: readFileTool.parameters,
        },
      },
    ];

    expect(params.tool_choice).toBe('required');
    expect(params.tools).toEqual(expectedTools);
  });

  test('passes named function tool_choice provider option to Chat Completions', () => {
    const namedChoice = { type: 'function', function: { name: 'read_file' } };
    const params = buildOpenAICompatibleChatParams(
      makeRequest({ tool_choice: namedChoice }),
      'gpt-4o'
    );

    expect(params.tool_choice).toEqual({
      type: 'function',
      function: { name: 'read_file' },
    });
  });

  test('ignores invalid tool_choice provider option', () => {
    // A named choice without a function name is not a valid tool_choice.
    const invalidChoice = { type: 'function', function: {} };
    const params = buildOpenAICompatibleChatParams(
      makeRequest({ tool_choice: invalidChoice }),
      'gpt-4o'
    );

    expect(params.tool_choice).toBeUndefined();
  });
});

128
src/llm/e2e-mock.ts Normal file
View File

@@ -0,0 +1,128 @@
import { llmGateway } from './gateway';
import type { LLMChatRequest, LLMChatResponse, LLMToolCall, ModelRole } from './types';
/**
 * Shape of the role-based chat function that the mock produces: takes a model
 * role and a chat request without `model`, and resolves to a chat response.
 */
type ChatForRoleFn = (
role: ModelRole,
request: Omit<LLMChatRequest, 'model'>
) => Promise<LLMChatResponse>;
/** A canned final answer for a role: the response content plus its token usage. */
interface MockResponseConfig {
content: string;
usage: { promptTokens: number; completionTokens: number; totalTokens: number };
}
/**
 * Preset final responses keyed by model role. `content` is a JSON string:
 * the specialist entry carries a `findings` array, the planner entry a
 * summary with key concerns and a recommendation.
 */
const MOCK_RESPONSES: Record<ModelRole, MockResponseConfig> = {
specialist: {
content: JSON.stringify({
findings: [
{
severity: 'high',
confidence: 0.96,
path: 'src/user-handler.ts',
line: 16,
title: 'Caller dereferences nullable user profile fields',
detail:
'getUserDisplayName accepts UserRecord | null but dereferences user.profile!.displayName! without guarding null or missing profile data. The companion auth/user model shows callers can pass absent users.',
evidence:
'src/user-handler.ts: return user.profile!.displayName!.toUpperCase(); src/auth.ts: authenticate(...) returns User | null',
suggestion:
'Return a safe fallback when user/profile/displayName is missing, or reject null before calling getUserDisplayName.',
},
{
severity: 'medium',
confidence: 0.85,
path: 'src/user-handler.ts',
line: 6,
title: 'SQL injection via string interpolation',
detail:
'userId is interpolated directly into the SQL query string, allowing an attacker to inject arbitrary SQL.',
evidence: "const query = `SELECT * FROM users WHERE id = '${userId}'`;",
suggestion: 'Use parameterized queries instead of string interpolation.',
},
],
}),
usage: { promptTokens: 1200, completionTokens: 800, totalTokens: 2000 },
},
planner: {
content: JSON.stringify({
summary:
'The diff contains a new user-handler module with null safety and SQL injection issues.',
keyConcerns: ['Missing null check', 'SQL injection risk'],
recommendation: 'Require changes before merging.',
}),
usage: { promptTokens: 500, completionTokens: 200, totalTokens: 700 },
},
};
/**
 * Builds a tool call whose `arguments` field is the JSON-serialized `args`.
 */
function toolCall(id: string, name: string, args: Record<string, unknown>): LLMToolCall {
  const serializedArgs = JSON.stringify(args);
  const call: LLMToolCall = { id, name, arguments: serializedArgs };
  return call;
}
/**
 * Wraps the given tool calls in a response that has no text content, finish
 * reason `tool_calls` and a fixed token usage.
 */
function toolCallResponse(toolCalls: LLMToolCall[]): LLMChatResponse {
  const usage = { promptTokens: 300, completionTokens: 60, totalTokens: 360 };
  const response: LLMChatResponse = {
    content: null,
    toolCalls,
    finishReason: 'tool_calls',
    usage,
  };
  return response;
}
/**
 * Turns a preset config into a final response: its content and usage, no tool
 * calls, finish reason `stop`.
 */
function stopResponse(config: MockResponseConfig): LLMChatResponse {
  const { content, usage } = config;
  const response: LLMChatResponse = {
    content,
    toolCalls: [],
    finishReason: 'stop',
    usage,
  };
  return response;
}
/**
 * Scripted specialist reply for tool-enabled requests. The step is chosen by
 * how many `tool` messages the request already contains:
 * 0 → search the code, 1 → read `src/user-handler.ts`, 2 → read `src/auth.ts`,
 * 3 or more → return the preset specialist findings.
 */
function createAutonomousSpecialistResponse(
  request: Omit<LLMChatRequest, 'model'>
): LLMChatResponse {
  let completedToolCalls = 0;
  for (const message of request.messages) {
    if (message.role === 'tool') {
      completedToolCalls += 1;
    }
  }

  switch (completedToolCalls) {
    case 0:
      return toolCallResponse([
        toolCall('e2e_search_user_handler', 'search_code', {
          pattern: 'getUserDisplayName|authenticate|findUserByEmail',
          file_types: ['ts'],
          max_results: 20,
        }),
      ]);
    case 1:
      return toolCallResponse([
        toolCall('e2e_read_caller', 'read_file', { file_path: 'src/user-handler.ts' }),
      ]);
    case 2:
      return toolCallResponse([
        toolCall('e2e_read_callee', 'read_file', { file_path: 'src/auth.ts' }),
      ]);
    default:
      // Investigation script exhausted: finalize with the canned findings.
      return stopResponse(MOCK_RESPONSES.specialist);
  }
}
/**
 * Creates the mock chat function. Specialist requests that carry at least one
 * tool follow the scripted investigation; every other request gets the preset
 * final response for its role.
 */
export function createMockChatForRole(): ChatForRoleFn {
  const mockChat: ChatForRoleFn = async (role, request) => {
    const hasTools = Boolean(request.tools?.length);
    if (role === 'specialist' && hasTools) {
      return createAutonomousSpecialistResponse(request);
    }
    return stopResponse(MOCK_RESPONSES[role]);
  };
  return mockChat;
}
/**
 * Reports whether the LLM mock is enabled, i.e. the `E2E_MOCK_LLM`
 * environment variable is exactly `'1'`.
 */
export function isE2EMockActive(): boolean {
  const flag = process.env.E2E_MOCK_LLM;
  return flag === '1';
}
/**
 * When the E2E mock is active, replaces `llmGateway.chatForRole` with the mock
 * chat function and logs that it did so. Does nothing otherwise.
 */
export function installE2EMockLLMGateway(): void {
  if (isE2EMockActive()) {
    console.log('[E2E] LLM mock active — all chatForRole calls return preset responses');
    const mockChat = createMockChatForRole();
    llmGateway.chatForRole = mockChat as typeof llmGateway.chatForRole;
  }
}

View File

@@ -90,30 +90,6 @@ export class LLMGateway {
);
}
/**
* Embedding via the provider assigned to the 'embedding' role.
*/
async embedForRole(texts: string[]): Promise<number[][]> {
const assignment = modelRoleRepo.getByRole('embedding');
if (!assignment) throw new LLMNoProviderError('embedding');
return withResilience(
this.semaphore,
() => {
const provider = this.getOrCreateProvider(assignment.provider_id);
if (!provider.embed) {
throw new LLMError(
`Provider '${provider.type}' does not support embeddings`,
provider.type
);
}
return provider.embed(texts);
},
this.retryOptions,
'embedding'
);
}
/**
* Invalidate cached provider instance (call when config/key changes via UI).
*/

View File

@@ -76,6 +76,52 @@ function extractToolCalls(choice: OpenAI.ChatCompletion.Choice): LLMToolCall[] {
}));
}
type OpenAIToolChoice = NonNullable<OpenAI.ChatCompletionCreateParamsNonStreaming['tool_choice']>;
/**
 * Type guard for a named-function tool choice:
 * `{ type: 'function', function: { name: <non-empty string> } }`.
 *
 * An empty name is rejected: it cannot identify a tool, so the caller should
 * drop the option rather than forward it as a named tool choice.
 *
 * @param value Untrusted provider option value.
 * @returns `true` only when `value` has the shape above.
 */
function isNamedToolChoice(value: unknown): value is {
  type: 'function';
  function: { name: string };
} {
  if (typeof value !== 'object' || value === null) return false;
  const candidate = value as Record<string, unknown>;
  if (candidate.type !== 'function') return false;

  const fn = candidate.function;
  if (typeof fn !== 'object' || fn === null) return false;

  const name = (fn as Record<string, unknown>).name;
  return typeof name === 'string' && name.length > 0;
}
/**
 * Converts an untyped `tool_choice` provider option into the Chat Completions
 * form. Accepts the scalars `'auto'`, `'none'`, `'required'` and named-function
 * choices; anything else yields `undefined`.
 */
function toOpenAIToolChoice(value: unknown): OpenAIToolChoice | undefined {
  if (typeof value === 'string') {
    return value === 'auto' || value === 'none' || value === 'required' ? value : undefined;
  }

  if (!isNamedToolChoice(value)) {
    return undefined;
  }

  // Rebuild the object so only the recognized fields are forwarded.
  const { name } = value.function;
  return { type: 'function', function: { name } };
}
/**
 * Builds non-streaming Chat Completions parameters from a gateway request.
 *
 * Optional fields are set only when the request provides them:
 * `temperature`, `max_tokens` (from `maxTokens`), `response_format` (only for
 * `responseFormat === 'json'`), `tools` (only when non-empty) and `tool_choice`
 * (only when `providerOptions.tool_choice` is a recognized value).
 *
 * @param request Gateway chat request.
 * @param model Model name to send.
 */
export function buildOpenAICompatibleChatParams(
  request: LLMChatRequest,
  model: string
): OpenAI.ChatCompletionCreateParamsNonStreaming {
  const toolChoice = toOpenAIToolChoice(request.providerOptions?.tool_choice);

  const params: OpenAI.ChatCompletionCreateParamsNonStreaming = {
    model,
    messages: toOpenAIMessages(request.messages),
  };

  if (request.temperature !== undefined) {
    params.temperature = request.temperature;
  }
  if (request.maxTokens !== undefined) {
    params.max_tokens = request.maxTokens;
  }
  if (request.responseFormat === 'json') {
    params.response_format = { type: 'json_object' };
  }
  if (request.tools?.length) {
    params.tools = toOpenAITools(request.tools);
  }
  if (toolChoice) {
    params.tool_choice = toolChoice;
  }

  return params;
}
class OpenAICompatibleProvider implements LLMProvider {
readonly type = TYPE;
readonly capabilities: ProviderCapabilities;
@@ -101,15 +147,7 @@ class OpenAICompatibleProvider implements LLMProvider {
async chat(request: LLMChatRequest): Promise<LLMChatResponse> {
const model = request.model || this.defaultModel;
const params: OpenAI.ChatCompletionCreateParamsNonStreaming = {
model,
messages: toOpenAIMessages(request.messages),
...(request.temperature !== undefined ? { temperature: request.temperature } : {}),
...(request.maxTokens !== undefined ? { max_tokens: request.maxTokens } : {}),
...(request.responseFormat === 'json' ? { response_format: { type: 'json_object' } } : {}),
...(request.tools?.length ? { tools: toOpenAITools(request.tools) as any } : {}),
};
const params = buildOpenAICompatibleChatParams(request, model);
try {
const response = await this.client.chat.completions.create(params);

View File

@@ -5,7 +5,16 @@
import type { LLMToolDefinition } from './types';
export function toOpenAITools(tools: LLMToolDefinition[]): object[] {
export interface OpenAIToolDefinition {
type: 'function';
function: {
name: string;
description: string;
parameters: Record<string, unknown>;
};
}
export function toOpenAITools(tools: LLMToolDefinition[]): OpenAIToolDefinition[] {
return tools.map((tool) => ({
type: 'function' as const,
function: {

View File

@@ -10,15 +10,9 @@
// ---------------------------------------------------------------------------
/** Business role that maps to a specific provider + model via DB config. */
export type ModelRole = 'planner' | 'specialist' | 'judge' | 'embedding';
export type ModelRole = 'planner' | 'specialist';
/** All valid model roles. */
export const MODEL_ROLES: readonly ModelRole[] = [
'planner',
'specialist',
'judge',
'embedding',
] as const;
export const MODEL_ROLES: readonly ModelRole[] = ['planner', 'specialist'] as const;
// ---------------------------------------------------------------------------
// Provider Type

View File

@@ -0,0 +1,488 @@
import { describe, expect, mock, test } from 'bun:test';
import { z } from 'zod';
import type { LLMGateway } from '../../llm/gateway';
import type {
LLMChatRequest,
LLMChatResponse,
LLMMessage,
LLMToolDefinition,
ModelRole,
} from '../../llm/types';
import { AutonomousReviewAgent } from '../agents/autonomous-review-agent';
import { ToolRegistry } from '../tools/registry';
import type { Tool } from '../tools/types';
import type { ReviewContext, ReviewRun, ReviewTask } from '../types';
type ChatRequest = {
messages: LLMMessage[];
temperature?: number;
responseFormat?: 'text' | 'json';
tools?: LLMToolDefinition[];
providerOptions?: Record<string, unknown>;
};
type ChatCall = { role: ModelRole } & ChatRequest;
/**
 * Builds a pull-request review run fixture; any field in `overrides` replaces
 * the default value.
 */
function makeRun(overrides: Partial<ReviewRun> = {}): ReviewRun {
  const defaults: ReviewRun = {
    id: 'run-autonomous-001',
    idempotencyKey: 'idem-autonomous',
    eventType: 'pull_request',
    status: 'in_progress',
    owner: 'test-owner',
    repo: 'test-repo',
    cloneUrl: 'https://example.com/repo.git',
    prNumber: 1,
    baseSha: 'aaa',
    headSha: 'bbb',
    attempts: 0,
    maxAttempts: 2,
    createdAt: new Date().toISOString(),
    updatedAt: new Date().toISOString(),
  };
  return { ...defaults, ...overrides };
}
/**
 * Builds a review context fixture for a one-line addition to `src/foo.ts`;
 * any field in `overrides` replaces the default value.
 */
function makeContext(overrides: Partial<ReviewContext> = {}): ReviewContext {
  const defaults: ReviewContext = {
    workspacePath: '/tmp/test-workspace',
    mirrorPath: '/tmp/test-mirror',
    diff: '--- a/src/foo.ts\n+++ b/src/foo.ts\n@@ -1,3 +1,4 @@\n+const x = null;\n export function foo() {}',
    changedFiles: [{ path: 'src/foo.ts', status: 'M', additions: 1, deletions: 0 }],
    parsedDiff: [
      {
        path: 'src/foo.ts',
        changes: [{ lineNumber: 1, content: 'const x = null;', type: 'add' }],
      },
    ],
    fileContents: { 'src/foo.ts': 'const x = null;\nexport function foo() {}' },
  };
  return { ...defaults, ...overrides };
}
/**
 * Builds a review task fixture (full mode, medium size, `src/foo.ts` as the
 * suspected entrypoint); any field in `overrides` replaces the default value.
 */
function makeTask(overrides: Partial<ReviewTask> = {}): ReviewTask {
  const defaults: ReviewTask = {
    mode: 'full',
    reviewSize: 'medium',
    riskTags: ['quality-sensitive'],
    suspectedEntrypoints: ['src/foo.ts'],
    tokenBudget: 8000,
  };
  return { ...defaults, ...overrides };
}
/**
 * Builds a concurrency-safe tool fixture with the given name and executor.
 * Its parameter schema accepts optional `query`, `pattern` and `file_path`
 * strings.
 */
function makeTool(name: string, execute: Tool['execute']): Tool {
  const parameters = z.object({
    query: z.string().optional(),
    pattern: z.string().optional(),
    file_path: z.string().optional(),
  });
  const tool: Tool = {
    name,
    description: `Tool ${name}`,
    parameters,
    isConcurrencySafe: true,
    execute,
  };
  return tool;
}
/**
 * Creates a fake gateway whose `chatForRole` records every call and answers
 * with the scripted response functions in order. Once the script runs out,
 * the last response function is reused for all further calls.
 *
 * @returns The fake gateway and an accessor for the recorded calls.
 */
function createMockGateway(responses: Array<(call: ChatCall) => LLMChatResponse>) {
  const recordedCalls: ChatCall[] = [];
  let nextResponse = 0;

  async function chatForRole(role: ModelRole, request: Omit<LLMChatRequest, 'model'>) {
    const call = { role, ...request };
    recordedCalls.push(call);
    const respond = responses[nextResponse] ?? responses[responses.length - 1];
    nextResponse += 1;
    return respond(call);
  }

  return {
    gateway: { chatForRole },
    getCalls: () => recordedCalls,
  };
}
/**
 * Builds a response that requests the given tool calls. Each call's `args` is
 * JSON-serialized into `arguments`; content is `null` and usage is all zeros.
 */
function toolCallResponse(
  toolCalls: Array<{ id: string; name: string; args: unknown }>
): LLMChatResponse {
  const serializedCalls = toolCalls.map(({ id, name, args }) => ({
    id,
    name,
    arguments: JSON.stringify(args),
  }));
  const response: LLMChatResponse = {
    content: null,
    toolCalls: serializedCalls,
    finishReason: 'tool_calls',
    usage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
  };
  return response;
}
/**
 * Builds a final text response: the given content, no tool calls, finish
 * reason `stop` and zeroed usage.
 */
function contentResponse(content: string): LLMChatResponse {
  const response: LLMChatResponse = {
    content,
    toolCalls: [],
    finishReason: 'stop',
    usage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
  };
  return response;
}
/** Builds a final text response whose content is `data` serialized as JSON. */
function jsonResponse(data: unknown): LLMChatResponse {
  const serialized = JSON.stringify(data);
  return contentResponse(serialized);
}
describe('AutonomousReviewAgent', () => {
// Happy path: the scripted model searches, reads a file, then answers with findings JSON.
// Checks that each tool ran once, that the first two gateway requests carry neither
// `responseFormat` nor `providerOptions`, and that diagnostics record the full state sequence.
test('model-driven investigation can search, read, then finalize without forced tool_choice or investigation JSON mode', async () => {
const registry = new ToolRegistry();
const searchCode = mock(async () => ({ results: ['src/foo.ts contains null'] }));
const readFile = mock(async () => ({ path: 'src/foo.ts', content: 'const x = null;' }));
registry.register(makeTool('search_code', searchCode));
registry.register(makeTool('read_file', readFile));
// Finding the scripted model reports in its final response.
const finding = {
category: 'quality' as const,
severity: 'medium' as const,
confidence: 0.84,
path: 'src/foo.ts',
line: 1,
title: 'Null assignment needs guard',
detail: 'The new value is null and later code assumes a value.',
evidence: 'const x = null;',
suggestion: 'Use a safe default or guard downstream access.',
};
// Script: turn 1 requests search_code, turn 2 requests read_file, turn 3 returns the findings.
const { gateway, getCalls } = createMockGateway([
() => toolCallResponse([{ id: 'call_1', name: 'search_code', args: { query: 'null' } }]),
() =>
toolCallResponse([{ id: 'call_2', name: 'read_file', args: { file_path: 'src/foo.ts' } }]),
() => jsonResponse({ findings: [finding] }),
]);
const agent = new AutonomousReviewAgent(gateway as unknown as LLMGateway, registry);
const result = await agent.review(makeRun(), makeContext(), makeTask());
expect(searchCode).toHaveBeenCalledTimes(1);
expect(readFile).toHaveBeenCalledTimes(1);
expect(result.findings).toHaveLength(1);
expect(result.findings[0]).toMatchObject({ category: 'quality', path: 'src/foo.ts' });
const calls = getCalls();
expect(calls).toHaveLength(3);
expect(calls[0].role).toBe('specialist');
// Both registered tools are offered to the model, in registration order.
expect(calls[0].tools?.map((tool) => tool.name)).toEqual(['search_code', 'read_file']);
// Investigation turns must not force JSON mode or provider-specific options.
expect(calls[0].responseFormat).toBeUndefined();
expect(calls[0].providerOptions).toBeUndefined();
expect(calls[1].responseFormat).toBeUndefined();
expect(calls[1].providerOptions).toBeUndefined();
expect(result.diagnostics).toMatchObject({
iterations: 3,
toolCallNames: ['search_code', 'read_file'],
toolCallCount: 2,
parsedFindingCount: 1,
stopReason: 'modelFinalized',
});
// Two tool rounds alternate investigating/tool_calling before finalizing.
expect(result.diagnostics?.stateSequence).toEqual([
'investigating',
'tool_calling',
'investigating',
'tool_calling',
'investigating',
'finalizing',
'completed',
]);
});
// Cross-file scenario: the changed caller trims the result of a callee that can return null.
// The scripted model searches once, reads the caller and then the callee, and only then
// reports the finding; the test checks the tool order, the tool messages fed back to the
// model, and the resulting finding and diagnostics.
test('cross-file investigation reads caller and callee before reporting autonomous finding', async () => {
const registry = new ToolRegistry();
// search_code always returns one match in the caller and one in the callee.
const searchCode = mock(async () => ({
matches: [
{
path: 'src/caller.ts',
line: 4,
content: 'return normalizeToken(raw).trim();',
},
{
path: 'src/callee.ts',
line: 2,
content: 'return raw.length === 0 ? null : raw;',
},
],
total: 2,
}));
// read_file serves canned contents for the two known paths and an error payload otherwise.
const readFile = mock(async ({ file_path }: { file_path?: string }) => {
if (file_path === 'src/caller.ts') {
return {
path: 'src/caller.ts',
content:
"import { normalizeToken } from './callee';\n\nexport function buildHeader(raw: string) {\n return normalizeToken(raw).trim();\n}",
};
}
if (file_path === 'src/callee.ts') {
return {
path: 'src/callee.ts',
content:
'export function normalizeToken(raw: string): string | null {\n return raw.length === 0 ? null : raw;\n}',
};
}
return { path: file_path, error: 'unexpected file' };
});
registry.register(makeTool('search_code', searchCode));
registry.register(makeTool('read_file', readFile));
// Finding the scripted model reports after reading both files.
const finding = {
category: 'correctness' as const,
severity: 'high' as const,
confidence: 0.93,
path: 'src/caller.ts',
line: 4,
title: 'Caller trims nullable callee result',
detail:
'buildHeader calls trim() on normalizeToken(raw), but normalizeToken returns null for empty input in src/callee.ts.',
evidence:
'src/caller.ts: normalizeToken(raw).trim(); src/callee.ts: return raw.length === 0 ? null : raw;',
suggestion: 'Guard the nullable result or change normalizeToken to always return a string.',
};
// Script: search, read caller, read callee, then finalize.
const { gateway, getCalls } = createMockGateway([
() =>
toolCallResponse([
{ id: 'call_1', name: 'search_code', args: { pattern: 'normalizeToken' } },
]),
() =>
toolCallResponse([
{ id: 'call_2', name: 'read_file', args: { file_path: 'src/caller.ts' } },
]),
() =>
toolCallResponse([
{ id: 'call_3', name: 'read_file', args: { file_path: 'src/callee.ts' } },
]),
(call) => {
// Before finalizing, the request must contain all three tool results,
// including content that mentions both files.
const toolMessages = call.messages.filter((message) => message.role === 'tool');
expect(toolMessages).toHaveLength(3);
expect(toolMessages.map((message) => message.content)).toEqual(
expect.arrayContaining([
expect.stringContaining('src/caller.ts'),
expect.stringContaining('src/callee.ts'),
])
);
return jsonResponse({ findings: [finding] });
},
]);
const result = await new AutonomousReviewAgent(
gateway as unknown as LLMGateway,
registry
).review(
makeRun(),
// Only the caller is part of the change; the callee is not in the provided context.
makeContext({
diff: [
'--- a/src/caller.ts',
'+++ b/src/caller.ts',
'@@ -1,3 +1,5 @@',
"+import { normalizeToken } from './callee';",
'+export function buildHeader(raw: string) {',
'+ return normalizeToken(raw).trim();',
'+}',
].join('\n'),
changedFiles: [{ path: 'src/caller.ts', status: 'M', additions: 4, deletions: 0 }],
parsedDiff: [
{
path: 'src/caller.ts',
changes: [
{ lineNumber: 4, content: ' return normalizeToken(raw).trim();', type: 'add' },
],
},
],
fileContents: {
'src/caller.ts':
"import { normalizeToken } from './callee';\n\nexport function buildHeader(raw: string) {\n return normalizeToken(raw).trim();\n}",
},
}),
makeTask({ suspectedEntrypoints: ['src/caller.ts'], maxTurns: 6, maxToolCalls: 6 })
);
expect(searchCode).toHaveBeenCalledTimes(1);
expect(readFile).toHaveBeenCalledTimes(2);
// Caller is read before callee.
expect(readFile.mock.calls.map(([params]) => params.file_path)).toEqual([
'src/caller.ts',
'src/callee.ts',
]);
expect(getCalls()).toHaveLength(4);
expect(result.findings).toHaveLength(1);
expect(result.findings[0]).toMatchObject({
category: 'correctness',
severity: 'high',
path: 'src/caller.ts',
line: 4,
title: 'Caller trims nullable callee result',
});
expect(result.findings[0].detail).toContain('src/callee.ts');
expect(result.diagnostics).toMatchObject({
toolCallNames: ['search_code', 'read_file', 'read_file'],
toolCallCount: 3,
parsedFindingCount: 1,
stopReason: 'modelFinalized',
});
});
// Two files change but only one is listed in `suspectedEntrypoints`.
// The user prompt sent on the first gateway call must still mention the other file
// and its content, i.e. entrypoints must not filter the context.
test('compact context keeps changed files and file contents outside suspectedEntrypoints', async () => {
// The model finalizes immediately with no findings; only the prompt is under test.
const { gateway, getCalls } = createMockGateway([() => jsonResponse({ findings: [] })]);
// No tool registry is passed to the agent in this test.
const agent = new AutonomousReviewAgent(gateway as unknown as LLMGateway);
await agent.review(
makeRun(),
makeContext({
diff: [
'--- a/src/entry.ts',
'+++ b/src/entry.ts',
'@@ -1,2 +1,3 @@',
'+export const entry = true;',
'--- a/src/other.ts',
'+++ b/src/other.ts',
'@@ -1,2 +1,3 @@',
'+export const other = true;',
].join('\n'),
changedFiles: [
{ path: 'src/entry.ts', status: 'M', additions: 1, deletions: 0 },
{ path: 'src/other.ts', status: 'M', additions: 1, deletions: 0 },
],
parsedDiff: [
{
path: 'src/entry.ts',
changes: [{ lineNumber: 1, content: 'export const entry = true;', type: 'add' }],
},
{
path: 'src/other.ts',
changes: [{ lineNumber: 1, content: 'export const other = true;', type: 'add' }],
},
],
fileContents: {
'src/entry.ts': 'export const entry = true;',
'src/other.ts': 'export const other = true;',
},
}),
makeTask({ suspectedEntrypoints: ['src/entry.ts'] })
);
const userPrompt = getCalls()[0].messages.find((message) => message.role === 'user');
expect(userPrompt?.content).toContain('suspectedEntrypoints: src/entry.ts');
// The non-entrypoint file and its content must survive in the prompt.
expect(userPrompt?.content).toContain('src/other.ts');
expect(userPrompt?.content).toContain('export const other = true;');
expect(userPrompt?.content).toContain('changedFiles');
});
// With no explicit limits on a light-mode task, the scripted model keeps requesting tools.
// The test expects four investigation iterations, then a fifth gateway call in JSON mode
// that synthesizes the result, with `stopReason: 'maxTurns'`.
// NOTE(review): the four-turn limit is implied by these assertions; the default itself
// lives in the agent — confirm there.
test('uses default light budget and synthesizes after maxTurns when task omits specific limits', async () => {
const registry = new ToolRegistry();
registry.register(makeTool('search_code', async () => ({ results: [] })));
// Four tool-call turns followed by an empty findings payload.
const { gateway, getCalls } = createMockGateway([
() => toolCallResponse([{ id: 'call_1', name: 'search_code', args: { query: 'a' } }]),
() => toolCallResponse([{ id: 'call_2', name: 'search_code', args: { query: 'b' } }]),
() => toolCallResponse([{ id: 'call_3', name: 'search_code', args: { query: 'c' } }]),
() => toolCallResponse([{ id: 'call_4', name: 'search_code', args: { query: 'd' } }]),
() => jsonResponse({ findings: [] }),
]);
const agent = new AutonomousReviewAgent(gateway as unknown as LLMGateway, registry);
const result = await agent.review(
makeRun(),
makeContext(),
// Explicitly clear every limit so the agent has to fall back to its defaults.
makeTask({
mode: 'light',
maxTurns: undefined,
maxToolCalls: undefined,
maxElapsedMs: undefined,
})
);
expect(getCalls()).toHaveLength(5);
// The final (synthesis) request is the one that asks for JSON output.
expect(getCalls()[4].responseFormat).toBe('json');
expect(result.findings).toHaveLength(0);
expect(result.diagnostics).toMatchObject({
iterations: 4,
toolCallCount: 4,
stopReason: 'maxTurns',
parsedFindingCount: 0,
});
expect(result.diagnostics?.stateSequence).toContain('synthesizing');
});
// The first model answer is not JSON; the agent is expected to ask again in JSON mode
// and accept the second answer. Also checks that a finding without `category` is
// normalized to 'correctness' and receives a fingerprint.
test('finalization repairs invalid JSON once and accepts valid JSON on second attempt', async () => {
// Deliberately omits `category` and `fingerprint`.
const validFinding = {
severity: 'high' as const,
confidence: 0.91,
path: 'src/foo.ts',
line: 1,
title: 'Unsafe null',
detail: 'Null is returned to callers that expect a string.',
evidence: 'const x = null;',
suggestion: 'Return a string or update callers to handle null.',
};
const { gateway, getCalls } = createMockGateway([
() => contentResponse('not valid json'),
() => jsonResponse({ findings: [validFinding] }),
]);
const agent = new AutonomousReviewAgent(gateway as unknown as LLMGateway);
const result = await agent.review(makeRun(), makeContext(), makeTask({ mode: 'light' }));
expect(getCalls()).toHaveLength(2);
// Only the repair request switches to JSON mode.
expect(getCalls()[0].responseFormat).toBeUndefined();
expect(getCalls()[1].responseFormat).toBe('json');
expect(result.findings).toHaveLength(1);
expect(result.findings[0].category).toBe('correctness');
expect(result.findings[0].fingerprint).toBeTruthy();
// Exactly one parse error is recorded, for the first (invalid) answer.
expect(result.diagnostics?.parseErrors?.length).toBe(1);
expect(result.diagnostics?.finalResponsePreview).toContain('Unsafe null');
});
// Two answers in a row with no text and no tool calls (`null` content, then empty string)
// must end the investigation with `stopReason: 'emptyResponses'`.
test('stops after two consecutive empty investigation responses', async () => {
const { gateway } = createMockGateway([
// Empty response #1: null content.
() => ({
content: null,
toolCalls: [],
finishReason: 'stop',
usage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
}),
// Empty response #2: empty-string content.
() => ({
content: '',
toolCalls: [],
finishReason: 'stop',
usage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
}),
// Answer available for any follow-up call after the agent stops investigating.
() => jsonResponse({ findings: [] }),
]);
const agent = new AutonomousReviewAgent(gateway as unknown as LLMGateway);
const result = await agent.review(makeRun(), makeContext(), makeTask());
expect(result.findings).toHaveLength(0);
expect(result.diagnostics).toMatchObject({
emptyResponseCount: 2,
stopReason: 'emptyResponses',
parsedFindingCount: 0,
});
});
// A tool that always throws is requested three times in a row; the agent must stop with
// `stopReason: 'toolFailures'` even though `maxTurns` (10) would allow more turns.
test('stops after three consecutive tool failures and records sequence', async () => {
const registry = new ToolRegistry();
registry.register(
makeTool('broken_tool', async () => {
throw new Error('boom');
})
);
const { gateway } = createMockGateway([
() => toolCallResponse([{ id: 'call_1', name: 'broken_tool', args: {} }]),
() => toolCallResponse([{ id: 'call_2', name: 'broken_tool', args: {} }]),
() => toolCallResponse([{ id: 'call_3', name: 'broken_tool', args: {} }]),
// Answer available for any follow-up call after the agent gives up on tools.
() => jsonResponse({ findings: [] }),
]);
const agent = new AutonomousReviewAgent(gateway as unknown as LLMGateway, registry);
const result = await agent.review(makeRun(), makeContext(), makeTask({ maxTurns: 10 }));
expect(result.findings).toHaveLength(0);
expect(result.diagnostics).toMatchObject({
toolCallNames: ['broken_tool', 'broken_tool', 'broken_tool'],
consecutiveToolFailures: 3,
stopReason: 'toolFailures',
});
});
});

View File

@@ -1,618 +0,0 @@
import { afterEach, beforeEach, describe, expect, test } from 'bun:test';
import { mock } from 'bun:test';
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import path from 'node:path';
import { JudgeAgent } from '../agents/judge-agent';
import type { TriageResult } from '../agents/triage-agent';
import type { DiffExtractor } from '../context/diff-extractor';
import type { LocalRepoManager } from '../context/local-repo-manager';
import { ReviewOrchestrator } from '../orchestrator';
import { applyPublishPolicy } from '../policy/publish-policy';
import { FileReviewStore } from '../store/file-review-store';
import type { Finding, PullRequestReviewPayload, ReviewContext, ReviewRun } from '../types';
/** A `Finding` without `id`, `runId` and `published`, which these tests add only when persisting. */
type PartialFinding = Omit<Finding, 'id' | 'runId' | 'published'>;
/**
 * Builds a pull-request review payload for tests.
 *
 * @param overrides Fields that replace the defaults; keys explicitly set to
 *   `undefined` also override.
 * @returns A fresh payload object: the defaults merged with `overrides`.
 */
function makePRPayload(
  overrides: Partial<PullRequestReviewPayload> = {}
): PullRequestReviewPayload {
  const defaults: PullRequestReviewPayload = {
    idempotencyKey: 'test/repo#1:aaa...bbb',
    eventType: 'pull_request',
    owner: 'test-owner',
    repo: 'test-repo',
    cloneUrl: 'https://gitea.example.com/test-owner/test-repo.git',
    prNumber: 1,
    baseSha: 'aaa',
    headSha: 'bbb',
  };
  // Later sources win, so overrides take precedence over the defaults.
  return Object.assign({}, defaults, overrides);
}
/**
 * Generates `count` synthetic correctness findings that share one severity.
 *
 * Fingerprint, path, line, title, detail, evidence and suggestion are derived
 * from the zero-based index; confidence depends only on the severity
 * (high 0.95, medium 0.85, otherwise 0.7).
 *
 * @param count Number of findings to create.
 * @param severity Severity applied to every finding; defaults to `'high'`.
 * @returns The generated findings, in index order.
 */
function makeAgentFindings(
  count: number,
  severity: 'high' | 'medium' | 'low' = 'high'
): PartialFinding[] {
  let confidence: number;
  switch (severity) {
    case 'high':
      confidence = 0.95;
      break;
    case 'medium':
      confidence = 0.85;
      break;
    default:
      confidence = 0.7;
  }
  const buildFinding = (index: number): PartialFinding => ({
    fingerprint: `fp-${severity}-${index}`,
    category: 'correctness' as const,
    severity,
    confidence,
    path: `src/file${index}.ts`,
    line: 10 + index,
    title: `${severity} issue ${index}`,
    detail: `Detail for ${severity} issue ${index}`,
    evidence: `Evidence ${index}`,
    suggestion: `Fix suggestion ${index}`,
  });
  return Array.from({ length: count }, (_unused, index) => buildFinding(index));
}
/**
 * Builds a review context describing a one-line addition to `src/core.ts`.
 *
 * @param overrides Top-level fields that replace the defaults wholesale (no deep merge).
 * @returns A fresh context object: the defaults merged with `overrides`.
 */
function makeReviewContext(overrides: Partial<ReviewContext> = {}): ReviewContext {
  const corePath = 'src/core.ts';
  const coreSource = 'export const a = 1;';
  const defaults: ReviewContext = {
    workspacePath: '/tmp/workspace',
    mirrorPath: '/tmp/mirror',
    diff: 'diff --git a/src/core.ts b/src/core.ts\n+export const a = 1;',
    changedFiles: [{ path: corePath, status: 'M', additions: 1, deletions: 0 }],
    parsedDiff: [
      {
        path: corePath,
        changes: [{ lineNumber: 1, oldLineNumber: 1, content: coreSource, type: 'add' }],
      },
    ],
    fileContents: { [corePath]: coreSource },
  };
  // Later sources win, so overrides take precedence over the defaults.
  return Object.assign({}, defaults, overrides);
}
/**
 * Creates mocked collaborators for `ReviewOrchestrator` so no real repository work happens.
 *
 * @param context The review context that the mocked `buildContext` resolves to.
 * @returns `localRepoManager` (workspace and reviewed-ref mocks) and `diffExtractor`
 *   (sandbox and context mocks); every method is a `bun:test` mock.
 */
function createOrchestratorDeps(context: ReviewContext) {
  // Local repository manager: fixed paths, no stored reviewed ref, no-op save/cleanup.
  const prepareWorkspace = mock(async () => {
    return {
      mirrorPath: '/tmp/mirror',
      workspacePath: '/tmp/workspace',
    };
  });
  const resolveReviewedRef = mock(async () => null);
  const saveReviewedRef = mock(async () => undefined);
  const cleanupWorkspace = mock(async () => undefined);

  // Diff extractor: sandbox commands succeed with empty output; context is the supplied one.
  const getSandbox = mock(() => {
    return {
      execute: async () => ({ stdout: '', stderr: '', exitCode: 0 }),
    };
  });
  const buildContext = mock(async () => context);

  return {
    localRepoManager: {
      prepareWorkspace,
      resolveReviewedRef,
      saveReviewedRef,
      cleanupWorkspace,
    },
    diffExtractor: {
      getSandbox,
      buildContext,
    },
  };
}
/**
* Integration tests: Store → JudgeAgent → PublishPolicy → Store pipeline
*
* These tests simulate the orchestrator's data flow without needing
* live OpenAI or Gitea services. They verify that the pipeline from
* enqueueing a run through judging findings to applying publish policy
* works correctly end-to-end.
*/
describe('Integration: Store → Judge → Policy pipeline', () => {
let tempDir: string;
let store: FileReviewStore;
// Give every test its own store backed by a fresh temporary directory.
beforeEach(async () => {
tempDir = await mkdtemp(path.join(tmpdir(), 'integration-test-'));
store = new FileReviewStore(tempDir);
await store.init();
});
// Remove the temporary directory; `force` keeps cleanup from failing if it is already gone.
afterEach(async () => {
await rm(tempDir, { recursive: true, force: true });
});
// Walks the whole data flow by hand: create and acquire a run, feed five findings
// (one a lower-confidence duplicate) through the judge and publish policy, persist the
// survivors, mark them published, record comments, and finally verify the stored run.
test('full pipeline: enqueue → agent findings → judge dedup → policy → store findings → publish mark', async () => {
const payload = makePRPayload();
const { run, reused } = await store.createOrReuseRun(payload);
expect(reused).toBe(false);
expect(run.status).toBe('queued');
const acquired = await store.acquireNextQueuedRun();
expect(acquired).not.toBeNull();
expect(acquired!.status).toBe('in_progress');
await store.addStep({
runId: run.id,
stepName: 'run_specialists',
status: 'started',
startedAt: new Date().toISOString(),
});
// All generated findings have category 'correctness'; the variable names only label
// the severity groups (2 high, 1 medium, 1 low).
const correctnessFindings = makeAgentFindings(2, 'high');
const securityFindings = makeAgentFindings(1, 'medium');
const lowFindings = makeAgentFindings(1, 'low');
// Same fingerprint as the first high finding but lower confidence.
const duplicateFinding: PartialFinding = {
...correctnessFindings[0],
confidence: 0.7,
detail: 'Duplicate with lower confidence',
};
const allAgentFindings = [
...correctnessFindings,
...securityFindings,
...lowFindings,
duplicateFinding,
];
const judge = new JudgeAgent();
const decision = judge.judge(allAgentFindings);
// Five inputs collapse to four; the 0.95-confidence version of fp-high-0 survives.
expect(decision.findings.length).toBe(4);
const dedupedFp0 = decision.findings.find((f) => f.fingerprint === 'fp-high-0');
expect(dedupedFp0!.confidence).toBe(0.95);
// NOTE(review): 0.8 is presumably the confidence threshold and `false` the human-gate
// flag — confirm against applyPublishPolicy's signature.
const policyResult = applyPublishPolicy(decision.findings, 0.8, false);
expect(policyResult.publishable.length).toBe(3);
expect(policyResult.gated.length).toBe(0);
// Only the low-severity finding (confidence 0.7) is dropped.
expect(policyResult.dropped.length).toBe(1);
expect(policyResult.dropped[0].severity).toBe('low');
const findingsToStore = [...policyResult.publishable, ...policyResult.gated];
const persistedFindings: Finding[] = findingsToStore.map((f, i) => ({
...f,
id: `finding-${i}`,
runId: run.id,
published: false,
}));
await store.addFindings(run.id, persistedFindings);
// First marking reports the finding as newly published...
for (const finding of policyResult.publishable) {
const wasNew = await store.markFindingPublished(run.id, finding.fingerprint);
expect(wasNew).toBe(true);
}
// ...and marking it again reports false.
for (const finding of policyResult.publishable) {
const wasNew = await store.markFindingPublished(run.id, finding.fingerprint);
expect(wasNew).toBe(false);
}
// One summary comment without a path, then one line comment per published finding.
await store.addCommentRecord({
runId: run.id,
status: 'published',
body: `## AI Agent代码审查结果\n\n${decision.summaryMarkdown}`,
});
for (const finding of policyResult.publishable) {
await store.addCommentRecord({
runId: run.id,
status: 'published',
path: finding.path,
line: finding.line,
body: `**[${finding.severity.toUpperCase()}]** ${finding.title}`,
});
}
await store.markRunSucceeded(run.id);
const details = await store.getRunDetails(run.id);
expect(details).not.toBeNull();
expect(details!.run.status).toBe('succeeded');
expect(details!.findings.length).toBe(3);
expect(details!.findings.every((f) => f.published)).toBe(true);
// 1 summary + 3 line comments.
expect(details!.comments.length).toBe(4);
expect(details!.comments.filter((c) => !c.path).length).toBe(1);
expect(details!.comments.filter((c) => c.path).length).toBe(3);
});
// With the third policy argument set to true, a high-severity finding whose confidence
// (0.5) is below 0.8 must land in `gated` rather than `dropped`. The gated finding is
// stored unpublished and gets a 'pending' comment that carries its fingerprint.
test('pipeline with humanGate: low-confidence findings go to gated, not dropped', async () => {
const payload = makePRPayload({ idempotencyKey: 'gate-test' });
const { run } = await store.createOrReuseRun(payload);
await store.acquireNextQueuedRun();
// One confident high finding plus one low-confidence security finding.
const findings: PartialFinding[] = [
...makeAgentFindings(1, 'high'),
{
fingerprint: 'fp-low-conf',
category: 'security',
severity: 'high',
confidence: 0.5,
path: 'src/auth.ts',
line: 20,
title: 'Potential auth bypass',
detail: 'Detail',
evidence: 'Evidence',
suggestion: 'Fix',
},
];
const judge = new JudgeAgent();
const decision = judge.judge(findings);
const policyResult = applyPublishPolicy(decision.findings, 0.8, true);
expect(policyResult.publishable.length).toBe(1);
expect(policyResult.gated.length).toBe(1);
expect(policyResult.dropped.length).toBe(0);
expect(policyResult.gated[0].fingerprint).toBe('fp-low-conf');
// Both publishable and gated findings are persisted.
const allToStore = [...policyResult.publishable, ...policyResult.gated];
const persisted: Finding[] = allToStore.map((f, i) => ({
...f,
id: `f-${i}`,
runId: run.id,
published: false,
}));
await store.addFindings(run.id, persisted);
// Only publishable findings are marked published.
for (const f of policyResult.publishable) {
await store.markFindingPublished(run.id, f.fingerprint);
}
// Gated findings get a pending comment instead.
for (const f of policyResult.gated) {
await store.addCommentRecord({
runId: run.id,
status: 'pending',
path: f.path,
line: f.line,
body: `PENDING: ${f.title}`,
fingerprint: f.fingerprint,
});
}
const details = await store.getRunDetails(run.id);
const pendingComments = details!.comments.filter((c) => c.status === 'pending');
expect(pendingComments.length).toBe(1);
expect(pendingComments[0].fingerprint).toBe('fp-low-conf');
const unpublished = details!.findings.filter((f) => !f.published);
expect(unpublished.length).toBe(1);
expect(unpublished[0].fingerprint).toBe('fp-low-conf');
});
// Enqueueing the identical payload three times must create one run and then reuse it twice.
test('idempotency: duplicate webhook enqueue returns same run', async () => {
const payload = makePRPayload();
const { run: first, reused: r1 } = await store.createOrReuseRun(payload);
expect(r1).toBe(false);
const { run: second, reused: r2 } = await store.createOrReuseRun(payload);
expect(r2).toBe(true);
expect(second.id).toBe(first.id);
const { run: third, reused: r3 } = await store.createOrReuseRun(payload);
expect(r3).toBe(true);
expect(third.id).toBe(first.id);
});
// After a run has failed, enqueueing the same payload must create a new run (not reuse the
// failed one), the failed run's recorded step must remain readable, and the new run must be
// the next one acquired.
test('retry flow: failed run creates new run on next enqueue, old steps/findings preserved', async () => {
const payload = makePRPayload({ maxAttempts: 1 });
const { run: firstRun } = await store.createOrReuseRun(payload);
await store.acquireNextQueuedRun();
await store.addStep({
runId: firstRun.id,
stepName: 'prepare_workspace',
status: 'failed',
startedAt: new Date().toISOString(),
error: 'git clone failed',
});
await store.markRunFailed(firstRun.id, 'git clone failed');
const firstDetails = await store.getRunDetails(firstRun.id);
expect(firstDetails!.run.status).toBe('failed');
expect(firstDetails!.steps.length).toBe(1);
// Same payload again: a different run id is expected.
const { run: retryRun, reused } = await store.createOrReuseRun(payload);
expect(reused).toBe(false);
expect(retryRun.id).not.toBe(firstRun.id);
const retryAcquired = await store.acquireNextQueuedRun();
expect(retryAcquired!.id).toBe(retryRun.id);
});
// Simulates a restart: two runs are acquired, only the first is completed, then a second
// store instance over the same directory recovers interrupted runs. Exactly one run must be
// recovered, and it must be the unfinished one.
test('recovery after crash: in_progress runs are recovered to queued', async () => {
const p1 = makePRPayload({ idempotencyKey: 'crash-1' });
const p2 = makePRPayload({ idempotencyKey: 'crash-2' });
const { run: run1 } = await store.createOrReuseRun(p1);
const { run: run2 } = await store.createOrReuseRun(p2);
// Acquire both runs, then finish only run1.
await store.acquireNextQueuedRun();
await store.acquireNextQueuedRun();
await store.markRunSucceeded(run1.id);
// A new instance over the same directory stands in for the restarted process.
const store2 = new FileReviewStore(tempDir);
await store2.init();
const recovered = await store2.recoverInterruptedRuns();
expect(recovered).toBe(1);
const next = await store2.acquireNextQueuedRun();
expect(next).not.toBeNull();
expect(next!.id).toBe(run2.id);
});
// Five payloads with distinct idempotency keys are enqueued via Promise.all; each must get
// its own run, none may be reported as reused, and the store must list all five.
test('concurrent enqueue: multiple payloads with different keys all get unique runs', async () => {
const payloads = Array.from({ length: 5 }, (_, i) =>
makePRPayload({ idempotencyKey: `concurrent-${i}`, prNumber: i + 1 })
);
const results = await Promise.all(payloads.map((p) => store.createOrReuseRun(p)));
// A Set collapses duplicate ids, so size 5 means all ids are distinct.
const ids = new Set(results.map((r) => r.run.id));
expect(ids.size).toBe(5);
expect(results.every((r) => !r.reused)).toBe(true);
const runs = await store.listRuns(10);
expect(runs.length).toBe(5);
});
// With no findings, the judge's summary must contain '未发现', the policy must return three
// empty buckets, and the stored run must end with only the single summary comment.
test('end-to-end: no findings → summary only, no line comments', async () => {
const payload = makePRPayload({ idempotencyKey: 'no-findings' });
const { run } = await store.createOrReuseRun(payload);
await store.acquireNextQueuedRun();
const judge = new JudgeAgent();
const decision = judge.judge([]);
expect(decision.findings.length).toBe(0);
expect(decision.summaryMarkdown).toContain('未发现');
const policyResult = applyPublishPolicy(decision.findings, 0.8, false);
expect(policyResult.publishable.length).toBe(0);
expect(policyResult.gated.length).toBe(0);
expect(policyResult.dropped.length).toBe(0);
// Only the summary is recorded; there are no findings to comment on.
await store.addCommentRecord({
runId: run.id,
status: 'published',
body: decision.summaryMarkdown,
});
await store.markRunSucceeded(run.id);
const details = await store.getRunDetails(run.id);
expect(details!.run.status).toBe('succeeded');
expect(details!.findings.length).toBe(0);
expect(details!.comments.length).toBe(1);
expect(details!.comments[0].body).toContain('未发现');
});
// Writes a finding, marks it published and completes the run with one store instance, then
// reads everything back through a second instance over the same directory.
test('store persistence: data survives across store instances', async () => {
const payload = makePRPayload();
const { run } = await store.createOrReuseRun(payload);
await store.acquireNextQueuedRun();
const findings: Finding[] = [
{
id: 'persist-f1',
runId: run.id,
fingerprint: 'persist-fp-1',
category: 'security',
severity: 'high',
confidence: 0.95,
path: 'src/auth.ts',
line: 42,
title: 'SQL injection',
detail: 'Detail',
evidence: 'Evidence',
suggestion: 'Use parameterized queries',
published: false,
},
];
await store.addFindings(run.id, findings);
await store.markFindingPublished(run.id, 'persist-fp-1');
await store.markRunSucceeded(run.id);
// The second instance shares only the directory with the first one.
const freshStore = new FileReviewStore(tempDir);
await freshStore.init();
const details = await freshStore.getRunDetails(run.id);
expect(details).not.toBeNull();
expect(details!.run.status).toBe('succeeded');
expect(details!.findings.length).toBe(1);
// The published flag set before the "restart" must have been persisted.
expect(details!.findings[0].published).toBe(true);
expect(details!.findings[0].fingerprint).toBe('persist-fp-1');
});
});
describe('Integration: orchestrator staged routing pipeline', () => {
let tempDir: string;
let store: FileReviewStore;
// Reset bun mocks, then give every test its own store in a fresh temporary directory.
beforeEach(async () => {
mock.restore();
tempDir = await mkdtemp(path.join(tmpdir(), 'orchestrator-integration-'));
store = new FileReviewStore(tempDir);
await store.init();
});
// Reset bun mocks again and delete the temporary directory created for the test.
afterEach(async () => {
mock.restore();
await rm(tempDir, { recursive: true, force: true });
});
// Docs-only change: triage is stubbed to return mode 'skip' with no tasks. The orchestrator
// must finish without calling the correctness agent and without storing any findings.
test('skip mode bypasses specialists end-to-end', async () => {
const payload = makePRPayload({ idempotencyKey: 'stage-skip' });
const { run } = await store.createOrReuseRun(payload);
const acquired = await store.acquireNextQueuedRun();
expect(acquired).not.toBeNull();
// Context in which README.md is the only changed file.
const context = makeReviewContext({
changedFiles: [{ path: 'README.md', status: 'M', additions: 2, deletions: 0 }],
parsedDiff: [
{
path: 'README.md',
changes: [{ lineNumber: 10, oldLineNumber: 10, content: 'new docs', type: 'add' }],
},
],
fileContents: { 'README.md': 'new docs' },
diff: 'diff --git a/README.md b/README.md\n+new docs',
});
const { localRepoManager, diffExtractor } = createOrchestratorDeps(context);
const orchestrator = new ReviewOrchestrator(
store,
localRepoManager as unknown as LocalRepoManager,
diffExtractor as unknown as DiffExtractor
);
// Cast to a structural view of the members this test replaces with mocks.
const internal = orchestrator as unknown as {
triageAgent: { analyze: (ctx: ReviewContext) => Promise<TriageResult> };
correctnessAgent: {
reviewWithOptions: (
runArg: ReviewRun,
ctx: ReviewContext,
options?: unknown
) => Promise<unknown>;
};
publishSummary: (runArg: ReviewRun, summary: string, gatedCount: number) => Promise<void>;
publishLineComments: (
runArg: ReviewRun,
comments: Array<{ path: string; line: number; comment: string }>
) => Promise<boolean>;
};
internal.triageAgent = {
analyze: mock(
async (): Promise<TriageResult> => ({
complexity: 'trivial',
reviewSize: 'small',
mode: 'skip',
tasks: [],
riskTags: [],
rationale: 'docs-only',
})
),
};
// Spy that must stay uncalled in skip mode.
const correctnessSpy = mock(async () => ({ agentName: 'Correctness Agent', findings: [] }));
internal.correctnessAgent.reviewWithOptions = correctnessSpy;
// Publishing is stubbed so no external service is contacted.
internal.publishSummary = mock(async () => undefined);
internal.publishLineComments = mock(async () => false);
await orchestrator.execute(acquired!);
expect(correctnessSpy).not.toHaveBeenCalled();
const details = await store.getRunDetails(run.id);
expect(details).not.toBeNull();
expect(details!.findings).toHaveLength(0);
});
// Triage is stubbed to return a single full-mode correctness task. The test checks that the
// task's paths, tool permission, iteration limit and mode reach the correctness agent as
// options, and that the finding it returns is stored and marked published.
test('full task mode passes scoped options and publishes finding', async () => {
const payload = makePRPayload({ idempotencyKey: 'stage-full' });
const { run } = await store.createOrReuseRun(payload);
const acquired = await store.acquireNextQueuedRun();
expect(acquired).not.toBeNull();
const context = makeReviewContext();
const { localRepoManager, diffExtractor } = createOrchestratorDeps(context);
const orchestrator = new ReviewOrchestrator(
store,
localRepoManager as unknown as LocalRepoManager,
diffExtractor as unknown as DiffExtractor
);
// Cast to a structural view of the members this test replaces with mocks.
const internal = orchestrator as unknown as {
triageAgent: { analyze: (ctx: ReviewContext) => Promise<TriageResult> };
correctnessAgent: {
reviewWithOptions: (
runArg: ReviewRun,
ctx: ReviewContext,
options?: {
scopePaths?: string[];
allowTools?: boolean;
maxIterations?: number;
mode?: 'skip' | 'light' | 'full';
maxContextTokens?: number;
}
) => Promise<{
agentName: string;
findings: Array<Omit<Finding, 'id' | 'runId' | 'published'>>;
}>;
};
publishSummary: (runArg: ReviewRun, summary: string, gatedCount: number) => Promise<void>;
publishLineComments: (
runArg: ReviewRun,
comments: Array<{ path: string; line: number; comment: string }>
) => Promise<boolean>;
};
// Triage result with one correctness task scoped to src/core.ts.
internal.triageAgent = {
analyze: mock(
async (): Promise<TriageResult> => ({
complexity: 'standard',
reviewSize: 'small',
mode: 'full',
riskTags: ['security-sensitive'],
rationale: 'auth file changed',
tasks: [
{
domain: 'correctness',
paths: ['src/core.ts'],
riskTags: ['security-sensitive'],
mode: 'full',
tokenBudget: 12000,
maxIterations: 2,
allowTools: false,
allowReflection: false,
allowDebate: false,
},
],
})
),
};
// Spy that ignores its inputs and always reports one high-confidence finding.
const correctnessSpy = mock(
async (
_runArg: ReviewRun,
_ctx: ReviewContext,
_options?: {
scopePaths?: string[];
allowTools?: boolean;
maxIterations?: number;
mode?: 'skip' | 'light' | 'full';
maxContextTokens?: number;
}
) => ({
agentName: 'Correctness Agent',
findings: [
{
fingerprint: 'stage-full-fp-1',
category: 'correctness' as const,
severity: 'high' as const,
confidence: 0.95,
path: 'src/core.ts',
line: 1,
title: 'critical issue',
detail: 'detail',
evidence: 'evidence',
suggestion: 'fix',
},
],
})
);
internal.correctnessAgent.reviewWithOptions = correctnessSpy;
// Publishing is stubbed; line-comment publishing reports success.
internal.publishSummary = mock(async () => undefined);
internal.publishLineComments = mock(async () => true);
await orchestrator.execute(acquired!);
expect(correctnessSpy).toHaveBeenCalledTimes(1);
// The third argument of the call carries the options derived from the triage task.
const callArgs = correctnessSpy.mock.calls[0];
const options = callArgs?.[2];
expect(options?.scopePaths).toEqual(['src/core.ts']);
expect(options?.allowTools).toBe(false);
expect(options?.maxIterations).toBe(2);
expect(options?.mode).toBe('full');
const details = await store.getRunDetails(run.id);
expect(details).not.toBeNull();
expect(details!.findings).toHaveLength(1);
expect(details!.findings[0].published).toBe(true);
expect(details!.findings[0].path).toBe('src/core.ts');
});
});

View File

@@ -1,137 +0,0 @@
import { describe, expect, test } from 'bun:test';
import { JudgeAgent } from '../agents/judge-agent';
import type { Finding } from '../types';
/** The finding shape passed to `JudgeAgent.judge` in these tests: a `Finding` without `id`, `runId` and `published`. */
type TestFinding = Omit<Finding, 'id' | 'runId' | 'published'>;
/**
 * Builds a medium-severity correctness finding for `src/foo.ts`, line 10.
 *
 * The default fingerprint is `fp-` plus up to six random base-36 characters,
 * so it differs between calls unless overridden.
 *
 * @param overrides Fields that replace the defaults.
 * @returns A fresh finding object: the defaults merged with `overrides`.
 */
function makeFinding(overrides: Partial<TestFinding> = {}): TestFinding {
  const randomSuffix = Math.random().toString(36).slice(2, 8);
  const defaults: TestFinding = {
    fingerprint: `fp-${randomSuffix}`,
    category: 'correctness',
    severity: 'medium',
    confidence: 0.8,
    path: 'src/foo.ts',
    line: 10,
    title: 'Test issue',
    detail: 'Detail',
    evidence: 'Evidence',
    suggestion: 'Fix it',
  };
  // Later sources win, so overrides take precedence over the defaults.
  return Object.assign({}, defaults, overrides);
}
describe('JudgeAgent', () => {
const judge = new JudgeAgent();
// ─── Empty input ───
test('empty findings → summary says 未发现', () => {
const result = judge.judge([]);
expect(result.findings).toHaveLength(0);
expect(result.summaryMarkdown).toContain('未发现');
});
// ─── Deduplication by fingerprint ───
test('duplicate fingerprints → keeps highest weighted', () => {
const fp = 'same-fingerprint';
const findings: TestFinding[] = [
makeFinding({ fingerprint: fp, severity: 'low', confidence: 0.9 }), // weight: 1 * 0.9 = 0.9
makeFinding({ fingerprint: fp, severity: 'high', confidence: 0.5 }), // weight: 3 * 0.5 = 1.5 ← winner
makeFinding({ fingerprint: fp, severity: 'medium', confidence: 0.6 }), // weight: 2 * 0.6 = 1.2
];
const result = judge.judge(findings);
expect(result.findings).toHaveLength(1);
expect(result.findings[0].severity).toBe('high');
expect(result.findings[0].confidence).toBe(0.5);
});
test('same fingerprint same weight → first one wins (no override)', () => {
const fp = 'dup-fp';
const findings: TestFinding[] = [
makeFinding({ fingerprint: fp, severity: 'high', confidence: 0.5, title: 'First' }),
makeFinding({ fingerprint: fp, severity: 'high', confidence: 0.5, title: 'Second' }),
];
const result = judge.judge(findings);
expect(result.findings).toHaveLength(1);
// Same weight → second does NOT override (currentWeight > existingWeight is strict >)
expect(result.findings[0].title).toBe('First');
});
// ─── Sorting by severity × confidence ───
test('findings sorted by weight descending', () => {
const findings: TestFinding[] = [
makeFinding({ fingerprint: 'a', severity: 'low', confidence: 0.9 }), // 1 * 0.9 = 0.9
makeFinding({ fingerprint: 'b', severity: 'high', confidence: 0.8 }), // 3 * 0.8 = 2.4
makeFinding({ fingerprint: 'c', severity: 'medium', confidence: 0.7 }), // 2 * 0.7 = 1.4
];
const result = judge.judge(findings);
expect(result.findings).toHaveLength(3);
expect(result.findings[0].fingerprint).toBe('b'); // weight 2.4
expect(result.findings[1].fingerprint).toBe('c'); // weight 1.4
expect(result.findings[2].fingerprint).toBe('a'); // weight 0.9
});
// ─── Summary text ───
test('summary counts by severity', () => {
const findings: TestFinding[] = [
makeFinding({ fingerprint: 'a', severity: 'high', confidence: 0.9 }),
makeFinding({ fingerprint: 'b', severity: 'high', confidence: 0.85 }),
makeFinding({ fingerprint: 'c', severity: 'medium', confidence: 0.8 }),
makeFinding({ fingerprint: 'd', severity: 'low', confidence: 0.7 }),
];
const result = judge.judge(findings);
expect(result.summaryMarkdown).toContain('4 个问题');
expect(result.summaryMarkdown).toContain('high 2');
expect(result.summaryMarkdown).toContain('medium 1');
expect(result.summaryMarkdown).toContain('low 1');
});
// With a single medium finding the summary still lists every severity,
// reporting zero for the levels that did not occur.
test('single finding → counts correctly', () => {
  const onlyFinding = makeFinding({ fingerprint: 'x', severity: 'medium', confidence: 0.8 });
  const sample: TestFinding[] = [onlyFinding];
  const { summaryMarkdown } = judge.judge(sample);
  for (const fragment of ['1 个问题', 'high 0', 'medium 1', 'low 0']) {
    expect(summaryMarkdown).toContain(fragment);
  }
});
// ─── Dedup + sort combined ───
// Combines both behaviours: fingerprint 'x' appears twice (the heavier entry
// should win) and the surviving three findings must be ordered by weight.
test('dedup then sort: complex scenario', () => {
  const mixed: TestFinding[] = [
    makeFinding({ fingerprint: 'x', severity: 'low', confidence: 0.3 }), // weight 0.3 — superseded below
    makeFinding({ fingerprint: 'y', severity: 'high', confidence: 0.9 }), // weight 2.7 — unique
    makeFinding({ fingerprint: 'x', severity: 'medium', confidence: 0.8 }), // weight 1.6 — replaces the first x
    makeFinding({ fingerprint: 'z', severity: 'high', confidence: 0.5 }), // weight 1.5 — unique
  ];
  const { findings: ranked } = judge.judge(mixed);
  // One entry per fingerprint remains: x, y and z.
  expect(ranked).toHaveLength(3);
  // Ordered by weight: y (2.7) > x (1.6) > z (1.5).
  expect(ranked.map((finding) => finding.fingerprint)).toEqual(['y', 'x', 'z']);
  // The surviving x is the medium-severity duplicate, not the low one.
  expect(ranked[1].severity).toBe('medium');
});
// ─── All same severity ───
// When severity is equal everywhere, confidence alone decides the order.
test('all high severity → sorted by confidence descending', () => {
  const sameSeverity: TestFinding[] = [
    makeFinding({ fingerprint: 'a', severity: 'high', confidence: 0.5 }),
    makeFinding({ fingerprint: 'b', severity: 'high', confidence: 0.9 }),
    makeFinding({ fingerprint: 'c', severity: 'high', confidence: 0.7 }),
  ];
  const ranked = judge.judge(sameSeverity).findings;
  // Highest confidence first: b (0.9), c (0.7), a (0.5).
  expect(ranked[0].fingerprint).toBe('b');
  expect(ranked[1].fingerprint).toBe('c');
  expect(ranked[2].fingerprint).toBe('a');
});
// ─── Return type structure ───
// Shape check on the judge result, using an empty input.
test('result has summaryMarkdown and findings', () => {
  const outcome = judge.judge([]);
  for (const key of ['summaryMarkdown', 'findings']) {
    expect(outcome).toHaveProperty(key);
  }
  expect(typeof outcome.summaryMarkdown).toBe('string');
  expect(Array.isArray(outcome.findings)).toBe(true);
});
});

View File

@@ -1,435 +0,0 @@
import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test';
import { CodexRunner } from '../codex/codex-runner';
import type { DiffExtractor } from '../context/diff-extractor';
import type { LocalRepoManager, LocalRepoPaths } from '../context/local-repo-manager';
import { ReviewOrchestrator } from '../orchestrator';
import type { FileReviewStore } from '../store/file-review-store';
import type { Finding, ReviewContext, ReviewRun, ReviewTask } from '../types';
// Value the mocked `resolveReviewedRef` resolves with in these tests: a saved
// base/head SHA pair, or `null` when no snapshot is available.
type Snapshot = { baseSha: string; headSha: string } | null;
/**
 * Builds a `ReviewRun` fixture describing an in-progress pull-request review.
 *
 * @param overrides Fields that replace the fixture defaults.
 * @returns The default run with `overrides` applied on top.
 */
function makeRun(overrides: Partial<ReviewRun> = {}): ReviewRun {
  const defaults: ReviewRun = {
    id: 'run-1',
    idempotencyKey: 'owner/repo#1:base...head',
    eventType: 'pull_request',
    status: 'in_progress',
    owner: 'owner',
    repo: 'repo',
    cloneUrl: 'https://example.com/repo.git',
    prNumber: 1,
    baseSha: 'base-sha',
    headSha: 'head-sha',
    attempts: 1,
    maxAttempts: 3,
    // Both timestamps are taken independently at fixture-creation time.
    createdAt: new Date().toISOString(),
    updatedAt: new Date().toISOString(),
  };
  return { ...defaults, ...overrides };
}
/**
 * Creates a stand-in for the review store in which every method used by
 * these tests is a bun `mock`.
 *
 * @returns An object of async mocks: `getRunDetails` resolves to empty
 *   `comments`/`findings`, `markFindingPublished` resolves to `true`, and the
 *   remaining methods resolve to `undefined`.
 */
function createStoreMock() {
  // Each call produces a separate mock so call tracking is per method.
  const resolvesUndefined = () => mock(async () => undefined);
  return {
    markRunIgnored: resolvesUndefined(),
    addStep: resolvesUndefined(),
    getRunDetails: mock(async () => ({ comments: [], findings: [] })),
    addFindings: resolvesUndefined(),
    markFindingPublished: mock(async () => true),
    addCommentRecord: resolvesUndefined(),
  };
}
/**
 * Creates a mocked local-repo manager together with the paths it hands out.
 *
 * @param snapshot Value the mocked `resolveReviewedRef` resolves with.
 * @returns `manager` (all methods are bun mocks) and the fixed `repoPaths`
 *   that `prepareWorkspace` resolves with.
 */
function createLocalRepoManagerMock(snapshot: Snapshot) {
  const repoPaths: LocalRepoPaths = { mirrorPath: '/tmp/mirror', workspacePath: '/tmp/workspace' };
  const resolvesUndefined = () => mock(async () => undefined);
  return {
    manager: {
      prepareWorkspace: mock(async () => repoPaths),
      resolveReviewedRef: mock(async () => snapshot),
      saveReviewedRef: resolvesUndefined(),
      cleanupWorkspace: resolvesUndefined(),
    },
    repoPaths,
  };
}
/**
 * Creates a mocked diff extractor plus the `ReviewContext` it resolves with.
 *
 * @param diff Raw diff text stored on the context; defaults to a one-line change.
 * @returns `extractor` (with mocked `getSandbox` and `buildContext`) and the
 *   shared `context` object that `buildContext` resolves with.
 */
function createDiffExtractorMock(diff = 'diff --git a/a.ts b/a.ts\n+const x = 1;') {
  const context: ReviewContext = {
    workspacePath: '/tmp/workspace',
    mirrorPath: '/tmp/mirror',
    diff,
    changedFiles: [],
    parsedDiff: [],
    fileContents: {},
  };
  // Sandbox stub: every command succeeds and produces no output.
  const makeIdleSandbox = () => ({
    execute: async () => ({ stdout: '', stderr: '', exitCode: 0 }),
  });
  return {
    extractor: {
      getSandbox: mock(makeIdleSandbox),
      buildContext: mock(async () => context),
    },
    context,
  };
}
/**
 * Overwrites the orchestrator's collaborators with stubs that return trivial
 * results: triage resolves a `mode: 'skip'` outcome with no tasks, the judge
 * returns no findings, and both publish hooks are mocks.
 *
 * The fields are reached through a structural cast, so this helper relies on
 * the orchestrator's internal field names staying as listed below.
 *
 * @param orchestrator Instance whose internals are replaced in place.
 */
function wireOrchestratorFastPath(orchestrator: ReviewOrchestrator) {
  type DraftFinding = Omit<Finding, 'id' | 'runId' | 'published'>;
  type TriageOutcome = {
    complexity: 'trivial' | 'standard' | 'complex';
    reviewSize: 'small' | 'medium' | 'large';
    mode: 'skip' | 'light' | 'full';
    tasks: ReviewTask[];
    riskTags: string[];
    rationale: string;
  };
  type OrchestratorInternals = {
    triageAgent: {
      analyze: (context: ReviewContext) => Promise<TriageOutcome>;
    };
    judgeAgent: {
      judge: (findings: Array<DraftFinding>) => {
        summaryMarkdown: string;
        findings: Array<DraftFinding>;
      };
    };
    publishSummary: (run: ReviewRun, summary: string, gatedCount: number) => Promise<void>;
    publishLineComments: (
      run: ReviewRun,
      comments: Array<{ path: string; line: number; comment: string }>
    ) => Promise<boolean>;
  };

  const internals = orchestrator as unknown as OrchestratorInternals;

  internals.triageAgent = {
    // A fresh outcome object is produced on every call.
    analyze: mock(
      async (): Promise<TriageOutcome> => ({
        complexity: 'trivial',
        reviewSize: 'small',
        mode: 'skip',
        tasks: [],
        riskTags: [],
        rationale: 'test fast-path',
      })
    ),
  };
  internals.judgeAgent = {
    judge: mock(() => ({ summaryMarkdown: 'ok', findings: [] })),
  };
  internals.publishSummary = mock(async () => undefined);
  internals.publishLineComments = mock(async () => false);
}
/**
 * Builds a `CodexRunner` over mocked store/repo-manager dependencies and
 * replaces its `generateCodexWorkspaceConfig` and `runCodexProcess` members
 * with mocks.
 *
 * @param snapshot Value the mocked `resolveReviewedRef` resolves with.
 * @returns The runner, its mocked dependencies, the repo paths, and the mock
 *   installed as `runCodexProcess` (for call assertions).
 */
function createCodexRunnerForExecute(snapshot: Snapshot) {
  type RunnerInternals = {
    generateCodexWorkspaceConfig: (workspacePath: string, runId: string) => Promise<void>;
    runCodexProcess: (
      workspacePath: string,
      run: ReviewRun,
      lastReviewedHead?: string
    ) => Promise<void>;
  };

  const store = createStoreMock();
  const { manager, repoPaths } = createLocalRepoManagerMock(snapshot);
  const runner = new CodexRunner(
    store as unknown as FileReviewStore,
    manager as unknown as LocalRepoManager
  );

  const internals = runner as unknown as RunnerInternals;
  internals.generateCodexWorkspaceConfig = mock(async () => undefined);
  const processStub = mock(async () => undefined);
  internals.runCodexProcess = processStub;

  return {
    runner,
    store,
    manager,
    repoPaths,
    runCodexProcessMock: processStub as ReturnType<typeof mock>,
  };
}
// Verifies which `lastReviewedHead` argument the orchestrator forwards to
// `buildContext`, and when it persists a reviewed-ref snapshot.
describe('ReviewOrchestrator incremental baseline resolution', () => {
  // `mock.restore()` is invoked both before and after every test.
  beforeEach(() => {
    mock.restore();
  });
  afterEach(() => {
    mock.restore();
  });

  /**
   * Builds an orchestrator over fresh mocks and stubs its internals via
   * `wireOrchestratorFastPath`.
   */
  function buildFastPathOrchestrator(snapshot: Snapshot) {
    const store = createStoreMock();
    const { manager } = createLocalRepoManagerMock(snapshot);
    const { extractor } = createDiffExtractorMock();
    const orchestrator = new ReviewOrchestrator(
      store as unknown as FileReviewStore,
      manager as unknown as LocalRepoManager,
      extractor as unknown as DiffExtractor
    );
    wireOrchestratorFastPath(orchestrator);
    return { orchestrator, manager, extractor };
  }

  /** Runs `action` and returns what it threw, or `undefined` if it completed. */
  async function captureFailure(action: () => Promise<unknown>): Promise<Error | undefined> {
    try {
      await action();
      return undefined;
    } catch (error) {
      return error as Error;
    }
  }

  test('matching baseSha uses snapshot head as lastReviewedHead', async () => {
    const run = makeRun({ baseSha: 'same-base', headSha: 'new-head' });
    const { orchestrator, manager, extractor } = buildFastPathOrchestrator({
      baseSha: 'same-base',
      headSha: 'old-head',
    });

    await orchestrator.execute(run);

    expect(manager.resolveReviewedRef).toHaveBeenCalledTimes(1);
    expect(extractor.buildContext).toHaveBeenCalledTimes(1);
    // The snapshot's head becomes the incremental baseline.
    expect(extractor.buildContext).toHaveBeenCalledWith(
      run,
      '/tmp/mirror',
      '/tmp/workspace',
      'old-head'
    );
  });

  test('different baseSha falls back to full review (no lastReviewedHead)', async () => {
    const run = makeRun({ baseSha: 'current-base' });
    const { orchestrator, extractor } = buildFastPathOrchestrator({
      baseSha: 'saved-base',
      headSha: 'old-head',
    });

    await orchestrator.execute(run);

    expect(extractor.buildContext).toHaveBeenCalledWith(
      run,
      '/tmp/mirror',
      '/tmp/workspace',
      undefined
    );
  });

  test('missing snapshot falls back to full review', async () => {
    const run = makeRun();
    const { orchestrator, extractor } = buildFastPathOrchestrator(null);

    await orchestrator.execute(run);

    expect(extractor.buildContext).toHaveBeenCalledWith(
      run,
      '/tmp/mirror',
      '/tmp/workspace',
      undefined
    );
  });

  test('non pull_request event skips incremental snapshot lookup', async () => {
    const run = makeRun({
      eventType: 'commit_status',
      prNumber: undefined,
      commitSha: 'commit-sha',
      headSha: undefined,
    });
    // A snapshot exists, yet it must not even be looked up for this event type.
    const { orchestrator, manager, extractor } = buildFastPathOrchestrator({
      baseSha: 'same-base',
      headSha: 'old-head',
    });

    await orchestrator.execute(run);

    expect(manager.resolveReviewedRef).not.toHaveBeenCalled();
    expect(extractor.buildContext).toHaveBeenCalledWith(
      run,
      '/tmp/mirror',
      '/tmp/workspace',
      undefined
    );
  });

  test('successful review saves reviewed ref snapshot', async () => {
    const run = makeRun({ baseSha: 'base-1', headSha: 'head-1', prNumber: 99 });
    const { orchestrator, manager } = buildFastPathOrchestrator(null);

    await orchestrator.execute(run);

    expect(manager.saveReviewedRef).toHaveBeenCalledTimes(1);
    expect(manager.saveReviewedRef).toHaveBeenCalledWith('/tmp/mirror', 99, 'base-1', 'head-1');
  });

  test('failed review does not save reviewed ref snapshot', async () => {
    const run = makeRun({ baseSha: 'base-1', headSha: 'head-1', prNumber: 99 });
    const store = createStoreMock();
    const { manager } = createLocalRepoManagerMock(null);
    const { extractor } = createDiffExtractorMock();
    // Context building fails; this test does not wire the fast-path stubs.
    extractor.buildContext = mock(async () => {
      throw new Error('context failed');
    });
    const orchestrator = new ReviewOrchestrator(
      store as unknown as FileReviewStore,
      manager as unknown as LocalRepoManager,
      extractor as unknown as DiffExtractor
    );

    const caught = await captureFailure(() => orchestrator.execute(run));

    expect(caught).toBeDefined();
    expect(caught?.message).toContain('context failed');
    expect(manager.saveReviewedRef).not.toHaveBeenCalled();
  });
});
// Mirrors the orchestrator suite for `CodexRunner`: which `lastReviewedHead`
// reaches `runCodexProcess`, when a snapshot is saved, and what two private
// helpers (`buildReviewPrompt`, `normalizeApiBaseUrl`) return.
describe('CodexRunner incremental baseline resolution and prompt behavior', () => {
  // `mock.restore()` is invoked both before and after every test.
  beforeEach(() => {
    mock.restore();
  });
  afterEach(() => {
    mock.restore();
  });

  /** Runs `action` and returns what it threw, or `undefined` if it completed. */
  async function captureFailure(action: () => Promise<unknown>): Promise<Error | undefined> {
    try {
      await action();
      return undefined;
    } catch (error) {
      return error as Error;
    }
  }

  test('matching baseSha uses snapshot head for incremental run', async () => {
    const run = makeRun({ baseSha: 'same-base', headSha: 'new-head' });
    const harness = createCodexRunnerForExecute({
      baseSha: 'same-base',
      headSha: 'old-head',
    });

    await harness.runner.execute(run);

    expect(harness.runCodexProcessMock).toHaveBeenCalledWith('/tmp/workspace', run, 'old-head');
  });

  test('different baseSha falls back to full run', async () => {
    const run = makeRun({ baseSha: 'current-base', headSha: 'new-head' });
    const harness = createCodexRunnerForExecute({
      baseSha: 'saved-base',
      headSha: 'old-head',
    });

    await harness.runner.execute(run);

    expect(harness.runCodexProcessMock).toHaveBeenCalledWith('/tmp/workspace', run, undefined);
  });

  test('missing snapshot falls back to full run', async () => {
    const run = makeRun();
    const harness = createCodexRunnerForExecute(null);

    await harness.runner.execute(run);

    expect(harness.runCodexProcessMock).toHaveBeenCalledWith('/tmp/workspace', run, undefined);
  });

  test('non pull_request event skips incremental snapshot lookup', async () => {
    const run = makeRun({
      eventType: 'commit_status',
      prNumber: undefined,
      commitSha: 'commit-sha',
      headSha: undefined,
    });
    const harness = createCodexRunnerForExecute({
      baseSha: 'saved-base',
      headSha: 'old-head',
    });

    await harness.runner.execute(run);

    expect(harness.manager.resolveReviewedRef).not.toHaveBeenCalled();
    expect(harness.runCodexProcessMock).toHaveBeenCalledWith('/tmp/workspace', run, undefined);
  });

  test('successful codex review saves reviewed ref snapshot', async () => {
    const run = makeRun({ baseSha: 'base-1', headSha: 'head-1', prNumber: 22 });
    const harness = createCodexRunnerForExecute(null);

    await harness.runner.execute(run);

    expect(harness.manager.saveReviewedRef).toHaveBeenCalledTimes(1);
    expect(harness.manager.saveReviewedRef).toHaveBeenCalledWith(
      '/tmp/mirror',
      22,
      'base-1',
      'head-1'
    );
  });

  test('failed codex review does not save reviewed ref snapshot', async () => {
    type ProcessInternals = {
      runCodexProcess: (
        workspacePath: string,
        runArg: ReviewRun,
        lastReviewedHead?: string
      ) => Promise<void>;
    };
    const run = makeRun({ baseSha: 'base-1', headSha: 'head-1', prNumber: 22 });
    const harness = createCodexRunnerForExecute(null);
    // Swap the process stub for one that always rejects.
    (harness.runner as unknown as ProcessInternals).runCodexProcess = mock(async () => {
      throw new Error('codex failed');
    });

    const caught = await captureFailure(() => harness.runner.execute(run));

    expect(caught).toBeDefined();
    expect(caught?.message).toContain('codex failed');
    expect(harness.manager.saveReviewedRef).not.toHaveBeenCalled();
  });

  test('buildReviewPrompt includes incremental instructions when lastReviewedHead is set', () => {
    type PromptInternals = {
      buildReviewPrompt: (runArg: ReviewRun, lastReviewedHead?: string) => string;
    };
    const run = makeRun({ baseSha: 'base-a', headSha: 'head-b', prNumber: 7 });
    const { runner } = createCodexRunnerForExecute(null);

    const prompt = (runner as unknown as PromptInternals).buildReviewPrompt(
      run,
      'reviewed-head-123'
    );

    // NOTE(review): the second fragment has no separator between "SHA" and the
    // hash — confirm it matches the prompt text verbatim.
    expect(prompt).toContain('增量审查模式:仅审查上次审查后的新变更');
    expect(prompt).toContain('上次审查 SHAreviewed-head-123');
    expect(prompt).toContain('git diff reviewed-head-123..head-b');
  });

  test('normalizeApiBaseUrl appends /v1 when missing', () => {
    type UrlInternals = {
      normalizeApiBaseUrl: (rawUrl: string) => string;
    };
    const { runner } = createCodexRunnerForExecute(null);
    const internals = runner as unknown as UrlInternals;

    // Bare host, already-suffixed URL, and trailing-slash URL all normalize alike.
    const rawUrls = [
      'https://api.example.com',
      'https://api.example.com/v1',
      'https://api.example.com/',
    ];
    for (const rawUrl of rawUrls) {
      expect(internals.normalizeApiBaseUrl(rawUrl)).toBe('https://api.example.com/v1');
    }
  });
});

View File

@@ -1,244 +0,0 @@
import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test';
import type { DiffExtractor } from '../context/diff-extractor';
import type { LocalRepoManager, LocalRepoPaths } from '../context/local-repo-manager';
import type { FileReviewStore } from '../store/file-review-store';
import type { Finding, ReviewContext, ReviewRun, ReviewTask } from '../types';
/**
 * Builds the `ReviewRun` fixture used by the project-prompt wiring tests
 * (pull request #8, in progress).
 *
 * @param overrides Fields that replace the fixture defaults.
 * @returns The default run with `overrides` applied on top.
 */
function makeRun(overrides: Partial<ReviewRun> = {}): ReviewRun {
  const defaults: ReviewRun = {
    id: 'run-project-prompt',
    idempotencyKey: 'owner/repo#8:base...head',
    eventType: 'pull_request',
    status: 'in_progress',
    owner: 'owner',
    repo: 'repo',
    cloneUrl: 'https://example.com/repo.git',
    prNumber: 8,
    baseSha: 'base-sha',
    headSha: 'head-sha',
    attempts: 1,
    maxAttempts: 3,
    // Both timestamps are taken independently at fixture-creation time.
    createdAt: new Date().toISOString(),
    updatedAt: new Date().toISOString(),
  };
  return { ...defaults, ...overrides };
}
/**
 * Creates a stand-in for the review store built entirely from bun mocks.
 *
 * @returns An object of async mocks: `getRunDetails` resolves to empty
 *   `comments`/`findings`, `markFindingPublished` resolves to `true`, and the
 *   remaining methods resolve to `undefined`.
 */
function createStoreMock() {
  // Factory so every method gets its own mock instance.
  const resolvesUndefined = () => mock(async () => undefined);
  const store = {
    markRunIgnored: resolvesUndefined(),
    addStep: resolvesUndefined(),
    getRunDetails: mock(async () => ({ comments: [], findings: [] })),
    addFindings: resolvesUndefined(),
    markFindingPublished: mock(async () => true),
    addCommentRecord: resolvesUndefined(),
  };
  return store;
}
/**
 * Creates a mocked local-repo manager whose `resolveReviewedRef` always
 * resolves to `null`.
 *
 * @returns `manager` (all methods are bun mocks) and the fixed `repoPaths`
 *   that `prepareWorkspace` resolves with.
 */
function createLocalRepoManagerMock() {
  const repoPaths: LocalRepoPaths = { mirrorPath: '/tmp/mirror', workspacePath: '/tmp/workspace' };
  const manager = {
    prepareWorkspace: mock(async () => repoPaths),
    resolveReviewedRef: mock(async () => null),
    saveReviewedRef: mock(async () => undefined),
    cleanupWorkspace: mock(async () => undefined),
  };
  return { manager, repoPaths };
}
/**
 * Creates a mocked diff extractor and the `ReviewContext` it resolves with;
 * the context describes a single modified file, `src/app.ts`.
 *
 * @returns The shared `context` and an `extractor` with mocked `getSandbox`
 *   and `buildContext`.
 */
function createDiffExtractorMock() {
  const modifiedFile = {
    path: 'src/app.ts',
    status: 'M',
    additions: 3,
    deletions: 1,
  };
  const context: ReviewContext = {
    workspacePath: '/tmp/workspace',
    mirrorPath: '/tmp/mirror',
    diff: 'diff --git a/src/app.ts b/src/app.ts\n+const x = 1;',
    changedFiles: [modifiedFile],
    parsedDiff: [],
    fileContents: {},
  };
  // Sandbox stub: every command succeeds and produces no output.
  const makeIdleSandbox = () => ({
    execute: async () => ({ stdout: '', stderr: '', exitCode: 0 }),
  });
  const extractor = {
    getSandbox: mock(makeIdleSandbox),
    buildContext: mock(async () => context),
  };
  return { context, extractor };
}
// Checks that the value returned by the (module-mocked)
// `resolveProjectReviewPrompt` reaches triage, the specialist agent, and the
// codex prompt.
describe('project prompt wiring', () => {
  // `mock.restore()` is invoked both before and after every test.
  // NOTE(review): confirm whether `mock.restore()` also undoes `mock.module`
  // overrides; if not, the module mock below outlives each test.
  beforeEach(() => {
    mock.restore();
  });
  afterEach(() => {
    mock.restore();
  });

  test('orchestrator forwards resolved project prompt to triage and specialist execution options', async () => {
    type PromptOptions = { projectPrompt?: string };
    type ReviewFinding = Array<Omit<Finding, 'id' | 'runId' | 'published'>>;
    type TriageResultLike = {
      complexity: 'trivial' | 'standard' | 'complex';
      reviewSize: 'small' | 'medium' | 'large';
      mode: 'skip' | 'light' | 'full';
      tasks: ReviewTask[];
      riskTags: string[];
      rationale: string;
    };
    type SpecialistStub = {
      reviewWithOptions: (
        run: ReviewRun,
        context: ReviewContext,
        options: PromptOptions
      ) => Promise<{ findings: ReviewFinding }>;
      reviewWithReflection: (
        run: ReviewRun,
        context: ReviewContext,
        maxRounds?: number,
        options?: PromptOptions
      ) => Promise<{ findings: ReviewFinding }>;
    };
    type InternalOrchestrator = {
      triageAgent: {
        analyze: (context: ReviewContext, options?: PromptOptions) => Promise<TriageResultLike>;
      };
      agentMap: Record<string, SpecialistStub>;
      judgeAgent: {
        judge: (findings: ReviewFinding) => { summaryMarkdown: string; findings: ReviewFinding };
      };
      publishSummary: (run: ReviewRun, summary: string, gatedCount: number) => Promise<void>;
      publishLineComments: (
        run: ReviewRun,
        comments: Array<{ path: string; line: number; comment: string }>
      ) => Promise<boolean>;
    };

    // The module mock is registered before the orchestrator is imported.
    const projectPrompt = `repo-policy-${'P'.repeat(360)}`;
    mock.module('../project-review-prompt', () => ({
      resolveProjectReviewPrompt: () => projectPrompt,
    }));
    const { ReviewOrchestrator } = await import('../orchestrator');

    const store = createStoreMock();
    const { manager } = createLocalRepoManagerMock();
    const { extractor } = createDiffExtractorMock();
    const orchestrator = new ReviewOrchestrator(
      store as unknown as FileReviewStore,
      manager as unknown as LocalRepoManager,
      extractor as unknown as DiffExtractor
    );
    const internals = orchestrator as unknown as InternalOrchestrator;

    // One light-mode correctness task with tools, reflection and debate off.
    const task: ReviewTask = {
      domain: 'correctness',
      paths: ['src/app.ts'],
      riskTags: [],
      mode: 'light',
      tokenBudget: 1200,
      maxIterations: 1,
      allowTools: false,
      allowReflection: false,
      allowDebate: false,
    };

    const triageAnalyzeMock = mock(
      async (): Promise<TriageResultLike> => ({
        complexity: 'standard',
        reviewSize: 'small',
        mode: 'light',
        tasks: [task],
        riskTags: [],
        rationale: 'project prompt wiring test',
      })
    );
    const noFindings = async () => ({ findings: [] as ReviewFinding });
    const reviewWithOptionsMock = mock(noFindings);
    const reviewWithReflectionMock = mock(noFindings);

    internals.triageAgent = { analyze: triageAnalyzeMock };
    internals.agentMap = {
      correctness: {
        reviewWithOptions: reviewWithOptionsMock,
        reviewWithReflection: reviewWithReflectionMock,
      },
    };
    internals.judgeAgent = {
      judge: mock(() => ({
        summaryMarkdown: 'ok',
        findings: [] as ReviewFinding,
      })),
    };
    internals.publishSummary = mock(async () => undefined);
    internals.publishLineComments = mock(async () => false);

    const run = makeRun();
    await orchestrator.execute(run);

    // Triage receives the prompt as its options argument…
    expect(triageAnalyzeMock).toHaveBeenCalledWith(expect.anything(), { projectPrompt });
    // …and the specialist's options object carries it as well.
    expect(reviewWithOptionsMock).toHaveBeenCalledWith(
      run,
      expect.anything(),
      expect.objectContaining({ projectPrompt })
    );
  });

  test('codex prompt builder includes resolved project-level prompt section', async () => {
    type PromptInternals = {
      buildReviewPrompt: (run: ReviewRun, lastReviewedHead?: string) => string;
    };

    // The module mock is registered before the runner is imported.
    const projectPrompt = `codex-policy-${'X'.repeat(320)}`;
    mock.module('../project-review-prompt', () => ({
      resolveProjectReviewPrompt: () => projectPrompt,
    }));
    const { CodexRunner } = await import('../codex/codex-runner');

    const store = createStoreMock();
    const { manager } = createLocalRepoManagerMock();
    const runner = new CodexRunner(
      store as unknown as FileReviewStore,
      manager as unknown as LocalRepoManager
    );

    const prompt = (runner as unknown as PromptInternals).buildReviewPrompt(makeRun(), undefined);

    expect(prompt).toContain('## 项目级审查要求');
    expect(prompt).toContain(projectPrompt);
  });
});

View File

@@ -1,197 +0,0 @@
import { describe, expect, test } from 'bun:test';
import { applyPublishPolicy } from '../policy/publish-policy';
import type { Finding } from '../types';
// Finding shape used throughout these tests: `Finding` without its `id`,
// `runId` and `published` fields.
type TestFinding = Omit<Finding, 'id' | 'runId' | 'published'>;
/**
 * Builds a `TestFinding` fixture: a medium-severity correctness finding with
 * confidence 0.9 and a random `fp-…` fingerprint.
 *
 * @param overrides Fields that replace the fixture defaults.
 * @returns The default finding with `overrides` applied on top.
 */
function makeFinding(overrides: Partial<TestFinding> = {}): TestFinding {
  // Random base-36 suffix so un-overridden fingerprints differ between calls.
  const randomSuffix = Math.random().toString(36).slice(2, 8);
  const defaults: TestFinding = {
    fingerprint: `fp-${randomSuffix}`,
    category: 'correctness',
    severity: 'medium',
    confidence: 0.9,
    path: 'src/foo.ts',
    line: 10,
    title: 'Test finding',
    detail: 'Detail',
    evidence: 'Evidence',
    suggestion: 'Fix it',
  };
  return { ...defaults, ...overrides };
}
// Pins how `applyPublishPolicy` splits findings into `publishable`, `gated`
// and `dropped` for combinations of severity, confidence and the human gate.
describe('applyPublishPolicy', () => {
  const MIN_CONFIDENCE = 0.8;

  /** Applies the policy to a single finding at the suite's default threshold. */
  const classify = (finding: TestFinding, humanGate: boolean) =>
    applyPublishPolicy([finding], MIN_CONFIDENCE, humanGate);

  // ─── Empty input ───
  test('empty findings → all arrays empty', () => {
    const { publishable, gated, dropped } = applyPublishPolicy([], MIN_CONFIDENCE, false);
    expect(publishable).toEqual([]);
    expect(gated).toEqual([]);
    expect(dropped).toEqual([]);
  });

  // ─── High confidence + medium/high severity → publishable ───
  test('high severity + high confidence → publishable (humanGate off)', () => {
    const finding = makeFinding({ severity: 'high', confidence: 0.95 });
    const { publishable, gated, dropped } = classify(finding, false);
    expect(publishable).toHaveLength(1);
    expect(gated).toHaveLength(0);
    expect(dropped).toHaveLength(0);
  });

  test('medium severity + high confidence → publishable (humanGate off)', () => {
    const finding = makeFinding({ severity: 'medium', confidence: 0.85 });
    const { publishable, gated, dropped } = classify(finding, false);
    expect(publishable).toHaveLength(1);
    expect(gated).toHaveLength(0);
    expect(dropped).toHaveLength(0);
  });

  test('high severity + exactly at threshold → publishable', () => {
    const finding = makeFinding({ severity: 'high', confidence: 0.8 });
    const { publishable } = classify(finding, false);
    expect(publishable).toHaveLength(1);
  });

  // ─── Low severity → never publishable (even with high confidence) ───
  test('low severity + high confidence → dropped (humanGate off)', () => {
    const finding = makeFinding({ severity: 'low', confidence: 0.95 });
    const { publishable, gated, dropped } = classify(finding, false);
    expect(publishable).toHaveLength(0);
    expect(gated).toHaveLength(0);
    expect(dropped).toHaveLength(1);
  });

  test('low severity + high confidence → gated (humanGate on)', () => {
    const finding = makeFinding({ severity: 'low', confidence: 0.95 });
    const { publishable, gated, dropped } = classify(finding, true);
    expect(publishable).toHaveLength(0);
    expect(gated).toHaveLength(1);
    expect(dropped).toHaveLength(0);
  });

  // ─── Low confidence → not publishable ───
  test('high severity + low confidence → dropped (humanGate off)', () => {
    const finding = makeFinding({ severity: 'high', confidence: 0.5 });
    const { publishable, dropped } = classify(finding, false);
    expect(publishable).toHaveLength(0);
    expect(dropped).toHaveLength(1);
  });

  test('high severity + low confidence → gated (humanGate on)', () => {
    const finding = makeFinding({ severity: 'high', confidence: 0.5 });
    const { publishable, gated, dropped } = classify(finding, true);
    expect(publishable).toHaveLength(0);
    expect(gated).toHaveLength(1);
    expect(dropped).toHaveLength(0);
  });

  test('medium severity + below threshold → dropped (humanGate off)', () => {
    const finding = makeFinding({ severity: 'medium', confidence: 0.7 });
    const { publishable, dropped } = classify(finding, false);
    expect(publishable).toHaveLength(0);
    expect(dropped).toHaveLength(1);
  });

  test('medium severity + below threshold → gated (humanGate on)', () => {
    const finding = makeFinding({ severity: 'medium', confidence: 0.7 });
    const { publishable, gated } = classify(finding, true);
    expect(publishable).toHaveLength(0);
    expect(gated).toHaveLength(1);
  });

  // ─── Human gate ON: non-publishable → always gated, never dropped ───
  test('humanGate on: low confidence low severity → gated', () => {
    const finding = makeFinding({ severity: 'low', confidence: 0.3 });
    const { publishable, gated, dropped } = classify(finding, true);
    expect(publishable).toHaveLength(0);
    expect(gated).toHaveLength(1);
    expect(dropped).toHaveLength(0);
  });

  // ─── Mixed findings ───
  test('mixed findings split correctly', () => {
    const batch: TestFinding[] = [
      makeFinding({ severity: 'high', confidence: 0.95 }), // → publishable
      makeFinding({ severity: 'medium', confidence: 0.85 }), // → publishable
      makeFinding({ severity: 'low', confidence: 0.9 }), // → dropped (low severity, humanGate off)
      makeFinding({ severity: 'high', confidence: 0.5 }), // → dropped (low confidence)
      makeFinding({ severity: 'medium', confidence: 0.6 }), // → dropped (low confidence)
    ];
    const { publishable, gated, dropped } = applyPublishPolicy(batch, MIN_CONFIDENCE, false);
    expect(publishable).toHaveLength(2);
    expect(gated).toHaveLength(0);
    expect(dropped).toHaveLength(3);
  });

  test('mixed findings with humanGate on', () => {
    const batch: TestFinding[] = [
      makeFinding({ severity: 'high', confidence: 0.95 }), // → publishable
      makeFinding({ severity: 'low', confidence: 0.9 }), // → gated
      makeFinding({ severity: 'high', confidence: 0.5 }), // → gated
    ];
    const { publishable, gated, dropped } = applyPublishPolicy(batch, MIN_CONFIDENCE, true);
    expect(publishable).toHaveLength(1);
    expect(gated).toHaveLength(2);
    expect(dropped).toHaveLength(0);
  });

  // ─── Boundary: confidence exactly at threshold ───
  test('confidence exactly at threshold + medium severity → publishable', () => {
    const finding = makeFinding({ severity: 'medium', confidence: MIN_CONFIDENCE });
    const { publishable } = classify(finding, false);
    expect(publishable).toHaveLength(1);
  });

  test('confidence just below threshold + medium severity → dropped', () => {
    const finding = makeFinding({ severity: 'medium', confidence: MIN_CONFIDENCE - 0.01 });
    const { dropped } = classify(finding, false);
    expect(dropped).toHaveLength(1);
  });

  // ─── All same fingerprint (policy doesn't dedup, that's judge's job) ───
  test('all findings same fingerprint → all processed independently', () => {
    const fingerprint = 'shared-fingerprint';
    const batch: TestFinding[] = [
      makeFinding({ fingerprint, severity: 'high', confidence: 0.9 }),
      makeFinding({ fingerprint, severity: 'medium', confidence: 0.85 }),
      makeFinding({ fingerprint, severity: 'low', confidence: 0.95 }),
    ];
    const { publishable, dropped } = applyPublishPolicy(batch, MIN_CONFIDENCE, false);
    // The shared fingerprint is irrelevant here: each finding is judged alone.
    expect(publishable).toHaveLength(2); // high + medium
    expect(dropped).toHaveLength(1); // low severity
  });

  // ─── Different minConfidence thresholds ───
  test('very low threshold → more findings publishable', () => {
    const finding = makeFinding({ severity: 'medium', confidence: 0.3 });
    const { publishable } = applyPublishPolicy([finding], 0.1, false);
    expect(publishable).toHaveLength(1);
  });

  test('very high threshold → more findings dropped', () => {
    const finding = makeFinding({ severity: 'high', confidence: 0.95 });
    const { dropped } = applyPublishPolicy([finding], 0.99, false);
    expect(dropped).toHaveLength(1);
  });

  // ─── Return value structure ───
  test('returned findings preserve all original fields', () => {
    const finding = makeFinding({
      severity: 'high',
      confidence: 0.95,
      path: 'src/important.ts',
      line: 42,
      title: 'Critical bug',
      detail: 'Detailed explanation',
      evidence: 'Code snippet',
      suggestion: 'Fix suggestion',
      category: 'security',
      fingerprint: 'unique-fp-123',
    });
    const { publishable } = classify(finding, false);
    expect(publishable[0]).toEqual(finding);
  });
});

View File

@@ -1,476 +0,0 @@
import { describe, expect, mock, test } from 'bun:test';
import { z } from 'zod';
import type {
LLMChatRequest,
LLMChatResponse,
LLMMessage,
LLMToolDefinition,
ModelRole,
} from '../../llm/types';
import { SpecialistAgent } from '../agents/specialist-agent';
import { ToolRegistry } from '../tools/registry';
import type { Tool } from '../tools/types';
import type { FindingCategory, ReviewContext, ReviewRun } from '../types';
/**
 * Builds the `ReviewRun` fixture used by the specialist-agent tests.
 *
 * @param overrides Fields that replace the fixture defaults.
 * @returns The default run with `overrides` applied on top.
 */
function makeRun(overrides: Partial<ReviewRun> = {}): ReviewRun {
  const defaults: ReviewRun = {
    id: 'run-test-001',
    idempotencyKey: 'idem-test',
    eventType: 'pull_request',
    status: 'in_progress',
    owner: 'test-owner',
    repo: 'test-repo',
    cloneUrl: 'https://example.com/repo.git',
    prNumber: 1,
    baseSha: 'aaa',
    headSha: 'bbb',
    attempts: 0,
    maxAttempts: 2,
    // Both timestamps are taken independently at fixture-creation time.
    createdAt: new Date().toISOString(),
    updatedAt: new Date().toISOString(),
  };
  return { ...defaults, ...overrides };
}
/**
 * Builds a `ReviewContext` fixture describing one added line in `src/foo.ts`.
 *
 * @param overrides Fields that replace the fixture defaults.
 * @returns The default context with `overrides` applied on top.
 */
function makeContext(overrides: Partial<ReviewContext> = {}): ReviewContext {
  const defaults: ReviewContext = {
    workspacePath: '/tmp/test-workspace',
    mirrorPath: '/tmp/test-mirror',
    diff: '--- a/src/foo.ts\n+++ b/src/foo.ts\n@@ -1,3 +1,4 @@\n+const x = null;\n export function foo() {}',
    changedFiles: [{ path: 'src/foo.ts', status: 'M', additions: 1, deletions: 0 }],
    parsedDiff: [
      {
        path: 'src/foo.ts',
        changes: [{ lineNumber: 1, content: 'const x = null;', type: 'add' }],
      },
    ],
    fileContents: { 'src/foo.ts': 'const x = null;\nexport function foo() {}' },
  };
  return { ...defaults, ...overrides };
}
/**
 * Builds a minimal `Tool` that accepts `{ query: string }` and resolves with
 * an empty `results` list.
 *
 * @param name Tool name to register under; defaults to `'search_code'`.
 * @returns The tool definition.
 */
function makeDummyTool(name = 'search_code'): Tool {
  const parameters = z.object({ query: z.string() });
  const execute = async () => ({ results: [] });
  return {
    name,
    description: 'Search code in the workspace',
    parameters,
    execute,
  };
}
// Request fields these tests inspect on each recorded gateway call.
type ChatRequest = {
messages: LLMMessage[];
temperature?: number;
responseFormat?: 'text' | 'json';
tools?: LLMToolDefinition[];
providerOptions?: Record<string, unknown>;
};
// One recorded `chatForRole` invocation: the role plus the request fields.
type ChatCall = { role: ModelRole } & ChatRequest;
/**
 * Creates a scripted LLM gateway double that records every `chatForRole` call.
 *
 * The n-th call is answered by the n-th factory in `responses`; once the
 * script is exhausted the last factory keeps answering.
 *
 * @param responses Response factories, consumed in call order.
 * @returns `gateway` (exposing `chatForRole`) and `getCalls`, which returns
 *   the live array of recorded calls.
 * @throws The promise returned by `chatForRole` rejects with an `Error` when
 *   `responses` is empty, i.e. when the code under test made a call the test
 *   did not script. The call is still recorded first.
 */
function createMockGateway(responses: Array<() => LLMChatResponse>) {
  let callIndex = 0;
  const calls: ChatCall[] = [];
  return {
    gateway: {
      chatForRole: async (role: ModelRole, request: Omit<LLMChatRequest, 'model'>) => {
        calls.push({ role, ...request });
        const responseFn = responses[callIndex] ?? responses[responses.length - 1];
        callIndex++;
        if (responseFn === undefined) {
          // Previously this surfaced as an opaque "responseFn is not a function".
          throw new Error(
            `createMockGateway: unexpected chatForRole call #${callIndex} (role "${String(role)}") - no scripted responses were provided`
          );
        }
        return responseFn();
      },
    },
    getCalls: () => calls,
  };
}
/**
 * Builds an `LLMChatResponse` in which the model requests tool invocations.
 *
 * @param toolCalls Tool calls to emit; each `args` value is JSON-serialised
 *   into the call's `arguments` string.
 * @returns A response with `content: null`, `finishReason: 'tool_calls'` and
 *   zeroed usage counters.
 */
function toolCallResponse(
  toolCalls: Array<{ id: string; name: string; args: unknown }>
): LLMChatResponse {
  return {
    content: null,
    toolCalls: toolCalls.map(({ id, name, args }) => ({
      id,
      name,
      // JSON.stringify yields undefined for undefined/function inputs; fall
      // back to an empty object so `arguments` is always a JSON string.
      arguments: JSON.stringify(args) ?? '{}',
    })),
    finishReason: 'tool_calls',
    usage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
  };
}
/**
 * Builds a final (`finishReason: 'stop'`) `LLMChatResponse` whose content is
 * the JSON serialisation of `data`.
 *
 * @param data Payload to serialise.
 * @returns A response with no tool calls and zeroed usage counters. When
 *   `data` cannot be serialised (e.g. `undefined`), `content` is `null`
 *   rather than `undefined`.
 */
function jsonResponse(data: unknown): LLMChatResponse {
  return {
    // JSON.stringify yields undefined for undefined/function inputs.
    content: JSON.stringify(data) ?? null,
    toolCalls: [],
    finishReason: 'stop',
    usage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
  };
}
/**
 * Builds a final (`finishReason: 'stop'`) `LLMChatResponse` that carries
 * neither content nor tool calls.
 *
 * @returns The empty response with zeroed usage counters.
 */
function emptyResponse(): LLMChatResponse {
  const zeroUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
  return { content: null, toolCalls: [], finishReason: 'stop', usage: zeroUsage };
}
describe('SpecialistAgent ReAct loop', () => {
const category: FindingCategory = 'correctness';
// A whitespace-only diff must short-circuit: no findings and no gateway call.
test('empty diff returns empty findings without calling OpenAI', async () => {
  const { gateway, getCalls } = createMockGateway([]);
  const agent = new SpecialistAgent(gateway as any, category, 'TestAgent', 'bugs');
  const result = await agent.review(makeRun(), makeContext({ diff: ' ' }));
  expect(result.findings).toHaveLength(0);
  expect(result.agentName).toBe('TestAgent');
  // The test name promises that the LLM is never contacted, so assert it
  // directly instead of relying on the unscripted gateway failing.
  expect(getCalls()).toHaveLength(0);
});
// Without a tool registry the agent is expected to make exactly one gateway
// call, requesting JSON output.
test('no toolRegistry → uses single-call json mode', async () => {
  const scriptedFinding = {
    severity: 'high',
    confidence: 0.9,
    path: 'src/foo.ts',
    line: 1,
    title: 'Null assignment',
    detail: 'x is null',
    evidence: 'const x = null',
    suggestion: 'Use undefined',
  };
  const { gateway, getCalls } = createMockGateway([
    () => jsonResponse({ findings: [scriptedFinding] }),
  ]);
  const agent = new SpecialistAgent(gateway as any, category, 'TestAgent', 'bugs');

  const { findings } = await agent.review(makeRun(), makeContext());

  expect(findings).toHaveLength(1);
  const reported = findings[0];
  expect(reported.severity).toBe('high');
  // Category and fingerprint are not part of the scripted payload; the
  // assertions require the agent's result to carry them.
  expect(reported.category).toBe('correctness');
  expect(reported.fingerprint).toBeTruthy();

  const recorded = getCalls();
  expect(recorded).toHaveLength(1);
  expect(recorded[0].responseFormat).toBe('json');
});
// Happy path of the tool loop: the first scripted response requests
// `search_code`, the registered tool must run exactly once, and the second
// scripted response supplies the final finding. Two gateway calls in total.
test('ReAct: tool call → tool result → final JSON findings', async () => {
const registry = new ToolRegistry();
// Replace the dummy tool's executor with a spy so the invocation can be counted.
const executeFn = mock(async () => ({ results: ['some code match'] }));
registry.register({ ...makeDummyTool(), execute: executeFn });
const finding = {
severity: 'medium',
confidence: 0.85,
path: 'src/foo.ts',
line: 1,
title: 'Potential null',
detail: 'Null assigned',
evidence: 'const x = null',
suggestion: 'Check usage',
};
const { gateway, getCalls } = createMockGateway([
() => toolCallResponse([{ id: 'call_1', name: 'search_code', args: { query: 'null' } }]),
() => jsonResponse({ findings: [finding], need_more_investigation: false }),
]);
const agent = new SpecialistAgent(gateway as any, category, 'TestAgent', 'bugs', registry);
const result = await agent.review(makeRun(), makeContext());
expect(executeFn).toHaveBeenCalledTimes(1);
expect(result.findings).toHaveLength(1);
expect(result.findings[0].category).toBe('correctness');
const calls = getCalls();
expect(calls).toHaveLength(2);
});
// Pins the request options of the default two-step flow: the first call lets
// the model choose tools (`tool_choice: 'auto'`, no response format), the
// second call disables tools (`tool_choice: 'none'`) and requests JSON.
test('ReAct: default staged mode uses 2 iterations and forces final json', async () => {
const registry = new ToolRegistry();
registry.register(makeDummyTool());
const { gateway, getCalls } = createMockGateway([
() => toolCallResponse([{ id: 'call_1', name: 'search_code', args: { query: 'x' } }]),
() => jsonResponse({ findings: [], need_more_investigation: false }),
]);
const agent = new SpecialistAgent(gateway as any, category, 'TestAgent', 'bugs', registry);
await agent.review(makeRun(), makeContext());
const calls = getCalls();
expect(calls).toHaveLength(2);
expect(calls[0].providerOptions).toEqual({ tool_choice: 'auto' });
expect(calls[0].responseFormat).toBeUndefined();
expect(calls[1].providerOptions).toEqual({ tool_choice: 'none' });
expect(calls[1].responseFormat).toBe('json');
});
// The model first claims it needs more investigation but issues no tool call.
// The follow-up request must then end with a user message containing the
// tool-usage nudge text, so the conversation cannot spin without progress.
test('ReAct: dead-loop prevention — need_more_investigation=true but no tool call injects user prompt', async () => {
const registry = new ToolRegistry();
registry.register(makeDummyTool());
const { gateway, getCalls } = createMockGateway([
() => jsonResponse({ findings: [], need_more_investigation: true }),
() => jsonResponse({ findings: [], need_more_investigation: false }),
]);
const agent = new SpecialistAgent(gateway as any, category, 'TestAgent', 'bugs', registry);
await agent.review(makeRun(), makeContext());
const calls = getCalls();
expect(calls.length).toBeGreaterThanOrEqual(2);
const secondCallMessages = calls[1].messages;
// Only the most recent user message of the second request is inspected.
const lastUserMsg = secondCallMessages.filter((m: any) => m.role === 'user').pop();
expect(lastUserMsg).toBeDefined();
if (!lastUserMsg) throw new Error('Expected user message in second call');
expect(lastUserMsg.content).toContain('使用工具');
});
// Two iterations report a finding with the same explicit fingerprint. Only one
// finding may survive, and it must carry the values from the later iteration.
test('ReAct: fingerprint dedup across iterations — later finding with same fp overwrites', async () => {
const registry = new ToolRegistry();
registry.register(makeDummyTool());
const findingV1 = {
severity: 'low' as const,
confidence: 0.6,
path: 'src/foo.ts',
line: 1,
title: 'Null issue',
detail: 'First version',
evidence: 'const x = null',
suggestion: 'Fix v1',
fingerprint: 'shared-fp-123',
};
// Same fingerprint as V1, but with different severity/confidence/detail/suggestion.
const findingV2 = {
...findingV1,
severity: 'high' as const,
confidence: 0.95,
detail: 'Second version - more confident',
suggestion: 'Fix v2',
};
const { gateway } = createMockGateway([
() => jsonResponse({ findings: [findingV1], need_more_investigation: true }),
() => jsonResponse({ findings: [findingV2], need_more_investigation: false }),
]);
const agent = new SpecialistAgent(gateway as any, category, 'TestAgent', 'bugs', registry);
const result = await agent.review(makeRun(), makeContext());
expect(result.findings).toHaveLength(1);
expect(result.findings[0].severity).toBe('high');
expect(result.findings[0].confidence).toBe(0.95);
expect(result.findings[0].detail).toBe('Second version - more confident');
});
// Counterpart to the dedup test: findings with different fingerprints reported
// in separate iterations must both appear in the final result.
test('ReAct: multiple unique fingerprints accumulate', async () => {
const registry = new ToolRegistry();
registry.register(makeDummyTool());
const finding1 = {
severity: 'high' as const,
confidence: 0.9,
path: 'src/foo.ts',
line: 1,
title: 'Bug A',
detail: 'Detail A',
evidence: 'Evidence A',
suggestion: 'Fix A',
fingerprint: 'fp-aaa',
};
const finding2 = {
severity: 'medium' as const,
confidence: 0.8,
path: 'src/bar.ts',
line: 5,
title: 'Bug B',
detail: 'Detail B',
evidence: 'Evidence B',
suggestion: 'Fix B',
fingerprint: 'fp-bbb',
};
const { gateway } = createMockGateway([
() => jsonResponse({ findings: [finding1], need_more_investigation: true }),
() => jsonResponse({ findings: [finding2], need_more_investigation: false }),
]);
const agent = new SpecialistAgent(gateway as any, category, 'TestAgent', 'bugs', registry);
const result = await agent.review(makeRun(), makeContext());
expect(result.findings).toHaveLength(2);
const fps = result.findings.map((f) => f.fingerprint);
expect(fps).toContain('fp-aaa');
expect(fps).toContain('fp-bbb');
});
// A gateway that throws must not propagate the error out of `review`; the
// agent is expected to resolve with zero findings and its own name.
test('ReAct: OpenAI error returns empty findings gracefully', async () => {
const registry = new ToolRegistry();
registry.register(makeDummyTool());
const { gateway } = createMockGateway([
() => {
throw new Error('API rate limited');
},
]);
const agent = new SpecialistAgent(gateway as any, category, 'TestAgent', 'bugs', registry);
const result = await agent.review(makeRun(), makeContext());
expect(result.findings).toHaveLength(0);
expect(result.agentName).toBe('TestAgent');
});
// The model requests a tool that is not registered. The second request must
// contain a `tool` message for that call id whose JSON payload has an `error`
// field containing the "not found" text.
test('ReAct: unknown tool call returns error result to model', async () => {
const registry = new ToolRegistry();
registry.register(makeDummyTool('search_code'));
const { gateway, getCalls } = createMockGateway([
() => toolCallResponse([{ id: 'call_1', name: 'nonexistent_tool', args: {} }]),
() => jsonResponse({ findings: [], need_more_investigation: false }),
]);
const agent = new SpecialistAgent(gateway as any, category, 'TestAgent', 'bugs', registry);
await agent.review(makeRun(), makeContext());
const calls = getCalls();
expect(calls).toHaveLength(2);
const toolResultMsg = calls[1].messages.find(
(m: any) => m.role === 'tool' && m.toolCallId === 'call_1'
);
expect(toolResultMsg).toBeTruthy();
if (!toolResultMsg) throw new Error('Expected tool result message');
const parsed = JSON.parse(toolResultMsg.content);
expect(parsed.error).toContain('未找到');
});
// A registered tool whose executor throws. The thrown message must be relayed
// to the model inside the `tool` message of the following request instead of
// aborting the review.
test('ReAct: tool execution error is captured and returned to model', async () => {
const registry = new ToolRegistry();
registry.register({
...makeDummyTool(),
execute: async () => {
throw new Error('Sandbox timeout');
},
});
const { gateway, getCalls } = createMockGateway([
() => toolCallResponse([{ id: 'call_1', name: 'search_code', args: { query: 'x' } }]),
() => jsonResponse({ findings: [], need_more_investigation: false }),
]);
const agent = new SpecialistAgent(gateway as any, category, 'TestAgent', 'bugs', registry);
await agent.review(makeRun(), makeContext());
const calls = getCalls();
const toolResultMsg = calls[1].messages.find(
(m: any) => m.role === 'tool' && m.toolCallId === 'call_1'
);
expect(toolResultMsg).toBeTruthy();
if (!toolResultMsg) throw new Error('Expected tool result message');
const parsed = JSON.parse(toolResultMsg.content);
expect(parsed.error).toContain('Sandbox timeout');
});
// A response with neither content nor tool calls must terminate the review
// with zero findings.
test('ReAct: empty choice content ends loop', async () => {
const registry = new ToolRegistry();
registry.register(makeDummyTool());
const { gateway } = createMockGateway([() => emptyResponse()]);
const agent = new SpecialistAgent(gateway as any, category, 'TestAgent', 'bugs', registry);
const result = await agent.review(makeRun(), makeContext());
expect(result.findings).toHaveLength(0);
});
// Content that is not parseable JSON must not make `review` reject; the agent
// is expected to resolve with zero findings.
test('ReAct: malformed JSON response ends loop gracefully', async () => {
const registry = new ToolRegistry();
registry.register(makeDummyTool());
const { gateway } = createMockGateway([
() => ({
content: 'not valid json {{{',
toolCalls: [],
finishReason: 'stop',
usage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
}),
]);
const agent = new SpecialistAgent(gateway as any, category, 'TestAgent', 'bugs', registry);
const result = await agent.review(makeRun(), makeContext());
expect(result.findings).toHaveLength(0);
});
// Verifies that deleted diff lines reach the model: with a parsed diff that
// contains a `delete` change, the user message of the single request must
// include both the change type and its old line number in serialized form.
test('staged context includes deleted lines metadata for review', async () => {
const { gateway, getCalls } = createMockGateway([
() =>
jsonResponse({
findings: [],
need_more_investigation: false,
}),
]);
const context = makeContext({
parsedDiff: [
{
path: 'src/foo.ts',
changes: [
{ lineNumber: 12, oldLineNumber: 11, content: 'if (auth) {', type: 'context' },
{ lineNumber: 12, oldLineNumber: 12, content: 'if (isAdmin(user)) {', type: 'delete' },
{ lineNumber: 13, oldLineNumber: 13, content: 'return true;', type: 'add' },
],
},
],
});
const agent = new SpecialistAgent(gateway as any, category, 'TestAgent', 'bugs');
// Tools are disabled and the scope is pinned to the one file under test.
await agent.reviewWithOptions(makeRun(), context, {
mode: 'full',
allowTools: false,
scopePaths: ['src/foo.ts'],
maxContextTokens: 6000,
});
const calls = getCalls();
expect(calls).toHaveLength(1);
const userMessage = calls[0].messages.find((message) => message.role === 'user');
expect(userMessage).toBeDefined();
if (!userMessage) throw new Error('Expected user message in request');
// The substrings below match pretty-printed JSON (a space after each colon).
expect(userMessage.content).toContain('"type": "delete"');
expect(userMessage.content).toContain('"oldLineNumber": 12');
});
// A finding returned without a `fingerprint` field must come back from the
// agent with a non-empty fingerprint filled in.
test('ReAct: auto-generates fingerprint when finding has none', async () => {
const registry = new ToolRegistry();
registry.register(makeDummyTool());
const finding = {
severity: 'high' as const,
confidence: 0.9,
path: 'src/foo.ts',
line: 1,
title: 'Missing null check',
detail: 'Detail',
evidence: 'Evidence',
suggestion: 'Add check',
};
const { gateway } = createMockGateway([
() => jsonResponse({ findings: [finding], need_more_investigation: false }),
]);
const agent = new SpecialistAgent(gateway as any, category, 'TestAgent', 'bugs', registry);
const result = await agent.review(makeRun(), makeContext());
expect(result.findings).toHaveLength(1);
expect(result.findings[0].fingerprint).toBeTruthy();
expect(result.findings[0].fingerprint.length).toBeGreaterThan(0);
});
});

View File

@@ -1,8 +1,8 @@
import { describe, expect, test } from 'bun:test';
import type { LLMGateway } from '../../llm/gateway';
import type { LLMChatResponse, ModelRole } from '../../llm/types';
import type { LLMChatRequest, LLMChatResponse, ModelRole } from '../../llm/types';
import { TriageAgent } from '../agents/triage-agent';
import type { ChangedFile, FindingCategory, ReviewContext } from '../types';
import type { ChangedFile, ReviewContext } from '../types';
function makeChangedFile(overrides: Partial<ChangedFile> = {}): ChangedFile {
return {
@@ -37,58 +37,68 @@ function makeChatResponse(content: string | null): LLMChatResponse {
type ChatCall = {
role: ModelRole;
request: any;
request: Omit<LLMChatRequest, 'model'>;
};
function createMockGateway(
implementation: (role: ModelRole, request: any) => Promise<LLMChatResponse>
implementation: (
role: ModelRole,
request: Omit<LLMChatRequest, 'model'>
) => Promise<LLMChatResponse>
) {
const calls: ChatCall[] = [];
const gateway: Pick<LLMGateway, 'chatForRole'> = {
chatForRole: async (role: ModelRole, request: Omit<LLMChatRequest, 'model'>) => {
calls.push({ role, request });
return implementation(role, request);
},
};
return {
gateway: {
chatForRole: async (role: ModelRole, request: any) => {
calls.push({ role, request });
return implementation(role, request);
},
},
gateway,
getCalls: () => calls,
};
}
describe('TriageAgent task-based routing', () => {
test('heuristic: empty changedFiles -> skip mode with no tasks', async () => {
describe('TriageAgent hint-based planning', () => {
test('heuristic: empty changedFiles -> skip mode with hints only', async () => {
const { gateway, getCalls } = createMockGateway(async () =>
makeChatResponse(
JSON.stringify({
complexity: 'complex',
review_size: 'large',
mode: 'full',
relevant_domains: ['correctness', 'security', 'reliability', 'maintainability'],
suspected_entrypoints: ['src/ignored.ts'],
})
)
);
const agent = new TriageAgent(gateway as any);
const agent = new TriageAgent(gateway as unknown as LLMGateway);
const result = await agent.analyze(makeContext({ changedFiles: [] }));
expect(result.mode).toBe('skip');
expect(result.tasks).toHaveLength(0);
expect('tasks' in result).toBe(false);
expect(result.suspectedEntrypoints).toEqual([]);
expect(result.budgetHints).toEqual({
maxTurns: 0,
maxToolCalls: 0,
maxElapsedMs: 0,
tokenBudget: 0,
});
expect(result.changedFileSummary.totalFiles).toBe(0);
expect(getCalls()).toHaveLength(0);
});
test('heuristic: docs/assets only -> skip mode with no tasks', async () => {
test('heuristic: docs/assets only -> skip mode with hints only', async () => {
const { gateway, getCalls } = createMockGateway(async () =>
makeChatResponse(
JSON.stringify({
complexity: 'complex',
review_size: 'large',
mode: 'full',
relevant_domains: ['correctness', 'security', 'reliability', 'maintainability'],
suspected_entrypoints: ['src/ignored.ts'],
})
)
);
const agent = new TriageAgent(gateway as any);
const agent = new TriageAgent(gateway as unknown as LLMGateway);
const result = await agent.analyze(
makeContext({
@@ -101,13 +111,15 @@ describe('TriageAgent task-based routing', () => {
);
expect(result.mode).toBe('skip');
expect(result.tasks).toHaveLength(0);
expect('tasks' in result).toBe(false);
expect(result.suspectedEntrypoints).toEqual([]);
expect(result.changedFileSummary.files).toContain('M README.md (+1 -1)');
expect(getCalls()).toHaveLength(0);
});
test('heuristic: tiny single-file code change -> light correctness task', async () => {
test('heuristic: tiny single-file code change -> light hints only', async () => {
const { gateway, getCalls } = createMockGateway(async () => makeChatResponse(null));
const agent = new TriageAgent(gateway as any);
const agent = new TriageAgent(gateway as unknown as LLMGateway);
const result = await agent.analyze(
makeContext({
@@ -116,16 +128,24 @@ describe('TriageAgent task-based routing', () => {
);
expect(result.mode).toBe('light');
expect(result.tasks).toHaveLength(1);
expect(result.tasks[0].domain).toBe('correctness');
expect(result.tasks[0].allowTools).toBe(false);
expect(result.tasks[0].maxIterations).toBe(1);
expect('tasks' in result).toBe(false);
expect(result.suspectedEntrypoints).toEqual(['src/app.ts']);
expect(result.budgetHints).toMatchObject({
maxTurns: 4,
maxToolCalls: 4,
maxElapsedMs: 60_000,
});
expect(result.changedFileSummary).toMatchObject({
totalFiles: 1,
totalAdditions: 1,
totalDeletions: 2,
});
expect(getCalls()).toHaveLength(0);
});
test('heuristic: security-sensitive small change -> full correctness+security tasks', async () => {
test('heuristic: security-sensitive small change -> full hints only', async () => {
const { gateway, getCalls } = createMockGateway(async () => makeChatResponse(null));
const agent = new TriageAgent(gateway as any);
const agent = new TriageAgent(gateway as unknown as LLMGateway);
const result = await agent.analyze(
makeContext({
@@ -137,15 +157,20 @@ describe('TriageAgent task-based routing', () => {
);
expect(result.mode).toBe('full');
const domains = result.tasks.map((task) => task.domain);
expect(domains).toContain('correctness');
expect(domains).toContain('security');
expect('tasks' in result).toBe(false);
expect(result.riskTags).toContain('security-sensitive');
expect(result.suspectedEntrypoints).toEqual(['src/auth/service.ts', 'src/user/profile.ts']);
expect(result.budgetHints).toMatchObject({
maxTurns: 10,
maxToolCalls: 12,
maxElapsedMs: 180_000,
});
expect(getCalls()).toHaveLength(0);
});
test('heuristic: large PR by file count -> full mode with all domains', async () => {
test('heuristic: large PR by file count -> large full budget hints', async () => {
const { gateway, getCalls } = createMockGateway(async () => makeChatResponse(null));
const agent = new TriageAgent(gateway as any);
const agent = new TriageAgent(gateway as unknown as LLMGateway);
const changedFiles = Array.from({ length: 21 }, (_, index) =>
makeChangedFile({ path: `src/file-${index}.ts`, additions: 2, deletions: 1 })
@@ -155,31 +180,30 @@ describe('TriageAgent task-based routing', () => {
expect(result.mode).toBe('full');
expect(result.reviewSize).toBe('large');
expect(result.complexity).toBe('complex');
const expectedDomains: FindingCategory[] = [
'correctness',
'maintainability',
'reliability',
'security',
];
expect(result.tasks.map((task) => task.domain).sort()).toEqual(expectedDomains.sort());
expect('tasks' in result).toBe(false);
expect(result.budgetHints).toMatchObject({
maxTurns: 12,
maxToolCalls: 16,
maxElapsedMs: 240_000,
});
expect(result.suspectedEntrypoints).toHaveLength(12);
expect(result.changedFileSummary.files).toHaveLength(12);
expect(getCalls()).toHaveLength(0);
});
test('LLM fallback: inconclusive change uses planner and normalizes tasks', async () => {
test('LLM fallback: inconclusive change uses planner and normalizes hints', async () => {
const { gateway, getCalls } = createMockGateway(async () =>
makeChatResponse(
JSON.stringify({
complexity: 'standard',
review_size: 'medium',
mode: 'light',
relevant_domains: ['security', 'maintainability'],
risk_tags: ['security-sensitive'],
suspected_entrypoints: ['src/service/order.ts', 'src/controller/order.ts'],
rationale: '跨文件业务逻辑调整',
})
)
);
const agent = new TriageAgent(gateway as any);
const agent = new TriageAgent(gateway as unknown as LLMGateway);
const result = await agent.analyze(
makeContext({
@@ -197,11 +221,20 @@ describe('TriageAgent task-based routing', () => {
expect(calls[0].role).toBe('planner');
expect(calls[0].request.temperature).toBe(0);
expect(calls[0].request.responseFormat).toBe('json');
const plannerMessages = calls[0].request.messages as Array<{ role: string; content: string }>;
const plannerUserMessage = plannerMessages.find((message) => message.role === 'user');
expect(plannerUserMessage?.content).not.toContain('relevant_domains');
expect(plannerUserMessage?.content).not.toContain('"tasks"');
expect(plannerUserMessage?.content).not.toContain('可选领域');
expect(result.reviewSize).toBe('medium');
expect(result.mode).toBe('light');
expect(result.tasks.map((task) => task.domain)).toContain('correctness');
expect(result.tasks.map((task) => task.domain)).toContain('security');
expect('tasks' in result).toBe(false);
expect(result.suspectedEntrypoints).toEqual([
'src/service/order.ts',
'src/controller/order.ts',
]);
expect(result.riskTags).toEqual(['security-sensitive']);
expect(result.rationale).toBe('跨文件业务逻辑调整');
});
@@ -210,11 +243,10 @@ describe('TriageAgent task-based routing', () => {
const { gateway, getCalls } = createMockGateway(async () =>
makeChatResponse(
JSON.stringify({
complexity: 'standard',
review_size: 'medium',
mode: 'light',
relevant_domains: ['correctness'],
risk_tags: ['maintainability-hotspot'],
risk_tags: ['quality-sensitive'],
suspected_entrypoints: ['src/service/order.ts'],
rationale: '需要模型判断',
})
)
@@ -243,11 +275,11 @@ describe('TriageAgent task-based routing', () => {
expect(plannerSystemMessage?.content).toContain(longProjectPrompt);
});
test('LLM fallback: planner throws -> default full review with all domains', async () => {
test('LLM fallback: planner throws -> default full review hints', async () => {
const { gateway, getCalls } = createMockGateway(async () => {
throw new Error('planner unavailable');
});
const agent = new TriageAgent(gateway as any);
const agent = new TriageAgent(gateway as unknown as LLMGateway);
const result = await agent.analyze(
makeContext({
@@ -263,13 +295,13 @@ describe('TriageAgent task-based routing', () => {
expect(getCalls()).toHaveLength(1);
expect(result.mode).toBe('full');
const expectedDomains: FindingCategory[] = [
'correctness',
'maintainability',
'reliability',
'security',
];
expect(result.tasks.map((task) => task.domain).sort()).toEqual(expectedDomains.sort());
expect('tasks' in result).toBe(false);
expect(result.suspectedEntrypoints).toContain('src/service/foo.ts');
expect(result.budgetHints).toMatchObject({
maxTurns: 10,
maxToolCalls: 12,
maxElapsedMs: 180_000,
});
expect(result.rationale).toContain('LLM');
});
});

View File

@@ -0,0 +1,415 @@
import { createHash } from 'node:crypto';
import { getKernelAgentContext } from '../../agent-kernel/agents/kernel-agent-context';
import type { KernelHookRegistry } from '../../agent-kernel/hooks/kernel-hook-registry';
import config from '../../config';
import type { LLMGateway } from '../../llm/gateway';
import type { LLMMessage, LLMToolCall } from '../../llm/types';
import { mergeReviewPrompts, withGlobalPrompt } from '../../utils/global-prompt';
import { logger } from '../../utils/logger';
import { tokenCounter } from '../context/token-counter';
import { findingResponseSchema } from '../schema/finding-schema';
import { ToolRegistry } from '../tools/registry';
import { runToolOrchestration } from '../tools/tool-orchestration';
import type { ToolExecutionContext, ToolResult } from '../tools/types';
import {
AgentResult,
Finding,
FindingCategory,
REVIEW_DEFAULT_BUDGETS,
ReviewContext,
ReviewRun,
ReviewTask,
} from '../types';
/**
 * Lifecycle markers appended to `AutonomousReviewDiagnostics.stateSequence`
 * while `AutonomousReviewAgent.reviewWithOptions` runs.
 *
 * - `investigating`: recorded right before each model turn is requested.
 * - `tool_calling`: recorded right before the model's tool calls are executed.
 * - `synthesizing`: recorded when a stop condition forces a final-answer request.
 * - `finalizing`: recorded once a final answer string has been obtained.
 * - `completed`: recorded after the final answer was parsed into findings.
 * - `failed`: recorded when the run ends in the agent's catch block.
 */
export type AutonomousReviewState =
| 'investigating'
| 'tool_calling'
| 'synthesizing'
| 'finalizing'
| 'completed'
| 'failed';
/**
 * Bookkeeping collected during one autonomous review run and returned with
 * the agent result. Several counters double as stop conditions for the loop.
 */
interface AutonomousReviewDiagnostics {
// Copied from the task's `suspectedEntrypoints` when the run starts.
scopedPaths?: string[];
// Token count of the compact context string embedded in the user prompt.
compactContextTokens?: number;
// Number of model turns started so far; compared against the turn budget.
iterations: number;
// Ordered trace of lifecycle states the run passed through.
stateSequence: AutonomousReviewState[];
// Why the loop ended: 'maxElapsedMs', 'maxTurns', 'maxToolCalls',
// 'emptyResponses', 'toolFailures' or 'modelFinalized'. Left unset when the
// run fails with an exception.
stopReason?: string;
// Names of the tool calls that were admitted for execution, in order.
toolCallNames: string[];
// Count of admitted tool calls; compared against the tool-call budget.
toolCallCount: number;
// Number of findings obtained after the final answer parsed successfully.
parsedFindingCount?: number;
// Truncated copy of the most recent final, synthesized or repaired answer.
finalResponsePreview?: string;
// Error messages from each failed attempt to parse/validate the final answer.
parseErrors: string[];
// Turns in which the model returned neither tool calls nor non-blank content.
emptyResponseCount: number;
// Grows by the number of failed tool results in a batch and resets to 0 after
// a batch without failures.
consecutiveToolFailures: number;
}
/** Per-call overrides accepted by `AutonomousReviewAgent.reviewWithOptions`. */
interface AutonomousReviewOptions {
// Takes precedence over the project prompt passed to the agent's constructor
// when the system prompt is built.
projectPrompt?: string;
// When non-empty, added to the user prompt as a `compressedSummary` line.
contextSummary?: string;
}
/**
 * Effective limits for one review run, as produced by `resolveBudget` from
 * the task's explicit values or the mode-specific defaults.
 */
interface ResolvedBudget {
// Maximum number of model turns (at least 1).
maxTurns: number;
// Maximum number of tool calls across the whole run (may be 0).
maxToolCalls: number;
// Wall-clock limit in milliseconds, measured from the start of the run (at least 1).
maxElapsedMs: number;
}
/**
 * Derives a stable identifier for a finding from its category, file path,
 * line number and title.
 *
 * @returns The first 24 hex characters of the SHA-256 digest of the four
 *   values joined with `:`.
 */
function buildFingerprint(category: string, path: string, line: number, title: string): string {
  const identity = [category, path, line, title].join(':');
  const digest = createHash('sha256').update(identity).digest('hex');
  return digest.substring(0, 24);
}
/**
 * Produces a bounded preview of model output for diagnostics.
 *
 * @param content - Raw response text; `null`, `undefined` and `''` yield `undefined`.
 * @param maxLength - Maximum number of characters kept from `content` before truncation.
 * @returns `content` unchanged when it fits, otherwise its first `maxLength`
 *   characters followed by `…` so that a reader can tell the preview was cut.
 */
function previewContent(
  content: string | null | undefined,
  maxLength = 2000
): string | undefined {
  if (!content) return undefined;
  if (content.length <= maxLength) return content;
  // Without a marker a truncated preview is indistinguishable from a complete
  // (and possibly malformed) response when reading diagnostics.
  return `${content.slice(0, maxLength)}…`;
}
/**
 * Computes the effective loop limits for a review task.
 *
 * Defaults are taken from `REVIEW_DEFAULT_BUDGETS`: `largeFull` for a full
 * review of a large change, `full` for any other full review, `light`
 * otherwise. Values set on the task override the defaults, and every limit is
 * clamped to a floor (1 turn, 0 tool calls, 1 ms).
 */
function resolveBudget(task: ReviewTask): ResolvedBudget {
  const isFullReview = task.mode === 'full';
  const defaults = !isFullReview
    ? REVIEW_DEFAULT_BUDGETS.light
    : task.reviewSize === 'large'
      ? REVIEW_DEFAULT_BUDGETS.largeFull
      : REVIEW_DEFAULT_BUDGETS.full;

  const turns = task.maxTurns ?? defaults.maxTurns;
  const toolCalls = task.maxToolCalls ?? defaults.maxToolCalls;
  const elapsedMs = task.maxElapsedMs ?? defaults.maxElapsedMs;

  return {
    maxTurns: Math.max(1, turns),
    maxToolCalls: Math.max(0, toolCalls),
    maxElapsedMs: Math.max(1, elapsedMs),
  };
}
/**
 * Serializes the changed files, parsed diff and file contents of a review
 * context as pretty-printed JSON, clipping the text with the token counter
 * when it exceeds the task's token budget.
 */
function toCompactContext(context: ReviewContext, task: ReviewTask): string {
  const { changedFiles, parsedDiff, fileContents } = context;
  const serialized = JSON.stringify(
    { changedFiles, diffSnippets: parsedDiff, fileContents },
    null,
    2
  );
  if (tokenCounter.count(serialized) > task.tokenBudget) {
    return tokenCounter.clip(serialized, task.tokenBudget);
  }
  return serialized;
}
/**
 * Assembles the system prompt for the autonomous reviewer.
 *
 * The prompt lists every registered tool as `- name: description` (or a
 * placeholder line when none are registered), followed by the fixed working
 * rules. The result is passed through `withGlobalPrompt` together with the
 * merged global and project prompts.
 */
function buildSystemPrompt(toolRegistry?: ToolRegistry, projectPrompt?: string): string {
  const tools = toolRegistry?.getAll() ?? [];
  const toolList =
    tools.length > 0
      ? tools.map((tool) => `- ${tool.name}: ${tool.description}`).join('\n')
      : '无可用工具。';

  const basePrompt = [
    '你是 Claude Code/Codex 风格的自主代码审查代理。你负责完整审查本次变更,不按 correctness/security/quality 拆分任务。',
    '可用工具:',
    toolList,
    '工作规则:',
    '1. 先调查,再结论。你可以自主选择 search_code、read_file 或其他工具,不要等待外部程序替你选文件。',
    '2. 不要按文件孤立审查;需要跨文件确认 API 持久化、状态流、权限、错误分支、边界条件和相似实现。',
    '3. 仅报告有明确证据、会导致真实功能/安全/可靠性问题的 finding。',
    '4. 当需要更多信息时直接调用工具;当调查完成时输出最终 JSON{"findings":[...]}。无问题返回 {"findings":[]}。',
    '5. 每个 finding 必须包含 severity、confidence、path、line、title、detail、evidence、suggestion可选 category 为 correctness/security/quality。',
    '6. suspectedEntrypoints 只是调查优先级提示不是范围过滤请仍然审查所有变更文件、diff 片段和可用文件内容。',
  ].join('\n');

  const extraPrompt = mergeReviewPrompts(config.review.globalPrompt, projectPrompt);
  return withGlobalPrompt(basePrompt, extraPrompt);
}
/**
 * Assembles the first user message of a review run.
 *
 * The message contains the task parameters (mode, size, risk tags, suspected
 * entry points, token budget), an optional compressed summary line, a numbered
 * list of changed files, the investigation instruction and finally the compact
 * context payload.
 *
 * @param context - Review context; only `changedFiles` is read here.
 * @param task - Task whose parameters are echoed to the model.
 * @param compactContext - Pre-serialized context appended at the end.
 * @param contextSummary - Optional summary; an empty line is emitted when absent.
 */
function buildUserPrompt(
  context: ReviewContext,
  task: ReviewTask,
  compactContext: string,
  contextSummary?: string
): string {
  const fileLines: string[] = [];
  context.changedFiles.forEach((file, position) => {
    fileLines.push(
      `${position + 1}. ${file.path} (+${file.additions}/-${file.deletions}, ${file.status})`
    );
  });
  const changedFiles = fileLines.join('\n');

  const riskTags = task.riskTags.length ? task.riskTags.join(', ') : 'none';
  const entrypoints = task.suspectedEntrypoints?.length
    ? task.suspectedEntrypoints.join(', ')
    : 'none';
  const summaryLine = contextSummary ? `- compressedSummary: ${contextSummary}` : '';

  return [
    '审查任务:',
    `- mode: ${task.mode}`,
    `- reviewSize: ${task.reviewSize ?? 'unknown'}`,
    `- riskTags: ${riskTags}`,
    `- suspectedEntrypoints: ${entrypoints}`,
    `- tokenBudget: ${task.tokenBudget}`,
    summaryLine,
    '变更文件清单:',
    changedFiles || '无变更文件',
    '请自主调查这些变更,模型自己决定是否调用工具以及调用哪些工具。不要只凭文件名或 diff 猜测;完成调查后输出最终 JSON。',
    '压缩上下文:',
    compactContext,
  ].join('\n');
}
/**
 * Single-agent reviewer that lets the model drive its own investigation.
 *
 * Each turn sends the running conversation to the `specialist` model role.
 * Tool calls returned by the model are executed and their results appended to
 * the conversation; the first non-blank reply without tool calls is taken as
 * the final answer. If a budget or failure threshold is reached first, the
 * model is asked once to synthesize a final JSON answer from what it has seen.
 * The final answer is validated against `findingResponseSchema`, with a
 * bounded number of repair round-trips.
 */
export class AutonomousReviewAgent {
  /** Blank model replies tolerated before a final answer is forced. */
  private static readonly MAX_EMPTY_RESPONSES = 2;
  /** Accumulated tool failures tolerated before a final answer is forced. */
  private static readonly MAX_CONSECUTIVE_TOOL_FAILURES = 3;
  /** Repair round-trips allowed after the first parse attempt fails. */
  private static readonly MAX_REPAIR_ATTEMPTS = 2;

  constructor(
    private readonly gateway: LLMGateway,
    private readonly toolRegistry?: ToolRegistry,
    private readonly hookRegistry?: KernelHookRegistry,
    private readonly agentName = 'AutonomousReviewAgent',
    private readonly projectPrompt?: string
  ) {}

  /** Runs a review with default options; see {@link reviewWithOptions}. */
  async review(run: ReviewRun, context: ReviewContext, task: ReviewTask): Promise<AgentResult> {
    return this.reviewWithOptions(run, context, task);
  }

  /**
   * Runs the investigate → tool-call → finalize loop for one task.
   *
   * @param run - The review run; its id is passed to tools and used in logs.
   * @param context - Diff, changed files and workspace paths under review.
   * @param task - Mode, hints and budgets for this review.
   * @param options - Optional prompt override and context summary.
   * @returns The agent name, the parsed findings and (unless the review was
   *   skipped) the collected diagnostics. Never rejects: any error is logged
   *   and reported as an empty finding list with a `failed` state.
   */
  async reviewWithOptions(
    run: ReviewRun,
    context: ReviewContext,
    task: ReviewTask,
    options: AutonomousReviewOptions = {}
  ): Promise<AgentResult> {
    // Nothing to review: blank diff or an explicit skip.
    if (!context.diff.trim() || task.mode === 'skip') {
      return { agentName: this.agentName, findings: [] };
    }

    const budget = resolveBudget(task);
    const startTime = Date.now();
    const compactContext = toCompactContext(context, task);
    const diagnostics: AutonomousReviewDiagnostics = {
      scopedPaths: task.suspectedEntrypoints,
      compactContextTokens: tokenCounter.count(compactContext),
      iterations: 0,
      stateSequence: [],
      toolCallNames: [],
      toolCallCount: 0,
      parseErrors: [],
      emptyResponseCount: 0,
      consecutiveToolFailures: 0,
    };
    const messages: LLMMessage[] = [
      {
        role: 'system',
        content: buildSystemPrompt(this.toolRegistry, options.projectPrompt ?? this.projectPrompt),
      },
      {
        role: 'user',
        content: buildUserPrompt(context, task, compactContext, options.contextSummary),
      },
    ];
    let finalAnswer: string | undefined;
    const transition = (next: AutonomousReviewState) => {
      diagnostics.stateSequence.push(next);
    };

    try {
      while (true) {
        // All stop conditions share one exit path: ask the model for a final
        // answer based on the evidence gathered so far.
        const stopReason = this.resolveStopReason(budget, startTime, diagnostics);
        if (stopReason) {
          diagnostics.stopReason = stopReason;
          transition('synthesizing');
          finalAnswer = await this.synthesizeFinalAnswer(messages, diagnostics);
          transition('finalizing');
          break;
        }

        transition('investigating');
        diagnostics.iterations += 1;
        const response = await this.gateway.chatForRole('specialist', {
          messages,
          temperature: 0,
          tools: this.toolRegistry?.getAll().length
            ? this.toolRegistry.toToolDefinitions()
            : undefined,
        });

        if (response.toolCalls.length > 0) {
          // Admit only as many calls as the remaining tool budget allows; the
          // assistant message records exactly the admitted calls so that every
          // recorded call is later paired with a tool result.
          const allowedToolCalls = response.toolCalls.slice(
            0,
            Math.max(0, budget.maxToolCalls - diagnostics.toolCallCount)
          );
          diagnostics.toolCallNames.push(...allowedToolCalls.map((toolCall) => toolCall.name));
          diagnostics.toolCallCount += allowedToolCalls.length;
          messages.push({
            role: 'assistant',
            content: response.content || '',
            toolCalls: allowedToolCalls,
          });
          transition('tool_calling');
          const toolResults = await this.executeTools(allowedToolCalls, {
            workspacePath: context.workspacePath,
            mirrorPath: context.mirrorPath,
            runId: run.id,
          });
          // A batch without failures resets the counter; otherwise every failed
          // call in the batch counts towards the threshold.
          const failures = toolResults.filter((toolResult) => !toolResult.success).length;
          diagnostics.consecutiveToolFailures =
            failures > 0 ? diagnostics.consecutiveToolFailures + failures : 0;
          for (const toolResult of toolResults) {
            messages.push({
              role: 'tool',
              toolCallId: toolResult.toolCallId,
              content: JSON.stringify(toolResult.result || { error: toolResult.error }),
            });
          }
          continue;
        }

        if (!response.content?.trim()) {
          diagnostics.emptyResponseCount += 1;
          messages.push({
            role: 'assistant',
            content: '',
          });
          continue;
        }

        // Non-blank text without tool calls: the model considers itself done.
        diagnostics.stopReason = 'modelFinalized';
        finalAnswer = response.content;
        diagnostics.finalResponsePreview = previewContent(finalAnswer);
        transition('finalizing');
        break;
      }

      const findings = await this.finalizeFindings(
        messages,
        finalAnswer ?? '{"findings":[]}',
        diagnostics
      );
      diagnostics.parsedFindingCount = findings.length;
      transition('completed');
      return {
        agentName: this.agentName,
        findings,
        diagnostics,
      };
    } catch (error) {
      transition('failed');
      logger.error(`${this.agentName} 执行失败`, {
        runId: run.id,
        error: error instanceof Error ? error.message : String(error),
      });
      return { agentName: this.agentName, findings: [], diagnostics };
    }
  }

  /**
   * Returns the name of the first stop condition that currently holds, or
   * `undefined` when the loop may continue. Conditions are checked in a fixed
   * order: elapsed time, turn budget, tool-call budget, blank replies, tool
   * failures.
   */
  private resolveStopReason(
    budget: ResolvedBudget,
    startTime: number,
    diagnostics: AutonomousReviewDiagnostics
  ): string | undefined {
    if (Date.now() - startTime >= budget.maxElapsedMs) return 'maxElapsedMs';
    if (diagnostics.iterations >= budget.maxTurns) return 'maxTurns';
    if (diagnostics.toolCallCount >= budget.maxToolCalls) return 'maxToolCalls';
    if (diagnostics.emptyResponseCount >= AutonomousReviewAgent.MAX_EMPTY_RESPONSES) {
      return 'emptyResponses';
    }
    if (
      diagnostics.consecutiveToolFailures >= AutonomousReviewAgent.MAX_CONSECUTIVE_TOOL_FAILURES
    ) {
      return 'toolFailures';
    }
    return undefined;
  }

  /**
   * Asks the model for a final JSON answer without offering tools.
   *
   * Appends both the instruction and the model's reply to `messages` and
   * records a preview of the reply in `diagnostics`.
   *
   * @returns The reply content, or an empty-findings JSON document when the
   *   model returned no content.
   */
  private async synthesizeFinalAnswer(
    messages: LLMMessage[],
    diagnostics: AutonomousReviewDiagnostics
  ): Promise<string> {
    messages.push({
      role: 'user',
      content:
        '你已达到停止条件。请基于现有调查证据输出最终 JSON{"findings":[...]}。不要调用工具,不要输出额外文字。',
    });
    const response = await this.gateway.chatForRole('specialist', {
      messages,
      temperature: 0,
      responseFormat: 'json',
    });
    const content = response.content || '{"findings":[]}';
    diagnostics.finalResponsePreview = previewContent(content);
    messages.push({ role: 'assistant', content });
    return content;
  }

  /**
   * Parses and validates the final answer, asking the model to repair it when
   * it is not valid JSON or does not satisfy `findingResponseSchema`.
   *
   * Each finding defaults to the `correctness` category and receives a
   * generated fingerprint when it carries none.
   *
   * @throws The last parse/validation error once all repair attempts are used.
   */
  private async finalizeFindings(
    messages: LLMMessage[],
    content: string,
    diagnostics: AutonomousReviewDiagnostics
  ): Promise<Omit<Finding, 'id' | 'runId' | 'published'>[]> {
    let current = content;
    for (let attempt = 0; attempt <= AutonomousReviewAgent.MAX_REPAIR_ATTEMPTS; attempt++) {
      try {
        const parsed = findingResponseSchema.parse(JSON.parse(current));
        return parsed.findings.map((item) => {
          const category: FindingCategory = item.category ?? 'correctness';
          return {
            ...item,
            category,
            fingerprint:
              item.fingerprint || buildFingerprint(category, item.path, item.line, item.title),
          };
        });
      } catch (parseError) {
        const message = parseError instanceof Error ? parseError.message : String(parseError);
        diagnostics.parseErrors.push(message);
        if (attempt === AutonomousReviewAgent.MAX_REPAIR_ATTEMPTS) {
          throw parseError;
        }
        // A synthesized answer is already the last message of the conversation;
        // appending it again would send two identical assistant turns in a row.
        const last = messages[messages.length - 1];
        const alreadyRecorded = last?.role === 'assistant' && last.content === current;
        if (!alreadyRecorded) {
          messages.push({ role: 'assistant', content: current });
        }
        messages.push({
          role: 'user',
          content:
            '上一次最终结果无法通过 findingResponseSchema。请修复为严格 JSON{"findings":[{"severity":"high|medium|low","confidence":0.0,"path":"...","line":1,"title":"...","detail":"...","evidence":"...","suggestion":"..."}]}。不要输出额外文字。',
        });
        const repair = await this.gateway.chatForRole('specialist', {
          messages,
          temperature: 0,
          responseFormat: 'json',
        });
        current = repair.content || '{"findings":[]}';
        diagnostics.finalResponsePreview = previewContent(current);
      }
    }
    // Not reachable (the last attempt either returns or throws); present so
    // that every code path yields a value for the compiler.
    return [];
  }

  /**
   * Executes the given tool calls through the shared tool orchestration,
   * tagging the execution context with this agent's name and the current
   * kernel agent id.
   *
   * @returns One result per orchestrated call, or an empty list when no
   *   registry is configured or no calls were given.
   */
  private async executeTools(
    toolCalls: LLMToolCall[],
    context: ToolExecutionContext
  ): Promise<ToolResult[]> {
    if (!this.toolRegistry || toolCalls.length === 0) {
      return [];
    }
    const agentContext = getKernelAgentContext();
    const orchestration = await runToolOrchestration({
      registry: this.toolRegistry,
      toolCalls,
      context: {
        ...context,
        agentName: this.agentName,
        agentId: agentContext?.agentId,
        source: 'react',
      },
      hookRegistry: this.hookRegistry,
    });
    return orchestration.results;
  }
}

View File

@@ -1,17 +0,0 @@
import type { LLMGateway } from '../../llm/gateway';
import type { LearningSystem } from '../learning/learning-system';
import { ToolRegistry } from '../tools/registry';
import { SpecialistAgent } from './specialist-agent';
/**
 * Specialist reviewer preconfigured for the `correctness` finding category.
 *
 * Only the category, display name and focus prompt are fixed here; the gateway,
 * tool registry and learning system are forwarded unchanged to `SpecialistAgent`.
 */
export class CorrectnessAgent extends SpecialistAgent {
  constructor(gateway: LLMGateway, toolRegistry?: ToolRegistry, learningSystem?: LearningSystem) {
    // Review focus passed to the base class as its focus prompt.
    const focusPrompt = '业务逻辑正确性、边界条件、空值处理和明显bug';
    super(gateway, 'correctness', 'Correctness Agent', focusPrompt, toolRegistry, learningSystem);
  }
}

View File

@@ -1,347 +0,0 @@
import config from '../../config';
import type { LLMGateway } from '../../llm/gateway';
import type { LLMMessage } from '../../llm/types';
import { mergeReviewPrompts, withCoreGlobalPrompt } from '../../utils/global-prompt';
import { logger } from '../../utils/logger';
import { Finding, FindingSeverity } from '../types';
import { SpecialistAgent } from './specialist-agent';
/** One agent's verdict on a finding, as exchanged between agents during a debate. */
interface AgentOpinion {
/** Name of the agent that issued this opinion. */
agentName: string;
/** The agent's confidence in its verdict; the prompts request a value between 0.0 and 1.0. */
confidence: number;
/** Severity the agent assigns to the finding. */
severity: FindingSeverity;
/** Free-text justification returned by the agent. */
reasoning: string;
/** Whether the agent considers the finding real (`true`) or a false positive (`false`). */
isValid: boolean;
}
/**
 * Lets several specialist agents "debate" a single finding and folds their
 * opinions into an adjusted confidence, severity and detail text.
 *
 * Flow: every agent gives an initial opinion, then for up to `maxRounds` rounds
 * each agent may revise its opinion after seeing the others'. The debate stops
 * early once `hasConsensus` is satisfied.
 *
 * Model output is validated before use: malformed fields fall back to neutral
 * defaults instead of propagating `NaN` or throwing later in the debate.
 */
export class DebateOrchestrator {
  private gateway: LLMGateway;

  constructor(gateway: LLMGateway) {
    this.gateway = gateway;
  }

  /**
   * Runs the debate for one finding.
   *
   * @param finding Finding under discussion; never mutated.
   * @param agents Participating agents. With fewer than two the finding is returned unchanged.
   * @param maxRounds Maximum number of revision rounds after the initial opinions.
   * @param projectPrompt Optional project-level prompt merged into the system prompt.
   * @returns A copy of the finding carrying the debated confidence/severity/detail.
   */
  async conductDebate(
    finding: Omit<Finding, 'id' | 'runId' | 'published'>,
    agents: SpecialistAgent[],
    maxRounds = 2,
    projectPrompt?: string
  ): Promise<Omit<Finding, 'id' | 'runId' | 'published'>> {
    if (agents.length < 2) {
      logger.debug('Debate需要至少2个agents跳过');
      return finding;
    }

    logger.info('启动Debate', {
      finding: finding.title,
      agentsCount: agents.length,
      maxRounds,
    });

    const opinions = new Map<string, AgentOpinion>();

    // Collect the initial, independent opinions.
    for (const agent of agents) {
      const opinion = await this.getAgentOpinion(agent, finding, projectPrompt);
      opinions.set(this.nameOf(agent), opinion);
    }

    // Debate rounds: agents run one after another so each sees the latest opinions.
    for (let round = 0; round < maxRounds; round++) {
      logger.debug(`Debate Round ${round + 1}/${maxRounds}`, {
        finding: finding.title,
      });

      for (const agent of agents) {
        const agentName = this.nameOf(agent);
        const otherOpinions = Array.from(opinions.entries()).filter(([name]) => name !== agentName);
        const revisedOpinion = await this.reviseOpinion(
          agent,
          finding,
          otherOpinions,
          opinions,
          projectPrompt
        );
        opinions.set(agentName, revisedOpinion);
      }

      // Stop as soon as the agents agree.
      if (this.hasConsensus(opinions)) {
        logger.info(`Debate在第${round + 1}轮达成共识`, {
          finding: finding.title,
        });
        break;
      }
    }

    return this.formConsensus(finding, opinions);
  }

  /** Reads an agent's name through one narrowly scoped cast. */
  private nameOf(agent: SpecialistAgent): string {
    // `agentName` is not publicly accessible on SpecialistAgent, hence the cast.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    return (agent as any).agentName;
  }

  /**
   * Sends one opinion request to the model and returns the validated opinion.
   *
   * @throws Error when the model returns no content or the content is not a JSON object.
   */
  private async requestOpinion(request: {
    agentName: string;
    systemPrompt: string;
    userPrompt: string;
    temperature: number;
    emptyResponseMessage: string;
    projectPrompt?: string;
  }): Promise<AgentOpinion> {
    const messages: LLMMessage[] = [
      {
        role: 'system',
        content: withCoreGlobalPrompt(
          request.systemPrompt,
          mergeReviewPrompts(config.review.globalPrompt, request.projectPrompt)
        ),
      },
      { role: 'user', content: request.userPrompt },
    ];

    const response = await this.gateway.chatForRole('specialist', {
      messages,
      temperature: request.temperature,
      responseFormat: 'json',
    });

    if (!response.content) {
      throw new Error(request.emptyResponseMessage);
    }
    return this.parseOpinion(request.agentName, response.content);
  }

  /**
   * Parses and validates the model's JSON opinion.
   *
   * Fallbacks for malformed fields: confidence 0.5, severity 'medium',
   * reasoning '', isValid true. A confidence of 0 is preserved, and numeric
   * confidences are clamped to [0, 1].
   *
   * @throws SyntaxError when `content` is not JSON; Error when it is not a JSON object.
   */
  private parseOpinion(agentName: string, content: string): AgentOpinion {
    const raw: unknown = JSON.parse(content);
    if (typeof raw !== 'object' || raw === null) {
      throw new Error('Agent opinion is not a JSON object');
    }
    const fields = raw as Record<string, unknown>;

    const rawConfidence = fields.confidence;
    const confidence =
      typeof rawConfidence === 'number' && Number.isFinite(rawConfidence)
        ? Math.min(1, Math.max(0, rawConfidence))
        : 0.5;

    const rawSeverity = fields.severity;
    const severity: FindingSeverity =
      rawSeverity === 'high' || rawSeverity === 'medium' || rawSeverity === 'low'
        ? rawSeverity
        : 'medium';

    return {
      agentName,
      confidence,
      severity,
      reasoning: typeof fields.reasoning === 'string' ? fields.reasoning : '',
      isValid: typeof fields.is_valid === 'boolean' ? fields.is_valid : true,
    };
  }

  /**
   * Asks one agent for its initial opinion on the finding.
   * On any failure the finding's own confidence/severity are returned as the opinion.
   */
  private async getAgentOpinion(
    agent: SpecialistAgent,
    finding: Omit<Finding, 'id' | 'runId' | 'published'>,
    projectPrompt?: string
  ): Promise<AgentOpinion> {
    const agentName = this.nameOf(agent);
    const prompt = `你是${agentName}。评估以下代码问题的严重性、置信度和有效性。
问题:
- Title: ${finding.title}
- Detail: ${finding.detail}
- Evidence: ${finding.evidence}
- Current Severity: ${finding.severity}
- Current Confidence: ${finding.confidence}
从你的专业角度判断:
1. 这个问题是否真实存在(不是误报)?
2. 严重性评估是否准确?
3. 你的置信度是多少?
4. 你的判断理由?
返回JSON
{
"is_valid": true/false,
"confidence": 0.0-1.0,
"severity": "high" | "medium" | "low",
"reasoning": "你的判断理由(详细说明)"
}`;

    try {
      return await this.requestOpinion({
        agentName,
        systemPrompt: `你是${agentName},从你的专业角度独立评估代码问题。`,
        userPrompt: prompt,
        temperature: 0.2,
        emptyResponseMessage: 'Agent opinion返回空',
        projectPrompt,
      });
    } catch (error) {
      logger.error(`获取${agentName}意见失败`, {
        error: error instanceof Error ? error.message : String(error),
      });
      return {
        agentName,
        confidence: finding.confidence,
        severity: finding.severity,
        reasoning: '获取意见失败,使用默认值',
        isValid: true,
      };
    }
  }

  /**
   * Asks one agent to reconsider its opinion in light of the other agents' opinions.
   * On failure the agent's current opinion is kept (or a neutral default if it has none).
   */
  private async reviseOpinion(
    agent: SpecialistAgent,
    finding: Omit<Finding, 'id' | 'runId' | 'published'>,
    otherOpinions: [string, AgentOpinion][],
    opinions: Map<string, AgentOpinion>,
    projectPrompt?: string
  ): Promise<AgentOpinion> {
    const agentName = this.nameOf(agent);
    const peerSummary = otherOpinions
      .map(
        ([name, op]) =>
          `- ${name}: ${op.isValid ? '有效' : '误报'}, ${op.severity} (置信度 ${op.confidence.toFixed(2)})\n 理由: ${op.reasoning}`
      )
      .join('\n');
    const prompt = `你是${agentName}。重新评估以下问题,考虑其他专家的意见。
问题:
- Title: ${finding.title}
- Evidence: ${finding.evidence}
其他专家意见:
${peerSummary}
基于同行的意见,你是否改变观点?
返回JSON
{
"is_valid": true/false,
"confidence": 0.0-1.0,
"severity": "high" | "medium" | "low",
"reasoning": "修正后的理由或坚持原判断的原因"
}`;

    try {
      return await this.requestOpinion({
        agentName,
        systemPrompt: `你是${agentName},根据同行意见重新评估,但也要坚持你的专业判断。`,
        userPrompt: prompt,
        temperature: 0.3, // slightly higher to allow some flexibility
        emptyResponseMessage: 'Revised opinion返回空',
        projectPrompt,
      });
    } catch (error) {
      logger.error(`${agentName}修订意见失败`, {
        error: error instanceof Error ? error.message : String(error),
      });
      // Keep whatever this agent said last.
      const currentOpinion = opinions.get(agentName);
      return (
        currentOpinion || {
          agentName,
          confidence: 0.5,
          severity: 'medium',
          reasoning: '修订失败',
          isValid: true,
        }
      );
    }
  }

  /**
   * Consensus is reached when at least 80% of agents agree on validity (either
   * way), or at least 70% agree on one severity. No opinions counts as consensus.
   */
  private hasConsensus(opinions: Map<string, AgentOpinion>): boolean {
    const votes = Array.from(opinions.values());
    if (votes.length === 0) return true;

    const validCount = votes.filter((v) => v.isValid).length;
    const validRatio = validCount / votes.length;
    if (validRatio >= 0.8 || validRatio <= 0.2) {
      return true;
    }

    const severityCounts: Record<FindingSeverity, number> = {
      high: 0,
      medium: 0,
      low: 0,
    };
    votes.forEach((v) => {
      severityCounts[v.severity]++;
    });

    const maxCount = Math.max(...Object.values(severityCounts));
    return maxCount / votes.length >= 0.7;
  }

  /**
   * Folds the opinions into the final finding.
   *
   * - No strict majority for "valid": confidence is capped at 0.4 and a note is appended.
   * - Otherwise: confidence is the mean of the valid votes, and severity is the
   *   confidence-weighted winner (ties resolve in the order high, medium, low).
   *   If all weights are zero the finding's original severity is kept.
   */
  private formConsensus(
    finding: Omit<Finding, 'id' | 'runId' | 'published'>,
    opinions: Map<string, AgentOpinion>
  ): Omit<Finding, 'id' | 'runId' | 'published'> {
    const votes = Array.from(opinions.values());
    if (votes.length === 0) {
      return finding;
    }

    const validVotes = votes.filter((v) => v.isValid);
    const validCount = validVotes.length;
    const isValid = validCount > votes.length / 2;

    if (!isValid) {
      logger.info('Debate判定为无效finding', {
        finding: finding.title,
        validVotes: validCount,
        totalVotes: votes.length,
      });
      return {
        ...finding,
        confidence: Math.min(finding.confidence, 0.4),
        detail: `${finding.detail}\n\n**Debate结果**: 多数专家认为此问题可能是误报(${validCount}/${votes.length}认为有效)`,
      };
    }

    // A strict majority guarantees at least one valid vote, so the division is safe.
    const avgConfidence = validVotes.reduce((sum, v) => sum + v.confidence, 0) / validVotes.length;

    const severityVotes: Record<FindingSeverity, number> = {
      high: 0,
      medium: 0,
      low: 0,
    };
    validVotes.forEach((vote) => {
      severityVotes[vote.severity] += vote.confidence;
    });

    // Strict '>' keeps the first maximum in key order and leaves the original
    // severity in place when no severity received any weight.
    let agreedSeverity: FindingSeverity = finding.severity;
    let topWeight = 0;
    for (const [severity, weight] of Object.entries(severityVotes) as [FindingSeverity, number][]) {
      if (weight > topWeight) {
        topWeight = weight;
        agreedSeverity = severity;
      }
    }

    const synthesizedDetail = `${finding.detail}\n\n**专家Debate意见汇总**\n${validVotes
      .map(
        (v) => `- ${v.agentName} (${v.severity}, 置信度${v.confidence.toFixed(2)}): ${v.reasoning}`
      )
      .join('\n')}`;

    logger.info('Debate达成共识', {
      finding: finding.title,
      originalSeverity: finding.severity,
      agreedSeverity,
      originalConfidence: finding.confidence,
      avgConfidence,
      validVotes: validVotes.length,
    });

    return {
      ...finding,
      confidence: avgConfidence,
      severity: agreedSeverity,
      detail: synthesizedDetail,
    };
  }
}

View File

@@ -1,54 +0,0 @@
import { Finding, ReviewDecision } from '../types';
// Numeric rank for each severity level (higher = more severe).
const severityWeight: Record<Finding['severity'], number> = {
high: 3,
medium: 2,
low: 1,
};
/**
 * Builds the markdown summary that introduces a review.
 *
 * @param findings Findings that will be published.
 * @returns A fixed "nothing found" sentence for an empty list; otherwise a
 *   headline with the total and per-severity counts followed by a prioritisation hint.
 */
function summarizeFindings(findings: Omit<Finding, 'id' | 'runId' | 'published'>[]): string {
  const total = findings.length;
  if (total === 0) {
    return '本次变更未发现需要立即处理的高置信问题。建议人工快速复核关键业务路径。';
  }

  const countOf = (level: string): number =>
    findings.reduce((seen, item) => (item.severity === level ? seen + 1 : seen), 0);
  const high = countOf('high');
  const medium = countOf('medium');
  const low = countOf('low');

  const headline = `本次 AI Agent 审查共识别 ${total} 个问题,其中 high ${high} 个、medium ${medium} 个、low ${low} 个。`;
  const advice = '以下评论按风险优先级自动发布,建议优先处理 high 与 medium 项。';
  return `${headline}\n\n${advice}`;
}
/**
 * Merges the findings reported by all agents into one review decision.
 */
export class JudgeAgent {
  /**
   * De-duplicates findings by fingerprint and orders them by score, where
   * score = severity weight × confidence.
   *
   * For duplicates, the first occurrence is kept unless a later one scores
   * strictly higher.
   *
   * @param results Findings from all agents, possibly containing duplicates.
   * @returns The summary markdown plus the unique findings, highest score first.
   */
  judge(results: Omit<Finding, 'id' | 'runId' | 'published'>[]): ReviewDecision {
    const scoreOf = (item: Omit<Finding, 'id' | 'runId' | 'published'>): number =>
      severityWeight[item.severity] * item.confidence;

    const winners = new Map<string, Omit<Finding, 'id' | 'runId' | 'published'>>();
    for (const candidate of results) {
      const incumbent = winners.get(candidate.fingerprint);
      if (!incumbent || scoreOf(candidate) > scoreOf(incumbent)) {
        winners.set(candidate.fingerprint, candidate);
      }
    }

    const findings = Array.from(winners.values()).sort(
      (left, right) => scoreOf(right) - scoreOf(left)
    );
    return { summaryMarkdown: summarizeFindings(findings), findings };
  }
}

View File

@@ -1,17 +0,0 @@
import type { LLMGateway } from '../../llm/gateway';
import type { LearningSystem } from '../learning/learning-system';
import { ToolRegistry } from '../tools/registry';
import { SpecialistAgent } from './specialist-agent';
/**
 * Specialist reviewer preconfigured for the `maintainability` finding category.
 *
 * Only the category, display name and focus prompt are fixed here; the gateway,
 * tool registry and learning system are forwarded unchanged to `SpecialistAgent`.
 */
export class MaintainabilityAgent extends SpecialistAgent {
  constructor(gateway: LLMGateway, toolRegistry?: ToolRegistry, learningSystem?: LearningSystem) {
    // Review focus passed to the base class as its focus prompt.
    const focusPrompt = '可维护性、复杂度、接口破坏风险和可测试性不足';
    super(
      gateway,
      'maintainability',
      'Maintainability Agent',
      focusPrompt,
      toolRegistry,
      learningSystem
    );
  }
}

View File

@@ -1,194 +0,0 @@
import { createHash } from 'node:crypto';
import config from '../../config';
import type { LLMGateway } from '../../llm/gateway';
import type { LLMMessage } from '../../llm/types';
import { mergeReviewPrompts, withGlobalPrompt } from '../../utils/global-prompt';
import { logger } from '../../utils/logger';
import { tokenCounter } from '../context/token-counter';
import { LearningSystem } from '../learning/learning-system';
import { findingResponseSchema } from '../schema/finding-schema';
import { ToolRegistry } from '../tools/registry';
import { AgentResult, Finding, FindingCategory, ReviewContext, ReviewRun } from '../types';
import { CriticAgent, CritiqueResult } from './critic-agent';
import { SpecialistAgent, type SpecialistReviewOptions } from './specialist-agent';
/**
 * Derives a stable 24-character identifier for a finding.
 *
 * @returns The first 24 hex characters of the SHA-256 digest of
 *   `category:path:line:title`.
 */
function buildFingerprint(category: string, path: string, line: number, title: string): string {
  const hasher = createHash('sha256');
  hasher.update(`${category}:${path}:${line}:${title}`);
  const hexDigest = hasher.digest('hex');
  return hexDigest.substring(0, 24);
}
/**
 * Specialist agent that improves its own review through a critique/refine loop:
 * the base review produces a draft, a `CriticAgent` scores it, and the model is
 * asked to refine the draft based on that critique.
 */
export class ReflexionAgent extends SpecialistAgent {
  private criticAgent: CriticAgent;

  constructor(
    gateway: LLMGateway,
    category: FindingCategory,
    agentName: string,
    focusPrompt: string,
    toolRegistry?: ToolRegistry,
    learningSystem?: LearningSystem
  ) {
    super(gateway, category, agentName, focusPrompt, toolRegistry, learningSystem);
    this.criticAgent = new CriticAgent(gateway);
  }

  /**
   * Reviews the change with up to `maxReflectionRounds` draft → critique → refine rounds.
   *
   * The loop ends early when a draft scores at least 0.9 with no critic issues.
   * Otherwise the draft with the highest quality score above 0 is returned.
   *
   * @param run Review run, used for logging and passed to the base review.
   * @param context Review context handed to the base review and the critic.
   * @param maxReflectionRounds Maximum number of critique rounds.
   * @param options Options forwarded to the base review; `projectPrompt` is also
   *   forwarded to the critic and the refine step.
   */
  async reviewWithReflection(
    run: ReviewRun,
    context: ReviewContext,
    maxReflectionRounds = 2,
    options?: SpecialistReviewOptions
  ): Promise<AgentResult> {
    const projectPrompt = options?.projectPrompt;
    const finalRound = maxReflectionRounds - 1;
    let topScore = 0;
    let topFindings: Omit<Finding, 'id' | 'runId' | 'published'>[] = [];
    let carriedOver: Omit<Finding, 'id' | 'runId' | 'published'>[] = [];

    let round = 0;
    while (round < maxReflectionRounds) {
      logger.info(`${this.agentName} Reflection Round ${round + 1}/${maxReflectionRounds}`, {
        runId: run.id,
      });

      // The first round reviews from scratch; later rounds re-evaluate the refined draft.
      const draft = await this.generateDraft(run, context, carriedOver, round, options);
      const critique = await this.criticAgent.critique(draft, context, projectPrompt);
      const score = critique.qualityScore;

      logger.info(`${this.agentName} Critique结果`, {
        runId: run.id,
        round: round + 1,
        qualityScore: score,
        issuesFound: critique.issues.length,
        missedIssues: critique.missedIssues.length,
      });

      // Good enough: accept this draft and stop reflecting.
      const accepted = score >= 0.9 && critique.issues.length === 0;
      if (accepted) {
        topFindings = draft;
        topScore = score;
        logger.info(`${this.agentName} 质量满足要求提前结束Reflection`, {
          runId: run.id,
          finalScore: score,
        });
        break;
      }

      // Remember the best-scoring draft seen so far.
      if (score > topScore) {
        topScore = score;
        topFindings = draft;
      }

      // Refine only when another round will critique the result.
      if (round < finalRound) {
        carriedOver = await this.refine(draft, critique, context, run, projectPrompt);
      }
      round += 1;
    }

    return { agentName: this.agentName, findings: topFindings };
  }

  /**
   * Supplies the draft for a round: the base-class review for round 0, the
   * previously refined findings for every later round.
   */
  private async generateDraft(
    run: ReviewRun,
    context: ReviewContext,
    previousFindings: Omit<Finding, 'id' | 'runId' | 'published'>[],
    round: number,
    options?: SpecialistReviewOptions
  ): Promise<Omit<Finding, 'id' | 'runId' | 'published'>[]> {
    if (round !== 0) {
      return previousFindings;
    }
    const { findings } = await super.reviewWithOptions(run, context, options);
    return findings;
  }

  /**
   * Asks the model to rewrite `draft` according to the critic's feedback.
   *
   * The response is schema-validated; each finding gets this agent's category
   * and a fingerprint when missing. The original draft is returned when the
   * model returns nothing or anything in the request/validation fails.
   */
  private async refine(
    draft: Omit<Finding, 'id' | 'runId' | 'published'>[],
    critique: CritiqueResult,
    context: ReviewContext,
    run: ReviewRun,
    projectPrompt?: string
  ): Promise<Omit<Finding, 'id' | 'runId' | 'published'>[]> {
    const draftJson = JSON.stringify(draft, null, 2);
    const issueLines = critique.issues
      .map((issue) => `- Finding #${issue.findingIndex}: ${issue.problem}\n 建议: ${issue.suggestion}`)
      .join('\n');
    const missedLines = critique.missedIssues.map((missed) => `- ${missed}`).join('\n');
    const diffExcerpt = tokenCounter.clip(context.diff, 1000);

    const prompt = `你是${this.agentName}。根据以下批评意见,改进审查结果。
原始findings${draft.length}个):
${draftJson}
Critic Agent的批评意见
质量评分: ${critique.qualityScore}
发现的问题(${critique.issues.length}个):
${issueLines}
可能遗漏的问题(${critique.missedIssues.length}个):
${missedLines}
总体评估: ${critique.overallAssessment}
代码上下文:
${diffExcerpt}
任务:
1. 修正有问题的findings根据批评意见
2. 补充遗漏的问题(如果确实存在)
3. 移除误报
4. 提升evidence的充分性和具体性
返回改进后的findings JSON数组格式
{
"findings": [...]
}`;

    try {
      const systemPrompt = withGlobalPrompt(
        `你是${this.agentName},根据批评反馈改进审查结果。`,
        mergeReviewPrompts(config.review.globalPrompt, projectPrompt)
      );
      const messages: LLMMessage[] = [
        { role: 'system', content: systemPrompt },
        { role: 'user', content: prompt },
      ];

      const { content } = await this.gateway.chatForRole('specialist', {
        messages,
        temperature: 0.1,
        responseFormat: 'json',
      });
      if (!content) {
        logger.warn(`${this.agentName} Refine返回空结果使用原findings`);
        return draft;
      }

      // Validate against the schema so malformed findings never reach publishing.
      const parsed = JSON.parse(content);
      const { findings: refined } = findingResponseSchema.parse({
        findings: parsed.findings || draft,
      });

      // Normalize category and fingerprint.
      return refined.map((finding) => {
        const fingerprint =
          finding.fingerprint ||
          buildFingerprint(this.category, finding.path, finding.line, finding.title);
        return { ...finding, category: this.category, fingerprint };
      });
    } catch (error) {
      logger.error(`${this.agentName} Refine失败`, {
        runId: run.id,
        error: error instanceof Error ? error.message : String(error),
      });
      return draft;
    }
  }
}

View File

@@ -1,17 +0,0 @@
import type { LLMGateway } from '../../llm/gateway';
import type { LearningSystem } from '../learning/learning-system';
import { ToolRegistry } from '../tools/registry';
import { SpecialistAgent } from './specialist-agent';
/**
 * Specialist reviewer preconfigured for the `reliability` finding category.
 *
 * Only the category, display name and focus prompt are fixed here; the gateway,
 * tool registry and learning system are forwarded unchanged to `SpecialistAgent`.
 */
export class ReliabilityAgent extends SpecialistAgent {
  constructor(gateway: LLMGateway, toolRegistry?: ToolRegistry, learningSystem?: LearningSystem) {
    // Review focus passed to the base class as its focus prompt.
    const focusPrompt = '错误处理、重试策略、幂等性、并发一致性和资源释放';
    super(gateway, 'reliability', 'Reliability Agent', focusPrompt, toolRegistry, learningSystem);
  }
}

View File

@@ -1,17 +0,0 @@
import type { LLMGateway } from '../../llm/gateway';
import type { LearningSystem } from '../learning/learning-system';
import { ToolRegistry } from '../tools/registry';
import { SpecialistAgent } from './specialist-agent';
/**
 * Specialist reviewer preconfigured for the `security` finding category.
 *
 * Only the category, display name and focus prompt are fixed here; the gateway,
 * tool registry and learning system are forwarded unchanged to `SpecialistAgent`.
 */
export class SecurityAgent extends SpecialistAgent {
  constructor(gateway: LLMGateway, toolRegistry?: ToolRegistry, learningSystem?: LearningSystem) {
    // Review focus passed to the base class as its focus prompt.
    const focusPrompt = '注入漏洞、权限绕过、敏感信息泄露、反序列化和输入校验缺失';
    super(gateway, 'security', 'Security Agent', focusPrompt, toolRegistry, learningSystem);
  }
}

View File

@@ -1,484 +0,0 @@
import { createHash } from 'node:crypto';
import config from '../../config';
import type { LLMGateway } from '../../llm/gateway';
import type { LLMMessage, LLMToolCall } from '../../llm/types';
import { mergeReviewPrompts, withGlobalPrompt } from '../../utils/global-prompt';
import { logger } from '../../utils/logger';
import { tokenCounter } from '../context/token-counter';
import type { LearningSystem } from '../learning/learning-system';
import { findingResponseSchema } from '../schema/finding-schema';
import { ToolRegistry } from '../tools/registry';
import type { ToolExecutionContext, ToolResult } from '../tools/types';
import {
AgentResult,
Finding,
FindingCategory,
ReviewContext,
ReviewMode,
ReviewRun,
} from '../types';
/**
 * Computes the de-duplication key of a finding: SHA-256 over
 * `category:path:line:title`, hex-encoded and cut to 24 characters.
 */
function buildFingerprint(category: string, path: string, line: number, title: string): string {
  const identity = `${category}:${path}:${line}:${title}`;
  const fullDigest = createHash('sha256').update(identity).digest('hex');
  return fullDigest.substring(0, 24);
}
/** Options that narrow and size-limit the context serialized by `toCompactContext`. */
interface CompactContextOptions {
/** When set, only changed files, diff entries and file contents whose path is listed are included. */
scopePaths?: string[];
/** Token budget for the serialized context; `toCompactContext` uses 25,000 when omitted. */
maxContextTokens?: number;
}
/** Per-call options accepted by `SpecialistAgent.reviewWithOptions`. */
export interface SpecialistReviewOptions {
/** Restricts the serialized review context to these file paths. */
scopePaths?: string[];
/** `false` forces the single-pass review even when tools are registered. */
allowTools?: boolean;
/** Upper bound on ReAct iterations; values below 1 are raised to 1. */
maxIterations?: number;
/** `'skip'` returns no findings; `'light'` lowers the default ReAct iteration count from 2 to 1. */
mode?: ReviewMode;
/** Token budget for the serialized review context. */
maxContextTokens?: number;
/** Project-level prompt merged with `config.review.globalPrompt` into the system prompt. */
projectPrompt?: string;
}
/**
 * Serializes a review context to pretty-printed JSON that fits a token budget.
 *
 * The payload has three parts: changed-file metadata, per-file diff changes and
 * file contents. While the payload is over budget it is shrunk in this order:
 *   1. the per-file change cap is cut by 30% per step, from 200 down to 20;
 *   2. the number of file-content entries is then halved per step, down to 0.
 * If it is still too large, the JSON text itself is clipped to the budget.
 *
 * @param context Full review context.
 * @param options Optional path scope and token budget (default 25,000).
 * @returns The JSON text, possibly clipped.
 */
function toCompactContext(context: ReviewContext, options?: CompactContextOptions): string {
  const tokenBudget = options?.maxContextTokens ?? 25_000;
  const scope = options?.scopePaths ? new Set(options.scopePaths) : null;
  const inScope = (filePath: string): boolean => scope === null || scope.has(filePath);

  const changedFiles = context.changedFiles
    .filter((file) => inScope(file.path))
    .map(({ path, status, additions, deletions }) => ({ path, status, additions, deletions }));
  const diffFiles = context.parsedDiff.filter((file) => inScope(file.path));
  const contents = scope
    ? Object.fromEntries(
        Object.entries(context.fileContents).filter(([filePath]) => scope.has(filePath))
      )
    : context.fileContents;
  const contentPaths = Object.keys(contents);

  // Renders the payload with at most `changesLimit` changes per file and the
  // first `contentLimit` file-content entries.
  const render = (changesLimit: number, contentLimit: number): string => {
    const diffSnippets = diffFiles.map((file) => ({
      path: file.path,
      changes: file.changes.slice(0, changesLimit),
    }));
    const fileContents: Record<string, string> = {};
    for (const key of contentPaths.slice(0, contentLimit)) {
      fileContents[key] = contents[key];
    }
    return JSON.stringify({ changedFiles, diffSnippets, fileContents }, null, 2);
  };

  let changesCap = 200;
  let contentCap = contentPaths.length;
  let rendered = render(changesCap, contentCap);

  // Shrink step by step until the payload fits or nothing is left to shrink.
  while (tokenCounter.count(rendered) > tokenBudget && (changesCap > 20 || contentCap > 0)) {
    if (changesCap > 20) {
      changesCap = Math.max(20, Math.floor(changesCap * 0.7));
    } else {
      contentCap = Math.max(0, Math.floor(contentCap * 0.5));
    }
    rendered = render(changesCap, contentCap);
  }

  // Last resort: clip the serialized text itself.
  if (tokenCounter.count(rendered) > tokenBudget) {
    logger.warn('Context size still exceeds token limit after reduction, truncating', {
      estimatedTokens: tokenCounter.count(rendered),
      limit: tokenBudget,
    });
    rendered = tokenCounter.clip(rendered, tokenBudget);
  }
  return rendered;
}
/**
 * Base class for category-specific review agents.
 *
 * Two review strategies are available:
 * - single pass: one JSON-only model call over the compact context;
 * - ReAct loop: the model may call registered tools before returning findings.
 *
 * The ReAct loop is used only when a non-empty tool registry is configured and
 * the caller has not disabled tools.
 */
export class SpecialistAgent {
  constructor(
    protected readonly gateway: LLMGateway,
    protected readonly category: FindingCategory,
    protected readonly agentName: string,
    protected readonly focusPrompt: string,
    protected readonly toolRegistry?: ToolRegistry,
    protected readonly learningSystem?: LearningSystem
  ) {}

  /** Reviews the change with default options. */
  async review(run: ReviewRun, context: ReviewContext): Promise<AgentResult> {
    return this.reviewWithOptions(run, context);
  }

  /**
   * Reviews the change and returns this agent's findings.
   *
   * Returns no findings when the diff is blank or `options.mode` is `'skip'`.
   * The internal strategies log failures and return empty findings rather than throwing.
   */
  async reviewWithOptions(
    run: ReviewRun,
    context: ReviewContext,
    options?: SpecialistReviewOptions
  ): Promise<AgentResult> {
    if (!context.diff.trim()) {
      return { agentName: this.agentName, findings: [] };
    }
    if (options?.mode === 'skip') {
      return { agentName: this.agentName, findings: [] };
    }
    if (
      !this.toolRegistry ||
      this.toolRegistry.getAll().length === 0 ||
      options?.allowTools === false
    ) {
      return this.reviewSinglePass(run, context, options);
    }
    // Tools are available: use the ReAct loop.
    return this.reviewWithReAct(run, context, options);
  }

  /** One-shot review: a single JSON-formatted model call, validated against the finding schema. */
  private async reviewSinglePass(
    run: ReviewRun,
    context: ReviewContext,
    options?: SpecialistReviewOptions
  ): Promise<AgentResult> {
    const prompt = `你是${this.agentName},只关注${this.focusPrompt}
输出必须是JSON对象格式:
{"findings": [{"severity": "high"|"medium"|"low", "confidence": 0-1, "path": "文件路径", "line": 正整数, "title": "标题", "detail": "详情", "evidence": "证据", "suggestion": "建议"}]}
每个 finding 的所有字段都是必填的。仅报告有明确证据的问题;无问题时返回空数组。
审查上下文如下:
${toCompactContext(context, {
  scopePaths: options?.scopePaths,
  maxContextTokens: options?.maxContextTokens,
})}`;

    try {
      const messages: LLMMessage[] = [
        {
          role: 'system',
          content: withGlobalPrompt(
            '你是严格的代码审查专家。返回结构化JSON不输出额外文字。confidence取值范围0到1。line必须是正整数且引用新增行。',
            mergeReviewPrompts(config.review.globalPrompt, options?.projectPrompt)
          ),
        },
        { role: 'user', content: prompt },
      ];

      const response = await this.gateway.chatForRole('specialist', {
        messages,
        temperature: 0,
        responseFormat: 'json',
      });

      const content = response.content;
      if (!content) {
        return { agentName: this.agentName, findings: [] };
      }

      const parsed = findingResponseSchema.parse(JSON.parse(content));
      const findings = parsed.findings.map((item) => ({
        ...item,
        category: this.category,
        fingerprint:
          item.fingerprint || buildFingerprint(this.category, item.path, item.line, item.title),
      }));
      return {
        agentName: this.agentName,
        findings,
      };
    } catch (error) {
      logger.error(`${this.agentName} 执行失败`, {
        runId: run.id,
        error: error instanceof Error ? error.message : String(error),
      });
      return { agentName: this.agentName, findings: [] };
    }
  }

  /**
   * Tool-assisted review loop.
   *
   * Each iteration either executes the tool calls requested by the model or
   * parses its JSON answer. The final iteration disables tool use and forces
   * JSON output. Findings are de-duplicated by fingerprint, with later
   * iterations overwriting earlier ones.
   */
  private async reviewWithReAct(
    run: ReviewRun,
    context: ReviewContext,
    options?: SpecialistReviewOptions
  ): Promise<AgentResult> {
    const registry = this.toolRegistry;
    if (!registry) {
      // Defensive guard: without tools there is nothing for the loop to act with.
      return this.reviewSinglePass(run, context, options);
    }

    const maxIterations = Math.max(
      1,
      options?.maxIterations ?? (options?.mode === 'light' ? 1 : 2)
    );
    const findingsMap = new Map<string, Omit<Finding, 'id' | 'runId' | 'published'>>();
    const compactContext = toCompactContext(context, {
      scopePaths: options?.scopePaths,
      maxContextTokens: options?.maxContextTokens,
    });
    const toolCatalog = registry
      .getAll()
      .map((t) => `- ${t.name}: ${t.description}`)
      .join('\n');

    const messages: LLMMessage[] = [
      {
        role: 'system',
        content: withGlobalPrompt(
          `你是${this.agentName},专注于${this.focusPrompt}
你可以使用以下工具进行深入调查:
${toolCatalog}
工作流程:
1. 分析给定的代码变更
2. 如需更多信息,使用工具调查(如搜索相似代码、分析函数调用)
3. 基于证据报告问题
当你需要使用工具时,直接调用工具即可。
当你完成所有调查并准备输出最终结果时以纯JSON格式返回不要包含任何额外文字
{
"findings": [
{
"severity": "high" | "medium" | "low",
"confidence": 0.0 到 1.0 之间的数字,
"path": "文件路径",
"line": 正整数,引用新增行的行号,
"title": "问题简短标题",
"detail": "问题详细描述",
"evidence": "相关代码片段或证据",
"suggestion": "修复建议"
}
],
"need_more_investigation": false
}
每个 finding 对象的所有字段都是必填的。无问题时返回空数组 {"findings": [], "need_more_investigation": false}。`,
          mergeReviewPrompts(config.review.globalPrompt, options?.projectPrompt)
        ),
      },
    ];

    // Add few-shot examples when a learning system is available.
    if (this.learningSystem) {
      try {
        const fewShotExamples = await this.learningSystem.generateFewShotExamples(
          this.category,
          run.owner,
          run.repo
        );
        if (fewShotExamples.length > 0) {
          const llmFewShotExamples = fewShotExamples
            .map((msg) => {
              if (
                (msg.role === 'system' || msg.role === 'user' || msg.role === 'assistant') &&
                typeof msg.content === 'string'
              ) {
                return { role: msg.role, content: msg.content } as const;
              }
              return null;
            })
            .filter(
              (msg): msg is { role: 'system' | 'user' | 'assistant'; content: string } =>
                msg !== null
            );
          messages.push(...llmFewShotExamples);
          logger.debug(`${this.agentName} 添加了 ${fewShotExamples.length} 条Few-shot示例`, {
            runId: run.id,
          });
        }
      } catch (error) {
        // Few-shot examples are optional; continue without them.
        logger.warn(`${this.agentName} Few-shot示例生成失败`, {
          runId: run.id,
          error: error instanceof Error ? error.message : String(error),
        });
      }
    }

    // The actual review task.
    messages.push({
      role: 'user',
      content: `审查以下代码变更:\n${compactContext}`,
    });

    try {
      for (let iteration = 0; iteration < maxIterations; iteration++) {
        logger.info(`${this.agentName} ReAct迭代 ${iteration + 1}/${maxIterations}`, {
          runId: run.id,
        });

        // Force JSON output only on the last iteration, so the JSON response
        // format does not conflict with tool calling on earlier iterations.
        const isLastIteration = iteration === maxIterations - 1;
        const response = await this.gateway.chatForRole('specialist', {
          messages,
          temperature: 0,
          tools: registry.toToolDefinitions(),
          providerOptions: { tool_choice: isLastIteration ? 'none' : 'auto' },
          responseFormat: isLastIteration ? 'json' : undefined,
        });

        // The model asked for tools: run them and feed the results back.
        if (response.toolCalls.length > 0) {
          messages.push({
            role: 'assistant',
            content: response.content || '',
            toolCalls: response.toolCalls,
          });

          const toolResults = await this.executeTools(response.toolCalls, {
            workspacePath: context.workspacePath,
            mirrorPath: context.mirrorPath,
            runId: run.id,
          });
          for (const toolResult of toolResults) {
            messages.push({
              role: 'tool',
              toolCallId: toolResult.toolCallId,
              content: this.serializeToolResult(toolResult),
            });
          }
          continue;
        }

        // No content and no tool calls: nothing more to do.
        if (!response.content) {
          break;
        }

        // The model answered with content instead of calling tools.
        try {
          const parsed = JSON.parse(response.content);
          if (parsed.findings && parsed.findings.length > 0) {
            // Validate against the schema so malformed findings never reach publishing.
            const validated = findingResponseSchema.parse({ findings: parsed.findings });
            for (const item of validated.findings) {
              const fp =
                item.fingerprint ||
                buildFingerprint(this.category, item.path, item.line, item.title);
              // Same fingerprint from a later iteration overwrites the earlier finding.
              findingsMap.set(fp, {
                ...item,
                category: this.category,
                fingerprint: fp,
              });
            }
          }

          if (!parsed.need_more_investigation) {
            break;
          }

          // The model wants to continue but called no tool: add a user turn to
          // break potential self-repetition.
          messages.push({
            role: 'assistant',
            content: response.content,
          });
          messages.push({
            role: 'user',
            content:
              '请使用工具进行更深入的调查。如果你已经获得了足够的信息,请将 need_more_investigation 设为 false 并输出最终结果。',
          });
        } catch (parseError) {
          // Non-JSON (or schema-invalid) answer: keep it in the conversation
          // and ask the model to answer in JSON instead of giving up.
          logger.warn(`${this.agentName} 响应非 JSON 格式,尝试引导模型返回 JSON`, {
            runId: run.id,
            error: parseError instanceof Error ? parseError.message : String(parseError),
          });
          messages.push({
            role: 'assistant',
            content: response.content,
          });
          messages.push({
            role: 'user',
            content:
              '你的上一次响应不是有效的 JSON。请以纯 JSON 格式返回结果:{"findings": [...], "need_more_investigation": false}。不要包含任何额外文字。',
          });
        }
      }

      return { agentName: this.agentName, findings: Array.from(findingsMap.values()) };
    } catch (error) {
      logger.error(`${this.agentName} ReAct执行失败`, {
        runId: run.id,
        error: error instanceof Error ? error.message : String(error),
      });
      return { agentName: this.agentName, findings: [] };
    }
  }

  /**
   * Serializes a tool result for the conversation.
   *
   * A successful call reports its result even when that result is falsy
   * (`0`, `false`, `''`, `null`). Everything else reports the error.
   */
  private serializeToolResult(toolResult: ToolResult): string {
    const payload =
      toolResult.success && toolResult.result !== undefined
        ? toolResult.result
        : { error: toolResult.error };
    return JSON.stringify(payload);
  }

  /**
   * Executes tool calls sequentially and returns one result per call, in order.
   * Unknown tools, invalid JSON arguments and tool errors become failed results
   * rather than exceptions.
   */
  private async executeTools(
    toolCalls: LLMToolCall[],
    context: ToolExecutionContext
  ): Promise<ToolResult[]> {
    const registry = this.toolRegistry;
    const results: ToolResult[] = [];

    for (const toolCall of toolCalls) {
      const tool = registry?.get(toolCall.name);
      if (!tool) {
        results.push({
          toolCallId: toolCall.id,
          success: false,
          error: `工具 ${toolCall.name} 未找到`,
        });
        continue;
      }

      try {
        // A blank arguments string means "no parameters", not a JSON syntax error.
        const rawArguments =
          typeof toolCall.arguments === 'string' ? toolCall.arguments.trim() : '';
        const params = rawArguments ? JSON.parse(rawArguments) : {};
        const result = await tool.execute(params, context);
        logger.info(`工具调用成功: ${toolCall.name}`, {
          runId: context.runId,
          params,
        });
        results.push({
          toolCallId: toolCall.id,
          success: true,
          result,
        });
      } catch (error) {
        logger.error(`工具调用失败: ${toolCall.name}`, {
          runId: context.runId,
          error: error instanceof Error ? error.message : String(error),
        });
        results.push({
          toolCallId: toolCall.id,
          success: false,
          error: error instanceof Error ? error.message : String(error),
        });
      }
    }
    return results;
  }
}

Some files were not shown because too many files have changed in this diff Show More