feat: 添加多Agent审查代理(specialist/reflexion/judge/critic/debate)

SpecialistAgent实现ReAct循环+指纹去重；ReflexionAgent添加自我反思机制；JudgeAgent聚合去重排序；CriticAgent质量评分；DebateOrchestrator多代理辩论 Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-06-12 07:26:51 +00:00 · 2026-03-01 03:30:08 +00:00
parent 956a84acc1
commit 1d9ed3d969
5 changed files with 1107 additions and 0 deletions
--- a/src/review/agents/critic-agent.ts
+++ b/src/review/agents/critic-agent.ts
@@ -0,0 +1,198 @@
+import OpenAI from 'openai';
+import { Finding, ReviewContext } from '../types';
+import { logger } from '../../utils/logger';
+
+/** Outcome of one critic pass over a batch of findings. */
+export interface CritiqueResult {
+  /** Overall quality of the assessed findings; the critic is asked for a value between 0 and 1. */
+  qualityScore: number; // 0-1
+  /** Problems the critic raised against individual findings. */
+  issues: CritiqueIssue[];
+  /** Free-text descriptions of problems the critic believes the review missed. */
+  missedIssues: string[];
+  /** Free-text summary of the critic's overall judgement. */
+  overallAssessment: string;
+}
+
+/** A single objection raised by the critic against one finding. */
+export interface CritiqueIssue {
+  /** Index of the criticised finding as reported by the model (presumably zero-based into the submitted list — confirm). */
+  findingIndex: number;
+  /** What the critic considers wrong with the finding. */
+  problem: string;
+  /** How the critic suggests improving the finding. */
+  suggestion: string;
+  /** How serious the critic considers this objection. */
+  severity: 'high' | 'medium' | 'low';
+}
+
+/**
+ * LLM-backed critic that scores the quality of review findings.
+ *
+ * Both public methods are best-effort: any failure while calling the model or
+ * reading its reply is logged and replaced by a neutral default, so a broken
+ * critic never blocks the surrounding review flow. Every field read from the
+ * model's JSON is normalized before use, because the reply is untrusted.
+ */
+export class CriticAgent {
+  /**
+   * @param openai Client used for chat completion requests.
+   * @param model Name of the model passed to every request.
+   */
+  constructor(
+    private openai: OpenAI,
+    private model: string
+  ) {}
+
+  /**
+   * Asks the model to assess a batch of findings against the diff.
+   *
+   * @param findings Findings to assess. An empty list short-circuits with a
+   *   score of 1.0 and no model call.
+   * @param context Review context; only the first 3000 characters of
+   *   `context.diff` are included in the prompt.
+   * @returns The normalized critique, or a default critique (score 0.7, no
+   *   issues) when the request or the parsing fails.
+   */
+  async critique(
+    findings: Omit<Finding, 'id' | 'runId' | 'published'>[],
+    context: ReviewContext
+  ): Promise<CritiqueResult> {
+    if (findings.length === 0) {
+      return {
+        qualityScore: 1.0,
+        issues: [],
+        missedIssues: [],
+        overallAssessment: '无findings需要评估',
+      };
+    }
+
+    const prompt = `你是严格的代码审查质量评估专家。评估以下审查结果的质量。
+
+审查结果（${findings.length}个问题）：
+${JSON.stringify(findings, null, 2)}
+
+原始代码变更片段（供参考）：
+${context.diff.slice(0, 3000)}
+
+评估标准：
+1. **Evidence充分性**: 证据是否充分支持结论？是否引用了具体代码？
+2. **误报风险**: 是否可能是false positive？是否考虑了上下文？
+3. **Severity准确性**: 严重性评估是否合理？
+4. **Confidence合理性**: 置信度评分是否反映了证据强度？
+5. **Suggestion可行性**: 建议是否具体、可操作？
+6. **遗漏问题**: 是否遗漏了明显的问题？
+
+返回JSON格式：
+{
+  "quality_score": 0.0-1.0,
+  "issues": [
+    {
+      "finding_index": 0,
+      "problem": "证据不足，仅基于猜测",
+      "suggestion": "需要引用具体代码行并说明为何存在问题",
+      "severity": "high" | "medium" | "low"
+    }
+  ],
+  "missed_issues": [
+    "可能遗漏的问题描述"
+  ],
+  "overall_assessment": "总体评估说明"
+}`;
+
+    try {
+      const response = await this.openai.chat.completions.create({
+        model: this.model,
+        temperature: 0.1, // slightly above 0 to leave room for critical judgement
+        response_format: { type: 'json_object' },
+        messages: [
+          {
+            role: 'system',
+            content: '你是严格的代码审查质量评估专家，以高标准评估findings的质量。',
+          },
+          { role: 'user', content: prompt },
+        ],
+      });
+
+      const content = response.choices[0]?.message.content;
+      if (!content) {
+        throw new Error('Critic Agent返回空结果');
+      }
+
+      const parsed = CriticAgent.asRecord(JSON.parse(content));
+      // A missing or malformed list is treated as "no issues" instead of
+      // throwing inside .map and discarding the rest of the critique.
+      const rawIssues: unknown[] = Array.isArray(parsed.issues) ? parsed.issues : [];
+
+      const result: CritiqueResult = {
+        // A score of 0 is a legitimate (worst) rating and is preserved; only a
+        // missing or non-numeric score falls back to the neutral 0.5.
+        qualityScore: CriticAgent.toUnitInterval(parsed.quality_score, 0.5),
+        issues: rawIssues.map((entry) => {
+          const issue = CriticAgent.asRecord(entry);
+          const index = issue.finding_index;
+          return {
+            findingIndex: typeof index === 'number' && Number.isInteger(index) && index >= 0 ? index : 0,
+            problem: CriticAgent.toText(issue.problem),
+            suggestion: CriticAgent.toText(issue.suggestion),
+            severity: CriticAgent.toSeverity(issue.severity),
+          };
+        }),
+        missedIssues: CriticAgent.toTextList(parsed.missed_issues),
+        overallAssessment: CriticAgent.toText(parsed.overall_assessment),
+      };
+
+      logger.debug('Critic Agent评估完成', {
+        findingsCount: findings.length,
+        qualityScore: result.qualityScore,
+        issuesFound: result.issues.length,
+        missedIssues: result.missedIssues.length,
+      });
+
+      return result;
+    } catch (error) {
+      logger.error('Critic Agent执行失败', {
+        error: error instanceof Error ? error.message : String(error),
+      });
+
+      // Fall back to a default assessment so the caller is not blocked.
+      return {
+        qualityScore: 0.7,
+        issues: [],
+        missedIssues: [],
+        overallAssessment: 'Critic Agent执行失败，使用默认评估',
+      };
+    }
+  }
+
+  /**
+   * Asks the model whether a single finding is valid.
+   *
+   * @param finding Finding to evaluate.
+   * @param context Review context; only the first 2000 characters of
+   *   `context.diff` are included in the prompt.
+   * @returns The model's verdict. On any failure the finding is reported as
+   *   valid with its own confidence and no issues.
+   */
+  async evaluateSingleFinding(
+    finding: Omit<Finding, 'id' | 'runId' | 'published'>,
+    context: ReviewContext
+  ): Promise<{
+    isValid: boolean;
+    confidence: number;
+    issues: string[];
+  }> {
+    const prompt = `评估以下代码审查finding的有效性：
+
+Finding:
+- Title: ${finding.title}
+- Detail: ${finding.detail}
+- Evidence: ${finding.evidence}
+- Severity: ${finding.severity}
+- Confidence: ${finding.confidence}
+
+代码上下文：
+${context.diff.slice(0, 2000)}
+
+判断：
+1. 这个finding是否有效（不是误报）？
+2. 置信度评估是否合理？
+3. 有哪些问题或改进建议？
+
+返回JSON：
+{
+  "is_valid": true/false,
+  "confidence": 0.0-1.0,
+  "issues": ["问题描述1", "问题描述2"]
+}`;
+
+    try {
+      const response = await this.openai.chat.completions.create({
+        model: this.model,
+        temperature: 0,
+        response_format: { type: 'json_object' },
+        messages: [
+          {
+            role: 'system',
+            content: '你是代码审查质量评估专家。',
+          },
+          { role: 'user', content: prompt },
+        ],
+      });
+
+      const content = response.choices[0]?.message.content;
+      if (!content) {
+        throw new Error('评估失败');
+      }
+
+      const parsed = CriticAgent.asRecord(JSON.parse(content));
+
+      return {
+        // Only an explicit boolean is trusted; anything else keeps the finding.
+        isValid: typeof parsed.is_valid === 'boolean' ? parsed.is_valid : true,
+        confidence: CriticAgent.toUnitInterval(parsed.confidence, finding.confidence),
+        issues: CriticAgent.toTextList(parsed.issues),
+      };
+    } catch (error) {
+      logger.error('单个finding评估失败', {
+        error: error instanceof Error ? error.message : String(error),
+      });
+
+      return {
+        isValid: true,
+        confidence: finding.confidence,
+        issues: [],
+      };
+    }
+  }
+
+  /** Returns `value` as a plain record, or an empty record when it is not an object. */
+  private static asRecord(value: unknown): Record<string, unknown> {
+    return value !== null && typeof value === 'object' ? (value as Record<string, unknown>) : {};
+  }
+
+  /** Coerces a number (or numeric string) into [0, 1]; returns `fallback` when it is not a finite number. */
+  private static toUnitInterval(value: unknown, fallback: number): number {
+    const numeric = typeof value === 'string' && value.trim() !== '' ? Number(value) : value;
+    if (typeof numeric !== 'number' || !Number.isFinite(numeric)) {
+      return fallback;
+    }
+    return Math.min(1, Math.max(0, numeric));
+  }
+
+  /** Returns `value` when it is a string, otherwise an empty string. */
+  private static toText(value: unknown): string {
+    return typeof value === 'string' ? value : '';
+  }
+
+  /** Keeps only the string entries of an array; anything else becomes an empty list. */
+  private static toTextList(value: unknown): string[] {
+    return Array.isArray(value) ? value.filter((entry): entry is string => typeof entry === 'string') : [];
+  }
+
+  /** Maps an unknown value onto a valid issue severity, defaulting to 'medium'. */
+  private static toSeverity(value: unknown): CritiqueIssue['severity'] {
+    return value === 'high' || value === 'low' ? value : 'medium';
+  }
+}
--- a/src/review/agents/debate-orchestrator.ts
+++ b/src/review/agents/debate-orchestrator.ts
@@ -0,0 +1,323 @@
+import OpenAI from 'openai';
+import { SpecialistAgent } from './specialist-agent';
+import { Finding, FindingSeverity } from '../types';
+import { logger } from '../../utils/logger';
+
+/** One agent's current position on a finding during a debate. */
+interface AgentOpinion {
+  /** Name of the agent holding this opinion. */
+  agentName: string;
+  /** The agent's confidence; the prompts ask for a value between 0 and 1. */
+  confidence: number;
+  /** Severity the agent assigns to the finding. */
+  severity: FindingSeverity;
+  /** The agent's justification for its verdict. */
+  reasoning: string;
+  /** Whether the agent considers the finding real (true) or a false positive (false). */
+  isValid: boolean;
+}
+
+/**
+ * Runs a multi-agent "debate" over a single finding.
+ *
+ * Each agent first gives an independent opinion, then revises it over a
+ * bounded number of rounds after seeing the other agents' opinions. The final
+ * opinions are merged into an adjusted copy of the finding.
+ *
+ * Every opinion parsed from the model is normalized (confidence clamped to
+ * [0, 1], severity restricted to high/medium/low) so that malformed replies
+ * cannot crash prompt building or corrupt the vote tallies.
+ */
+export class DebateOrchestrator {
+  /** Severities in tie-break order: on equal weight the earlier entry wins. */
+  private static readonly SEVERITY_ORDER: readonly FindingSeverity[] = ['high', 'medium', 'low'];
+
+  private openai: OpenAI;
+  private model: string;
+
+  /**
+   * @param openai Client used for chat completion requests.
+   * @param model Name of the model passed to every request.
+   */
+  constructor(openai: OpenAI, model: string) {
+    this.openai = openai;
+    this.model = model;
+  }
+
+  /**
+   * Debates one finding among the given agents and returns the consensus.
+   *
+   * @param finding Finding under discussion.
+   * @param agents Participating agents; with fewer than two the finding is
+   *   returned unchanged.
+   * @param maxRounds Maximum number of revision rounds after the initial
+   *   opinions; the loop stops early once consensus is detected.
+   * @returns A copy of the finding with confidence, severity and detail
+   *   adjusted by the vote (or the original finding when no debate ran).
+   */
+  async conductDebate(
+    finding: Omit<Finding, 'id' | 'runId' | 'published'>,
+    agents: SpecialistAgent[],
+    maxRounds: number = 2
+  ): Promise<Omit<Finding, 'id' | 'runId' | 'published'>> {
+    if (agents.length < 2) {
+      logger.debug('Debate需要至少2个agents，跳过');
+      return finding;
+    }
+
+    logger.info('启动Debate', {
+      finding: finding.title,
+      agentsCount: agents.length,
+      maxRounds,
+    });
+
+    const opinions = new Map<string, AgentOpinion>();
+
+    // Collect the initial, independent opinions.
+    for (const agent of agents) {
+      const opinion = await this.getAgentOpinion(agent, finding);
+      opinions.set(this.nameOf(agent), opinion);
+    }
+
+    // Revision rounds run sequentially on purpose: each agent sees the
+    // opinions already updated earlier in the same round.
+    for (let round = 0; round < maxRounds; round++) {
+      logger.debug(`Debate Round ${round + 1}/${maxRounds}`, {
+        finding: finding.title,
+      });
+
+      for (const agent of agents) {
+        const agentName = this.nameOf(agent);
+        const otherOpinions = Array.from(opinions.entries()).filter(([name]) => name !== agentName);
+
+        const revisedOpinion = await this.reviseOpinion(agent, finding, otherOpinions, opinions);
+
+        opinions.set(agentName, revisedOpinion);
+      }
+
+      // Stop as soon as the agents agree.
+      if (this.hasConsensus(opinions)) {
+        logger.info(`Debate在第${round + 1}轮达成共识`, {
+          finding: finding.title,
+        });
+        break;
+      }
+    }
+
+    return this.formConsensus(finding, opinions);
+  }
+
+  /**
+   * Reads the agent's name.
+   *
+   * `agentName` is declared `protected` on SpecialistAgent, so it is not
+   * visible from this class; the cast is confined to this one helper.
+   */
+  private nameOf(agent: SpecialistAgent): string {
+    return (agent as any).agentName;
+  }
+
+  /**
+   * Converts the model's raw JSON into a well-formed opinion.
+   *
+   * Confidence must be a finite number and is clamped to [0, 1] (0 is kept as
+   * a legitimate "strongly rejects" value); otherwise 0.5 is used. Unknown
+   * severities become 'medium', a non-string reasoning becomes '', and
+   * validity defaults to true unless the model returned an explicit boolean.
+   */
+  private normalizeOpinion(agentName: string, raw: unknown): AgentOpinion {
+    const parsed = raw !== null && typeof raw === 'object' ? (raw as Record<string, unknown>) : {};
+
+    const rawConfidence = parsed.confidence;
+    const confidence =
+      typeof rawConfidence === 'number' && Number.isFinite(rawConfidence)
+        ? Math.min(1, Math.max(0, rawConfidence))
+        : 0.5;
+
+    const severity = DebateOrchestrator.SEVERITY_ORDER.find((level) => level === parsed.severity) ?? 'medium';
+
+    return {
+      agentName,
+      confidence,
+      severity,
+      reasoning: typeof parsed.reasoning === 'string' ? parsed.reasoning : '',
+      isValid: typeof parsed.is_valid === 'boolean' ? parsed.is_valid : true,
+    };
+  }
+
+  /**
+   * Asks the model for one agent's independent opinion on the finding.
+   *
+   * @returns The normalized opinion; on failure, an opinion that mirrors the
+   *   finding's own confidence and severity and treats it as valid.
+   */
+  private async getAgentOpinion(
+    agent: SpecialistAgent,
+    finding: Omit<Finding, 'id' | 'runId' | 'published'>
+  ): Promise<AgentOpinion> {
+    const agentName = this.nameOf(agent);
+    const prompt = `你是${agentName}。评估以下代码问题的严重性、置信度和有效性。
+
+问题：
+- Title: ${finding.title}
+- Detail: ${finding.detail}
+- Evidence: ${finding.evidence}
+- Current Severity: ${finding.severity}
+- Current Confidence: ${finding.confidence}
+
+从你的专业角度判断：
+1. 这个问题是否真实存在（不是误报）？
+2. 严重性评估是否准确？
+3. 你的置信度是多少？
+4. 你的判断理由？
+
+返回JSON：
+{
+  "is_valid": true/false,
+  "confidence": 0.0-1.0,
+  "severity": "high" | "medium" | "low",
+  "reasoning": "你的判断理由（详细说明）"
+}`;
+
+    try {
+      const response = await this.openai.chat.completions.create({
+        model: this.model,
+        temperature: 0.2,
+        response_format: { type: 'json_object' },
+        messages: [
+          {
+            role: 'system',
+            content: `你是${agentName}，从你的专业角度独立评估代码问题。`,
+          },
+          { role: 'user', content: prompt },
+        ],
+      });
+
+      const content = response.choices[0]?.message.content;
+      if (!content) {
+        throw new Error('Agent opinion返回空');
+      }
+
+      return this.normalizeOpinion(agentName, JSON.parse(content));
+    } catch (error) {
+      logger.error(`获取${agentName}意见失败`, {
+        error: error instanceof Error ? error.message : String(error),
+      });
+
+      return {
+        agentName,
+        confidence: finding.confidence,
+        severity: finding.severity,
+        reasoning: '获取意见失败，使用默认值',
+        isValid: true,
+      };
+    }
+  }
+
+  /**
+   * Asks the model to reconsider one agent's opinion in light of its peers.
+   *
+   * @param otherOpinions Current opinions of all other agents.
+   * @param opinions Full opinion map; used to recover this agent's current
+   *   opinion when the request fails.
+   * @returns The normalized revised opinion; on failure, the agent's current
+   *   opinion (or a neutral default if none is recorded).
+   */
+  private async reviseOpinion(
+    agent: SpecialistAgent,
+    finding: Omit<Finding, 'id' | 'runId' | 'published'>,
+    otherOpinions: [string, AgentOpinion][],
+    opinions: Map<string, AgentOpinion>
+  ): Promise<AgentOpinion> {
+    const agentName = this.nameOf(agent);
+    const peerSummary = otherOpinions
+      .map(
+        ([name, op]) =>
+          `- ${name}: ${op.isValid ? '有效' : '误报'}, ${op.severity} (置信度 ${op.confidence.toFixed(2)})\n  理由: ${
+            op.reasoning
+          }`
+      )
+      .join('\n');
+    const prompt = `你是${agentName}。重新评估以下问题，考虑其他专家的意见。
+
+问题：
+- Title: ${finding.title}
+- Evidence: ${finding.evidence}
+
+其他专家意见：
+${peerSummary}
+
+基于同行的意见，你是否改变观点？
+
+返回JSON：
+{
+  "is_valid": true/false,
+  "confidence": 0.0-1.0,
+  "severity": "high" | "medium" | "low",
+  "reasoning": "修正后的理由或坚持原判断的原因"
+}`;
+
+    try {
+      const response = await this.openai.chat.completions.create({
+        model: this.model,
+        temperature: 0.3, // allow some flexibility when reconsidering
+        response_format: { type: 'json_object' },
+        messages: [
+          {
+            role: 'system',
+            content: `你是${agentName}，根据同行意见重新评估，但也要坚持你的专业判断。`,
+          },
+          { role: 'user', content: prompt },
+        ],
+      });
+
+      const content = response.choices[0]?.message.content;
+      if (!content) {
+        throw new Error('Revised opinion返回空');
+      }
+
+      return this.normalizeOpinion(agentName, JSON.parse(content));
+    } catch (error) {
+      logger.error(`${agentName}修订意见失败`, {
+        error: error instanceof Error ? error.message : String(error),
+      });
+
+      // Keep the agent's current opinion from the map when revision fails.
+      const currentOpinion = opinions.get(agentName);
+      return currentOpinion || {
+        agentName,
+        confidence: 0.5,
+        severity: 'medium',
+        reasoning: '修订失败',
+        isValid: true,
+      };
+    }
+  }
+
+  /**
+   * Reports whether the agents agree closely enough to stop debating.
+   *
+   * Consensus means either at least 80% (or at most 20%) of agents consider
+   * the finding valid, or at least 70% chose the same severity. An empty
+   * opinion set counts as consensus.
+   */
+  private hasConsensus(opinions: Map<string, AgentOpinion>): boolean {
+    const votes = Array.from(opinions.values());
+
+    if (votes.length === 0) return true;
+
+    // Validity consensus: a clear majority either way.
+    const validCount = votes.filter((v) => v.isValid).length;
+    const validRatio = validCount / votes.length;
+
+    if (validRatio >= 0.8 || validRatio <= 0.2) {
+      return true;
+    }
+
+    // Severity consensus.
+    const severityCounts: Record<FindingSeverity, number> = {
+      high: 0,
+      medium: 0,
+      low: 0,
+    };
+
+    for (const vote of votes) {
+      // Ignore severities outside the known set so the tally stays numeric.
+      if (vote.severity in severityCounts) {
+        severityCounts[vote.severity] += 1;
+      }
+    }
+
+    const maxCount = Math.max(...Object.values(severityCounts));
+    const consensusRatio = maxCount / votes.length;
+
+    return consensusRatio >= 0.7;
+  }
+
+  /**
+   * Merges the final opinions into an adjusted copy of the finding.
+   *
+   * - No opinions: the finding is returned unchanged.
+   * - No strict majority considers it valid: confidence is capped at 0.4 and
+   *   a note is appended to the detail.
+   * - Otherwise: confidence becomes the mean confidence of the "valid" votes,
+   *   severity is chosen by confidence-weighted vote among them (ties go to
+   *   the more severe level), and their reasoning is appended to the detail.
+   *   When no severity gathers any positive weight (for example every valid
+   *   vote has confidence 0) the finding's original severity is kept.
+   */
+  private formConsensus(
+    finding: Omit<Finding, 'id' | 'runId' | 'published'>,
+    opinions: Map<string, AgentOpinion>
+  ): Omit<Finding, 'id' | 'runId' | 'published'> {
+    const votes = Array.from(opinions.values());
+
+    if (votes.length === 0) {
+      return finding;
+    }
+
+    // Validity is decided by strict majority.
+    const validVotes = votes.filter((v) => v.isValid);
+    const validCount = validVotes.length;
+    const isValid = validCount > votes.length / 2;
+
+    if (!isValid) {
+      logger.info('Debate判定为无效finding', {
+        finding: finding.title,
+        validVotes: validCount,
+        totalVotes: votes.length,
+      });
+
+      return {
+        ...finding,
+        confidence: Math.min(finding.confidence, 0.4),
+        detail: `${finding.detail}\n\n**Debate结果**: 多数专家认为此问题可能是误报（${validCount}/${votes.length}认为有效）`,
+      };
+    }
+
+    // validVotes is non-empty here because a strict majority voted "valid".
+    const avgConfidence = validVotes.reduce((sum, v) => sum + v.confidence, 0) / validVotes.length;
+
+    // Confidence-weighted severity vote.
+    const severityVotes: Record<FindingSeverity, number> = {
+      high: 0,
+      medium: 0,
+      low: 0,
+    };
+
+    for (const vote of validVotes) {
+      if (vote.severity in severityVotes && Number.isFinite(vote.confidence)) {
+        severityVotes[vote.severity] += vote.confidence;
+      }
+    }
+
+    // Pick the heaviest severity; a level must carry positive weight to win,
+    // otherwise the finding keeps its original severity.
+    let agreedSeverity: FindingSeverity = finding.severity;
+    let heaviest = 0;
+    for (const level of DebateOrchestrator.SEVERITY_ORDER) {
+      if (severityVotes[level] > heaviest) {
+        heaviest = severityVotes[level];
+        agreedSeverity = level;
+      }
+    }
+
+    // Append every supporting agent's reasoning to the detail.
+    const synthesizedDetail = `${finding.detail}\n\n**专家Debate意见汇总：**\n${validVotes
+      .map((v) => `- ${v.agentName} (${v.severity}, 置信度${v.confidence.toFixed(2)}): ${v.reasoning}`)
+      .join('\n')}`;
+
+    logger.info('Debate达成共识', {
+      finding: finding.title,
+      originalSeverity: finding.severity,
+      agreedSeverity,
+      originalConfidence: finding.confidence,
+      avgConfidence,
+      validVotes: validVotes.length,
+    });
+
+    return {
+      ...finding,
+      confidence: avgConfidence,
+      severity: agreedSeverity,
+      detail: synthesizedDetail,
+    };
+  }
+}
--- a/src/review/agents/judge-agent.ts
+++ b/src/review/agents/judge-agent.ts
@@ -0,0 +1,54 @@
+import { ReviewDecision, Finding } from '../types';
+
+// Numeric weight per severity level; higher means more severe.
+const severityWeight: Record<Finding['severity'], number> = {
+  high: 3,
+  medium: 2,
+  low: 1,
+};
+
+/**
+ * Builds the markdown summary text for a set of findings.
+ *
+ * @param findings Findings to summarize.
+ * @returns A fixed "nothing found" notice for an empty list; otherwise a line
+ *   with the total and per-severity counts followed by a prioritisation hint,
+ *   separated by a blank line.
+ */
+function summarizeFindings(findings: Omit<Finding, 'id' | 'runId' | 'published'>[]): string {
+  const total = findings.length;
+  if (total === 0) {
+    return '本次变更未发现需要立即处理的高置信问题。建议人工快速复核关键业务路径。';
+  }
+
+  // Tally the three severities in a single pass.
+  let high = 0;
+  let medium = 0;
+  let low = 0;
+  for (const { severity } of findings) {
+    if (severity === 'high') {
+      high += 1;
+    } else if (severity === 'medium') {
+      medium += 1;
+    } else if (severity === 'low') {
+      low += 1;
+    }
+  }
+
+  const countsLine = `本次 AI Agent 审查共识别 ${total} 个问题，其中 high ${high} 个、medium ${medium} 个、low ${low} 个。`;
+  const priorityLine = '以下评论按风险优先级自动发布，建议优先处理 high 与 medium 项。';
+  return `${countsLine}\n\n${priorityLine}`;
+}
+
+/**
+ * Aggregates findings from several agents into one review decision:
+ * de-duplicates by fingerprint, ranks by risk, and attaches a summary.
+ */
+export class JudgeAgent {
+  /**
+   * @param results Findings from all agents, possibly containing duplicates
+   *   that share a fingerprint.
+   * @returns The decision with one finding per fingerprint (the one with the
+   *   highest severity-weight × confidence score; the first seen wins ties),
+   *   sorted by that score in descending order, plus the markdown summary.
+   */
+  judge(results: Omit<Finding, 'id' | 'runId' | 'published'>[]): ReviewDecision {
+    type Candidate = Omit<Finding, 'id' | 'runId' | 'published'>;
+    const riskScore = (item: Candidate): number => severityWeight[item.severity] * item.confidence;
+
+    const winners = new Map<string, Candidate>();
+    results.forEach((candidate) => {
+      const incumbent = winners.get(candidate.fingerprint);
+      // Only a strictly higher score displaces the finding already kept.
+      if (!incumbent || riskScore(candidate) > riskScore(incumbent)) {
+        winners.set(candidate.fingerprint, candidate);
+      }
+    });
+
+    const findings = Array.from(winners.values());
+    findings.sort((left, right) => riskScore(right) - riskScore(left));
+
+    const summaryMarkdown = summarizeFindings(findings);
+    return { summaryMarkdown, findings };
+  }
+}
--- a/src/review/agents/reflexion-agent.ts
+++ b/src/review/agents/reflexion-agent.ts
@@ -0,0 +1,178 @@
+import OpenAI from 'openai';
+import { SpecialistAgent } from './specialist-agent';
+import { CriticAgent, CritiqueResult } from './critic-agent';
+import { AgentResult, FindingCategory, ReviewContext, ReviewRun, Finding } from '../types';
+import { ToolRegistry } from '../tools/registry';
+import { LearningSystem } from '../learning/learning-system';
+import { logger } from '../../utils/logger';
+import { findingResponseSchema } from '../schema/finding-schema';
+import { createHash } from 'node:crypto';
+
+/**
+ * Derives a short identifier for a finding from its category, file path,
+ * line number and title.
+ *
+ * @returns The first 24 hex characters of the SHA-256 digest of
+ *   `category:path:line:title`.
+ */
+function buildFingerprint(category: string, path: string, line: number, title: string): string {
+  const identity = [String(category), String(path), String(line), String(title)].join(':');
+  const digest = createHash('sha256').update(identity).digest('hex');
+  return digest.slice(0, 24);
+}
+
+/**
+ * Specialist agent with a self-reflection loop: it drafts findings, has a
+ * CriticAgent score them, and asks the model to refine the draft based on the
+ * critique, keeping the best-scoring draft seen.
+ */
+export class ReflexionAgent extends SpecialistAgent {
+  private criticAgent: CriticAgent;
+
+  /**
+   * Takes the same arguments as SpecialistAgent and additionally creates a
+   * CriticAgent that shares the client and model.
+   */
+  constructor(
+    openai: OpenAI,
+    model: string,
+    category: FindingCategory,
+    agentName: string,
+    focusPrompt: string,
+    toolRegistry?: ToolRegistry,
+    learningSystem?: LearningSystem
+  ) {
+    super(openai, model, category, agentName, focusPrompt, toolRegistry, learningSystem);
+    this.criticAgent = new CriticAgent(openai, model);
+  }
+
+  /**
+   * Reviews the change with up to `maxReflectionRounds` draft → critique →
+   * refine rounds.
+   *
+   * @param run Review run; its id is attached to log entries.
+   * @param context Review context passed to the draft, critique and refine steps.
+   * @param maxReflectionRounds Upper bound on rounds. Values below 1 (or NaN)
+   *   are treated as 1 so that at least one draft is always produced.
+   * @returns The draft with the highest critic score. The first critiqued
+   *   draft is always retained as a baseline, even when it scores 0, so a
+   *   harsh critic cannot silently erase every finding.
+   */
+  async reviewWithReflection(
+    run: ReviewRun,
+    context: ReviewContext,
+    maxReflectionRounds: number = 2
+  ): Promise<AgentResult> {
+    const rounds = Math.max(1, Math.floor(maxReflectionRounds) || 1);
+
+    let best: { findings: Omit<Finding, 'id' | 'runId' | 'published'>[]; score: number } | undefined;
+    let currentFindings: Omit<Finding, 'id' | 'runId' | 'published'>[] = [];
+
+    for (let round = 0; round < rounds; round++) {
+      logger.info(`${this.agentName} Reflection Round ${round + 1}/${rounds}`, {
+        runId: run.id,
+      });
+
+      // First round: fresh review. Later rounds: the refined result of the previous round.
+      const draft = await this.generateDraft(run, context, currentFindings, round);
+
+      const critique = await this.criticAgent.critique(draft, context);
+      // A non-finite score cannot be ranked; treat it as the worst score.
+      const score = Number.isFinite(critique.qualityScore) ? critique.qualityScore : 0;
+
+      logger.info(`${this.agentName} Critique结果`, {
+        runId: run.id,
+        round: round + 1,
+        qualityScore: critique.qualityScore,
+        issuesFound: critique.issues.length,
+        missedIssues: critique.missedIssues.length,
+      });
+
+      // Good enough: accept this draft and stop early.
+      if (score >= 0.9 && critique.issues.length === 0) {
+        best = { findings: draft, score };
+        logger.info(`${this.agentName} 质量满足要求，提前结束Reflection`, {
+          runId: run.id,
+          finalScore: critique.qualityScore,
+        });
+        break;
+      }
+
+      // Keep the first draft unconditionally, then only strictly better ones.
+      if (best === undefined || score > best.score) {
+        best = { findings: draft, score };
+      }
+
+      // Refine only when another round will follow to evaluate the result.
+      if (round < rounds - 1) {
+        currentFindings = await this.refine(draft, critique, context, run);
+      }
+    }
+
+    return {
+      agentName: this.agentName,
+      findings: best?.findings ?? [],
+    };
+  }
+
+  /**
+   * Produces the draft for a round: the parent class's `review` result in
+   * round 0, and the previously refined findings in every later round.
+   */
+  private async generateDraft(
+    run: ReviewRun,
+    context: ReviewContext,
+    previousFindings: Omit<Finding, 'id' | 'runId' | 'published'>[],
+    round: number
+  ): Promise<Omit<Finding, 'id' | 'runId' | 'published'>[]> {
+    if (round === 0) {
+      const result = await super.review(run, context);
+      return result.findings;
+    }
+
+    return previousFindings;
+  }
+
+  /**
+   * Asks the model to improve a draft according to the critique.
+   *
+   * @param draft Findings to improve.
+   * @param critique Critic feedback embedded in the prompt.
+   * @param context Review context; only the first 3000 characters of
+   *   `context.diff` are included in the prompt.
+   * @param run Review run; its id is attached to the error log entry.
+   * @returns The schema-validated refined findings with this agent's category
+   *   and a fingerprint filled in; the unchanged draft when the model returns
+   *   nothing or anything fails.
+   */
+  private async refine(
+    draft: Omit<Finding, 'id' | 'runId' | 'published'>[],
+    critique: CritiqueResult,
+    context: ReviewContext,
+    run: ReviewRun
+  ): Promise<Omit<Finding, 'id' | 'runId' | 'published'>[]> {
+    const prompt = `你是${this.agentName}。根据以下批评意见，改进审查结果。
+
+原始findings（${draft.length}个）：
+${JSON.stringify(draft, null, 2)}
+
+Critic Agent的批评意见：
+质量评分: ${critique.qualityScore}
+发现的问题（${critique.issues.length}个）:
+${critique.issues.map((issue) => `- Finding #${issue.findingIndex}: ${issue.problem}\n  建议: ${issue.suggestion}`).join('\n')}
+
+可能遗漏的问题（${critique.missedIssues.length}个）:
+${critique.missedIssues.map((missed) => `- ${missed}`).join('\n')}
+
+总体评估: ${critique.overallAssessment}
+
+代码上下文：
+${context.diff.slice(0, 3000)}
+
+任务：
+1. 修正有问题的findings（根据批评意见）
+2. 补充遗漏的问题（如果确实存在）
+3. 移除误报
+4. 提升evidence的充分性和具体性
+
+返回改进后的findings JSON数组，格式：
+{
+  "findings": [...]
+}`;
+
+    try {
+      const response = await this.openai.chat.completions.create({
+        model: this.model,
+        temperature: 0.1,
+        response_format: { type: 'json_object' },
+        messages: [
+          {
+            role: 'system',
+            content: `你是${this.agentName}，根据批评反馈改进审查结果。`,
+          },
+          { role: 'user', content: prompt },
+        ],
+      });
+
+      const content = response.choices[0]?.message.content;
+      if (!content) {
+        logger.warn(`${this.agentName} Refine返回空结果，使用原findings`);
+        return draft;
+      }
+
+      const parsed = JSON.parse(content);
+
+      // Validate against the schema so malformed data never reaches publishing.
+      const validated = findingResponseSchema.parse({ findings: parsed.findings || draft });
+
+      // Normalize category and fingerprint.
+      return validated.findings.map((finding) => ({
+        ...finding,
+        category: this.category,
+        fingerprint: finding.fingerprint || buildFingerprint(this.category, finding.path, finding.line, finding.title),
+      }));
+    } catch (error) {
+      logger.error(`${this.agentName} Refine失败`, {
+        runId: run.id,
+        error: error instanceof Error ? error.message : String(error),
+      });
+      return draft; // fall back to the unrefined findings
+    }
+  }
+}
--- a/src/review/agents/specialist-agent.ts
+++ b/src/review/agents/specialist-agent.ts
@@ -0,0 +1,354 @@
+import OpenAI from 'openai';
+import { createHash } from 'node:crypto';
+import { logger } from '../../utils/logger';
+import { findingResponseSchema } from '../schema/finding-schema';
+import { AgentResult, Finding, FindingCategory, ReviewContext, ReviewRun } from '../types';
+import { ToolRegistry } from '../tools/registry';
+import type { ToolResult, ToolExecutionContext } from '../tools/types';
+import type { LearningSystem } from '../learning/learning-system';
+
+/**
+ * Derives a short identifier for a finding from its category, file path,
+ * line number and title.
+ *
+ * @returns The first 24 hex characters of the SHA-256 digest of
+ *   `category:path:line:title`.
+ */
+function buildFingerprint(category: string, path: string, line: number, title: string): string {
+  const identity = [String(category), String(path), String(line), String(title)].join(':');
+  const digest = createHash('sha256').update(identity).digest('hex');
+  return digest.slice(0, 24);
+}
+
+/**
+ * Serializes the review context to pretty-printed JSON that fits a fixed
+ * character budget.
+ *
+ * The output contains per-file metadata (`changedFiles`), per-file diff
+ * changes (`diffSnippets`) and file contents (`fileContents`). When the JSON
+ * exceeds the budget it is shrunk in two stages: first the number of changes
+ * kept per file is reduced (from 200 down to a floor of 20), then the number
+ * of file-content entries is halved repeatedly down to zero. If it still does
+ * not fit, the string is cut and a truncation marker appended; the marker is
+ * counted against the budget so the result never exceeds it.
+ *
+ * @param context Review context to serialize.
+ * @returns The serialized context, at most 100,000 characters long. After a
+ *   hard truncation the string is no longer valid JSON.
+ */
+function toCompactContext(context: ReviewContext): string {
+  // Global budget: 100k chars, leaving room for the system prompt, few-shot
+  // examples and the response.
+  const MAX_CONTEXT_CHARS = 100_000;
+  const MIN_CHANGES_PER_FILE = 20;
+  const TRUNCATION_MARKER = '\n... [truncated]';
+
+  // File metadata is small and always included in full.
+  const files = context.changedFiles.map((file) => ({
+    path: file.path,
+    status: file.status,
+    additions: file.additions,
+    deletions: file.deletions,
+  }));
+
+  const contentKeys = Object.keys(context.fileContents);
+  let maxChangesPerFile = 200;
+  let maxFileContentsEntries = contentKeys.length;
+
+  // Builds the JSON for the given limits so its size can be measured.
+  const tryBuild = (changesLimit: number, contentEntriesLimit: number): string => {
+    const snippets = context.parsedDiff.map((file) => ({
+      path: file.path,
+      changes: file.changes.slice(0, changesLimit),
+    }));
+
+    const limitedContents: Record<string, string> = {};
+    for (const key of contentKeys.slice(0, contentEntriesLimit)) {
+      limitedContents[key] = context.fileContents[key];
+    }
+
+    return JSON.stringify(
+      {
+        changedFiles: files,
+        diffSnippets: snippets,
+        fileContents: limitedContents,
+      },
+      null,
+      2
+    );
+  };
+
+  let result = tryBuild(maxChangesPerFile, maxFileContentsEntries);
+
+  // Shrink step by step: diff changes first, file contents second.
+  while (
+    result.length > MAX_CONTEXT_CHARS &&
+    (maxChangesPerFile > MIN_CHANGES_PER_FILE || maxFileContentsEntries > 0)
+  ) {
+    if (maxChangesPerFile > MIN_CHANGES_PER_FILE) {
+      maxChangesPerFile = Math.max(MIN_CHANGES_PER_FILE, Math.floor(maxChangesPerFile * 0.7));
+    } else {
+      maxFileContentsEntries = Math.max(0, Math.floor(maxFileContentsEntries * 0.5));
+    }
+
+    result = tryBuild(maxChangesPerFile, maxFileContentsEntries);
+  }
+
+  // Last resort: hard cut, reserving room for the marker so the final string
+  // stays within the budget.
+  if (result.length > MAX_CONTEXT_CHARS) {
+    logger.warn('Context size still exceeds limit after reduction, truncating', {
+      originalSize: result.length,
+      limit: MAX_CONTEXT_CHARS,
+    });
+    result = result.slice(0, MAX_CONTEXT_CHARS - TRUNCATION_MARKER.length) + TRUNCATION_MARKER;
+  }
+
+  return result;
+}
+
+/**
+ * Single-focus code review agent backed by an OpenAI chat model.
+ *
+ * Without tools it performs one JSON-only completion. With a non-empty tool
+ * registry it runs a bounded ReAct loop in which the model may call tools
+ * before returning findings. Findings are validated against
+ * `findingResponseSchema`, stamped with this agent's category, and given a
+ * fingerprint when the model did not supply one.
+ */
+export class SpecialistAgent {
+  /**
+   * @param openai Client used for chat completion requests.
+   * @param model Name of the model passed to every request.
+   * @param category Category assigned to every finding this agent returns.
+   * @param agentName Name used in prompts, logs and results.
+   * @param focusPrompt Description of what the agent should focus on.
+   * @param toolRegistry Optional tools; when absent or empty the single-call mode is used.
+   * @param learningSystem Optional source of few-shot example messages for the ReAct mode.
+   */
+  constructor(
+    protected readonly openai: OpenAI,
+    protected readonly model: string,
+    protected readonly category: FindingCategory,
+    protected readonly agentName: string,
+    protected readonly focusPrompt: string,
+    protected readonly toolRegistry?: ToolRegistry,
+    protected readonly learningSystem?: LearningSystem
+  ) {}
+
+  /**
+   * Reviews the change described by `context`.
+   *
+   * @returns No findings for a blank diff; otherwise the result of the
+   *   single-call mode (no tools available) or the ReAct mode.
+   */
+  async review(run: ReviewRun, context: ReviewContext): Promise<AgentResult> {
+    if (!context.diff.trim()) {
+      return { agentName: this.agentName, findings: [] };
+    }
+
+    // Without tools, use the traditional single-call mode.
+    if (!this.toolRegistry || this.toolRegistry.getAll().length === 0) {
+      return this.reviewLegacy(run, context);
+    }
+
+    return this.reviewWithReAct(run, context);
+  }
+
+  /**
+   * Single-call mode: one JSON-only completion over the compacted context.
+   *
+   * @returns The validated findings; an empty list when the model returns no
+   *   content or anything fails (the failure is logged).
+   */
+  private async reviewLegacy(run: ReviewRun, context: ReviewContext): Promise<AgentResult> {
+    const prompt = `你是${this.agentName}，只关注${this.focusPrompt}。
+输出必须是JSON对象格式: {"findings": []}。
+仅报告有明确证据的问题；无问题时返回空数组。
+
+审查上下文如下:
+${toCompactContext(context)}`;
+
+    try {
+      const response = await this.openai.chat.completions.create({
+        model: this.model,
+        temperature: 0,
+        response_format: { type: 'json_object' },
+        messages: [
+          {
+            role: 'system',
+            content:
+              '你是严格的代码审查专家。返回结构化JSON，不输出额外文字。confidence取值范围0到1。line必须是正整数且引用新增行。',
+          },
+          { role: 'user', content: prompt },
+        ],
+      });
+
+      const content = response.choices[0]?.message.content;
+      if (!content) {
+        return { agentName: this.agentName, findings: [] };
+      }
+
+      const parsed = findingResponseSchema.parse(JSON.parse(content));
+      const findings = parsed.findings.map((item) => ({
+        ...item,
+        category: this.category,
+        fingerprint: item.fingerprint || buildFingerprint(this.category, item.path, item.line, item.title),
+      }));
+
+      return {
+        agentName: this.agentName,
+        findings,
+      };
+    } catch (error) {
+      logger.error(`${this.agentName} 执行失败`, {
+        runId: run.id,
+        error: error instanceof Error ? error.message : String(error),
+      });
+      return { agentName: this.agentName, findings: [] };
+    }
+  }
+
+  /**
+   * ReAct mode: up to five iterations in which the model either calls tools
+   * (whose results are fed back into the conversation) or returns findings.
+   *
+   * Findings are de-duplicated by fingerprint across iterations; a later
+   * finding with the same fingerprint replaces the earlier one. If a request
+   * fails mid-loop, the findings already validated in earlier iterations are
+   * still returned instead of being discarded.
+   */
+  private async reviewWithReAct(run: ReviewRun, context: ReviewContext): Promise<AgentResult> {
+    const registry = this.toolRegistry;
+    if (!registry) {
+      // Defensive: review() only routes here when a registry is present.
+      return this.reviewLegacy(run, context);
+    }
+
+    const maxIterations = 5;
+    const findingsMap = new Map<string, Omit<Finding, 'id' | 'runId' | 'published'>>();
+    const toolCatalog = registry
+      .getAll()
+      .map((t) => `- ${t.name}: ${t.description}`)
+      .join('\n');
+    const messages: OpenAI.Chat.ChatCompletionMessageParam[] = [
+      {
+        role: 'system',
+        content: `你是${this.agentName}，专注于${this.focusPrompt}。
+
+你可以使用以下工具进行深入调查：
+${toolCatalog}
+
+工作流程：
+1. 分析给定的代码变更
+2. 如需更多信息，使用工具调查（如搜索相似代码、分析函数调用）
+3. 基于证据报告问题
+
+当你需要使用工具时，直接调用工具即可。
+当你完成所有调查并准备输出最终结果时，以纯JSON格式返回：
+{"findings": [...], "need_more_investigation": false}
+confidence取值范围0到1。line必须是正整数且引用新增行。`,
+      },
+    ];
+
+    // Add few-shot examples when a learning system is available; failure to
+    // produce them is logged and otherwise ignored.
+    if (this.learningSystem) {
+      try {
+        const fewShotExamples = await this.learningSystem.generateFewShotExamples(
+          this.category,
+          run.owner,
+          run.repo
+        );
+        if (fewShotExamples.length > 0) {
+          messages.push(...fewShotExamples);
+          logger.debug(`${this.agentName} 添加了 ${fewShotExamples.length} 条Few-shot示例`, {
+            runId: run.id,
+          });
+        }
+      } catch (error) {
+        logger.warn(`${this.agentName} Few-shot示例生成失败`, {
+          runId: run.id,
+          error: error instanceof Error ? error.message : String(error),
+        });
+      }
+    }
+
+    // The actual review task.
+    messages.push({
+      role: 'user',
+      content: `审查以下代码变更：\n${toCompactContext(context)}`,
+    });
+
+    try {
+      for (let iteration = 0; iteration < maxIterations; iteration++) {
+        logger.info(`${this.agentName} ReAct迭代 ${iteration + 1}/${maxIterations}`, {
+          runId: run.id,
+        });
+
+        // JSON output is forced only on the last iteration, where tool calls
+        // are disabled; forcing it earlier could keep the model from calling tools.
+        const isLastIteration = iteration === maxIterations - 1;
+        const response = await this.openai.chat.completions.create({
+          model: this.model,
+          temperature: 0,
+          ...(isLastIteration ? { response_format: { type: 'json_object' as const } } : {}),
+          messages,
+          tools: registry.toOpenAIFunctions(),
+          tool_choice: isLastIteration ? 'none' : 'auto',
+        });
+
+        const choice = response.choices[0];
+        if (!choice) break;
+
+        // The model asked for tools: run them and feed the results back.
+        if (choice.message.tool_calls && choice.message.tool_calls.length > 0) {
+          messages.push(choice.message as OpenAI.Chat.ChatCompletionMessageParam);
+
+          const toolResults = await this.executeTools(choice.message.tool_calls, {
+            workspacePath: context.workspacePath,
+            mirrorPath: context.mirrorPath,
+            runId: run.id,
+          });
+
+          for (const toolResult of toolResults) {
+            // Choose the payload by the success flag so that legitimate falsy
+            // results (0, '', false) are not reported to the model as errors.
+            const payload = toolResult.success ? toolResult.result ?? null : { error: toolResult.error };
+            messages.push({
+              role: 'tool',
+              tool_call_id: toolResult.toolCallId,
+              content: JSON.stringify(payload),
+            });
+          }
+
+          continue;
+        }
+
+        // No content and no tool calls: nothing more to do.
+        if (!choice.message.content) {
+          break;
+        }
+
+        // The model returned content instead of calling tools: parse findings.
+        try {
+          const parsed = JSON.parse(choice.message.content);
+
+          if (Array.isArray(parsed.findings) && parsed.findings.length > 0) {
+            // Validate against the schema so malformed data never reaches publishing.
+            const validated = findingResponseSchema.parse({ findings: parsed.findings });
+            for (const item of validated.findings) {
+              const fp = item.fingerprint || buildFingerprint(this.category, item.path, item.line, item.title);
+              // De-duplicate by fingerprint: a later iteration overrides an earlier one.
+              findingsMap.set(fp, {
+                ...item,
+                category: this.category,
+                fingerprint: fp,
+              });
+            }
+          }
+
+          if (!parsed.need_more_investigation) {
+            break;
+          }
+
+          // The model wants to continue but called no tool: inject a user
+          // message to break a potential self-repeating loop.
+          messages.push(choice.message as OpenAI.Chat.ChatCompletionMessageParam);
+          messages.push({
+            role: 'user',
+            content: '请使用工具进行更深入的调查。如果你已经获得了足够的信息，请将 need_more_investigation 设为 false 并输出最终结果。',
+          });
+        } catch (parseError) {
+          logger.error(`${this.agentName} 解析响应失败`, {
+            runId: run.id,
+            error: parseError instanceof Error ? parseError.message : String(parseError),
+          });
+          break;
+        }
+      }
+
+      return { agentName: this.agentName, findings: Array.from(findingsMap.values()) };
+    } catch (error) {
+      logger.error(`${this.agentName} ReAct执行失败`, {
+        runId: run.id,
+        error: error instanceof Error ? error.message : String(error),
+      });
+      // Keep whatever was already validated before the failure.
+      return { agentName: this.agentName, findings: Array.from(findingsMap.values()) };
+    }
+  }
+
+  /**
+   * Executes the requested tool calls one after another.
+   *
+   * @param toolCalls Tool calls taken from the model's message.
+   * @param context Execution context handed to each tool.
+   * @returns One result per call, in order. Unknown tools, unparsable
+   *   arguments and tool exceptions yield `success: false` with an error
+   *   message instead of throwing.
+   */
+  private async executeTools(
+    toolCalls: OpenAI.Chat.ChatCompletionMessageToolCall[],
+    context: ToolExecutionContext
+  ): Promise<ToolResult[]> {
+    const results: ToolResult[] = [];
+
+    for (const toolCall of toolCalls) {
+      const tool = this.toolRegistry?.get(toolCall.function.name);
+
+      if (!tool) {
+        results.push({
+          toolCallId: toolCall.id,
+          success: false,
+          error: `工具 ${toolCall.function.name} 未找到`,
+        });
+        continue;
+      }
+
+      try {
+        const params = JSON.parse(toolCall.function.arguments);
+        const result = await tool.execute(params, context);
+
+        logger.info(`工具调用成功: ${toolCall.function.name}`, {
+          runId: context.runId,
+          params,
+        });
+
+        results.push({
+          toolCallId: toolCall.id,
+          success: true,
+          result,
+        });
+      } catch (error) {
+        logger.error(`工具调用失败: ${toolCall.function.name}`, {
+          runId: context.runId,
+          error: error instanceof Error ? error.message : String(error),
+        });
+
+        results.push({
+          toolCallId: toolCall.id,
+          success: false,
+          error: error instanceof Error ? error.message : String(error),
+        });
+      }
+    }
+
+    return results;
+  }
+}