feat: 添加向量记忆和学习系统

基于Qdrant的向量存储实现finding相似度搜索；LearningSystem支持误报学习和Few-shot示例生成。

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-05-31 23:16:46 +00:00 · 2026-03-01 03:28:34 +00:00
parent 6186210b4e
commit 956a84acc1
3 changed files with 445 additions and 0 deletions
--- a/src/review/learning/learning-system.ts
+++ b/src/review/learning/learning-system.ts
@@ -0,0 +1,210 @@
+import { VectorMemoryStore } from '../memory/vector-store';
+import { FileReviewStore } from '../store/file-review-store';
+import { Finding, FindingCategory } from '../types';
+import { logger } from '../../utils/logger';
+import OpenAI from 'openai';
+import config from '../../config';
+
+/**
+ * Feeds reviewer feedback back into the review pipeline.
+ *
+ * Rejected findings are stored in the vector memory as false-positive
+ * patterns, approved findings are stored as positive samples, and both are
+ * later used to build few-shot prompt examples and to compute a confidence
+ * penalty for new findings that resemble known false positives.
+ */
+export class LearningSystem {
+  constructor(
+    private memoryStore: VectorMemoryStore,
+    private store: FileReviewStore
+  ) {}
+
+  /**
+   * Records a finding that was rejected as a false positive and lowers the
+   * confidence of similar findings that have not been published yet.
+   *
+   * @param finding The rejected finding.
+   * @param reason Why the finding was considered a false positive.
+   * @param owner Repository owner; stored with the pattern.
+   * @param repo Repository name; stored with the pattern.
+   */
+  async learnFromFalsePositive(
+    finding: Finding,
+    reason: string,
+    owner: string,
+    repo: string
+  ): Promise<void> {
+    // Persist the false-positive pattern in the vector memory.
+    await this.memoryStore.storeMemory({
+      id: `fp-${finding.id}`,
+      type: 'pattern',
+      content: `False Positive: ${finding.title}\nReason: ${reason}\nEvidence: ${finding.evidence}\nCategory: ${finding.category}`,
+      metadata: {
+        category: finding.category,
+        approved: false,
+        timestamp: new Date().toISOString(),
+        owner,
+        repo,
+        project: `${owner}/${repo}`,
+      },
+    });
+
+    // Look up similar unpublished findings and lower their confidence.
+    const similarFindings = await this.findSimilarPendingFindings(finding);
+
+    // Count only the findings that were really changed, so the summary log
+    // below does not overstate the effect of this feedback.
+    let updatedCount = 0;
+
+    for (const similar of similarFindings) {
+      if (!similar.published && similar.confidence > 0.5) {
+        // Drop by 0.2 but never below the 0.3 floor.
+        const newConfidence = Math.max(similar.confidence - 0.2, 0.3);
+        await this.store.updateFindingConfidence(similar.id, newConfidence);
+        updatedCount += 1;
+
+        logger.info('从误报中学习，降低相似finding置信度', {
+          findingId: similar.id,
+          oldConfidence: similar.confidence,
+          newConfidence,
+        });
+      }
+    }
+
+    logger.info('从误报中学习完成', {
+      findingId: finding.id,
+      category: finding.category,
+      updatedSimilar: updatedCount,
+    });
+  }
+
+  /**
+   * Builds few-shot chat messages for a finding category from stored memory.
+   *
+   * Each example is a user message followed by an assistant message. Approved
+   * entries become positive examples (at most `fewShotExamplesCount`);
+   * rejected entries become negative examples with an empty findings list
+   * (at most 40% of `fewShotExamplesCount`, rounded down).
+   *
+   * @param category Category the examples must belong to.
+   * @param owner Optional repository owner; used only together with `repo`.
+   * @param repo Optional repository name; used only together with `owner`.
+   * @returns The example messages, or an empty array when few-shot is disabled.
+   */
+  async generateFewShotExamples(
+    category: FindingCategory,
+    owner?: string,
+    repo?: string
+  ): Promise<OpenAI.Chat.ChatCompletionMessageParam[]> {
+    const targetCount = config.review.fewShotExamplesCount;
+
+    // Few-shot disabled (or misconfigured with a negative count): return early
+    // and skip the vector queries. A negative count would otherwise reach
+    // `slice(0, negative)` and select "all but the last N" examples.
+    if (targetCount <= 0) {
+      return [];
+    }
+
+    const globalConditions: Array<{ key: string; match: { value: unknown } }> = [
+      { key: 'category', match: { value: category } },
+    ];
+
+    // When a project is given, restrict the first lookup to that project.
+    const projectScoped = Boolean(owner && repo);
+    const scopedConditions = projectScoped
+      ? [...globalConditions, { key: 'project', match: { value: `${owner}/${repo}` } }]
+      : globalConditions;
+
+    // Use the category name as a generic query instead of an empty string.
+    const categoryQuery = `${category} issues in code`;
+
+    // Approved entries: positive samples.
+    const approved = await this.memoryStore.searchSimilar(categoryQuery, 10, {
+      must: [...scopedConditions, { key: 'approved', match: { value: true } }],
+    });
+
+    // Rejected entries (false positives): negative samples.
+    const rejected = await this.memoryStore.searchSimilar(categoryQuery, 5, {
+      must: [...scopedConditions, { key: 'approved', match: { value: false } }],
+    });
+
+    // Top up with examples from any project when the project-scoped lookup came
+    // up short. Without project scoping the first lookup was already global, so
+    // repeating it would only cost another embedding and search call.
+    if (projectScoped && approved.length < targetCount) {
+      const globalApproved = await this.memoryStore.searchSimilar(categoryQuery, 10, {
+        must: [...globalConditions, { key: 'approved', match: { value: true } }],
+      });
+      const knownIds = new Set(approved.map((hit) => hit.entry.id));
+      approved.push(...globalApproved.filter((hit) => !knownIds.has(hit.entry.id)));
+    }
+
+    const examples: OpenAI.Chat.ChatCompletionMessageParam[] = [];
+
+    const negativeCount = Math.floor(targetCount * 0.4);
+
+    // Positive examples: the assistant answers with one valid finding whose
+    // title is the first line of the stored content.
+    for (const a of approved.slice(0, targetCount)) {
+      examples.push({
+        role: 'user',
+        content: `审查这段代码变更，关注${category}相关问题：\n${a.entry.content}`,
+      });
+      examples.push({
+        role: 'assistant',
+        content: JSON.stringify({
+          findings: [
+            {
+              title: a.entry.content.split('\n')[0].replace('False Positive: ', ''),
+              category,
+              severity: a.entry.metadata.severity || 'medium',
+              valid: true,
+            },
+          ],
+        }),
+      });
+    }
+
+    // Negative examples (false positives): the assistant reports nothing.
+    for (const r of rejected.slice(0, negativeCount)) {
+      examples.push({
+        role: 'user',
+        content: `审查这段代码变更，关注${category}相关问题：\n${r.entry.content}`,
+      });
+      examples.push({
+        role: 'assistant',
+        content: JSON.stringify({
+          findings: [],
+          reason: '历史反馈表明这类情况不应报告为问题',
+        }),
+      });
+    }
+
+    logger.debug('生成Few-shot示例', {
+      category,
+      positiveExamples: approved.length,
+      negativeExamples: rejected.length,
+      totalMessages: examples.length,
+    });
+
+    return examples;
+  }
+
+  /**
+   * Placeholder: always resolves to an empty list.
+   *
+   * A real implementation would query stored findings similar to `_finding`;
+   * the original author notes that FileReviewStore has no such method yet and
+   * needs to be extended before deployment.
+   */
+  private async findSimilarPendingFindings(_finding: Finding): Promise<Finding[]> {
+    return [];
+  }
+
+  /**
+   * Stores an approved finding in the vector memory as a positive sample.
+   *
+   * @param finding The approved finding.
+   * @param _owner Repository owner (used despite the underscore prefix).
+   * @param _repo Repository name (used despite the underscore prefix).
+   */
+  async learnFromApproval(
+    finding: Finding,
+    _owner: string,
+    _repo: string
+  ): Promise<void> {
+    await this.memoryStore.storeFinding(finding, true, _owner, _repo);
+
+    logger.info('从批准中学习完成', {
+      findingId: finding.id,
+      category: finding.category,
+      severity: finding.severity,
+    });
+  }
+
+  /**
+   * Computes a confidence penalty for a finding based on how closely it
+   * resembles false-positive patterns stored for the same project.
+   *
+   * @param finding The candidate finding (title, evidence and category are used).
+   * @param owner Repository owner.
+   * @param repo Repository name.
+   * @returns `0` when nothing similar is found, otherwise `-0.3`, `-0.15` or
+   *   `-0.05` for a best score above 0.9, 0.8 or 0.7 respectively.
+   */
+  async getConfidenceAdjustment(
+    finding: Omit<Finding, 'id' | 'runId' | 'published'>,
+    owner: string,
+    repo: string
+  ): Promise<number> {
+    // Only patterns of the same category AND the same project are considered;
+    // the filter restricts to the project, it does not merely prefer it.
+    const query = `${finding.title}\n${finding.evidence}`;
+    const similarFalsePositives = await this.memoryStore.searchSimilar(query, 3, {
+      must: [
+        { key: 'type', match: { value: 'pattern' } },
+        { key: 'category', match: { value: finding.category } },
+        { key: 'project', match: { value: `${owner}/${repo}` } },
+      ],
+    });
+
+    if (similarFalsePositives.length === 0) {
+      return 0; // nothing to adjust
+    }
+
+    // The penalty is driven by the single closest match.
+    const maxSimilarity = Math.max(...similarFalsePositives.map((fp) => fp.score));
+
+    if (maxSimilarity > 0.9) {
+      return -0.3; // near-identical to a known false positive
+    }
+    if (maxSimilarity > 0.8) {
+      return -0.15; // moderately similar
+    }
+    if (maxSimilarity > 0.7) {
+      return -0.05; // weakly similar
+    }
+
+    return 0;
+  }
+}
--- a/src/review/memory/types.ts
+++ b/src/review/memory/types.ts
@@ -0,0 +1,29 @@
+/**
+ * A single record kept in the vector memory.
+ */
+export interface MemoryEntry {
+  /** Caller-chosen identifier of the record. */
+  id: string;
+  /** Kind of record: a stored finding, a feedback note, or a learned pattern. */
+  type: 'finding' | 'feedback' | 'pattern';
+  /** Text of the record; VectorMemoryStore embeds this text when storing it. */
+  content: string;
+  // NOTE(review): VectorMemoryStore.storeMemory computes its own embedding and
+  // does not read this field — confirm whether any other caller uses it.
+  embedding?: number[];
+  /** Payload fields stored next to the vector and usable as search filters. */
+  metadata: {
+    category?: string;
+    severity?: string;
+    /** true for approved samples, false for rejected / false-positive ones. */
+    approved?: boolean;
+    /** Writers in VectorMemoryStore and LearningSystem use `new Date().toISOString()`. */
+    timestamp: string;
+    /** Writers in VectorMemoryStore and LearningSystem use `${owner}/${repo}`. */
+    project?: string;
+    owner?: string;
+    repo?: string;
+  };
+}
+
+/**
+ * One hit returned by a similarity search over the vector memory.
+ */
+export interface MemorySearchResult {
+  /** The stored record that matched. */
+  entry: MemoryEntry;
+  /** Similarity score reported by the vector search. */
+  score: number;
+  /** VectorMemoryStore.searchSimilar fills this with `1 - score`. */
+  distance: number;
+}
+
+/**
+ * Reviewer feedback on a single finding.
+ */
+// NOTE(review): not referenced by the other files in this commit — confirm
+// the intended consumer.
+export interface FeedbackRecord {
+  /** Identifier of the finding the feedback refers to. */
+  findingId: string;
+  /** Whether the finding was approved. */
+  approved: boolean;
+  /** Explanation for the decision. */
+  reason: string;
+  // presumably an ISO-8601 string like MemoryEntry.metadata.timestamp — confirm
+  timestamp: string;
+  reviewer?: string;
+}
--- a/src/review/memory/vector-store.ts
+++ b/src/review/memory/vector-store.ts
@@ -0,0 +1,206 @@
+import { createHash } from 'crypto';
+import { QdrantClient } from '@qdrant/js-client-rest';
+import OpenAI from 'openai';
+import { MemoryEntry, MemorySearchResult } from './types';
+import { Finding } from '../types';
+import { logger } from '../../utils/logger';
+
+/**
+ * Qdrant-backed vector memory for review findings, feedback and learned
+ * patterns. Text is embedded with OpenAI `text-embedding-3-small` and stored
+ * in a single collection together with its metadata as payload.
+ */
+export class VectorMemoryStore {
+  /** Canonical hyphenated UUID; such IDs are passed to Qdrant unchanged. */
+  private static readonly UUID_PATTERN =
+    /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
+
+  private client: QdrantClient;
+  private openai: OpenAI;
+  private collectionName = 'code_review_memory';
+  private initialized = false;
+  /** In-flight initialization, shared so concurrent callers do not race. */
+  private initPromise: Promise<void> | null = null;
+
+  /**
+   * @param qdrantUrl Base URL of the Qdrant server.
+   * @param openaiClient OpenAI client used to create embeddings.
+   */
+  constructor(qdrantUrl: string, openaiClient: OpenAI) {
+    this.client = new QdrantClient({ url: qdrantUrl });
+    this.openai = openaiClient;
+  }
+
+  /**
+   * Makes sure the collection exists, creating it on first use.
+   *
+   * Safe to call repeatedly and concurrently: callers that arrive while an
+   * initialization is running await the same promise instead of each trying
+   * to create the collection. A failed attempt is not cached, so the next
+   * call retries.
+   *
+   * @throws Rethrows any error from the Qdrant client after logging it.
+   */
+  async initialize(): Promise<void> {
+    if (this.initialized) {
+      return;
+    }
+
+    if (!this.initPromise) {
+      this.initPromise = this.ensureCollection().catch((error: unknown) => {
+        // Forget the failed attempt so a later call can try again.
+        this.initPromise = null;
+        throw error;
+      });
+    }
+
+    await this.initPromise;
+  }
+
+  /** Creates the collection when it is missing and marks the store ready. */
+  private async ensureCollection(): Promise<void> {
+    try {
+      const collections = await this.client.getCollections();
+      const exists = collections.collections.some((c) => c.name === this.collectionName);
+
+      if (!exists) {
+        await this.client.createCollection(this.collectionName, {
+          vectors: {
+            size: 1536, // text-embedding-3-small dimension
+            distance: 'Cosine',
+          },
+        });
+        logger.info('向量记忆集合已创建', { collection: this.collectionName });
+      }
+
+      this.initialized = true;
+      logger.info('向量记忆系统已初始化');
+    } catch (error) {
+      logger.error('向量记忆系统初始化失败', {
+        error: error instanceof Error ? error.message : String(error),
+      });
+      throw error;
+    }
+  }
+
+  /**
+   * Maps a logical entry ID to a valid Qdrant point ID.
+   *
+   * Qdrant only accepts unsigned integers or UUIDs as point IDs, while callers
+   * use free-form strings such as `fp-<id>` or `<owner>/<repo>:<fingerprint>`.
+   * Non-UUID IDs are hashed (SHA-1) into a UUID-shaped string, so the same
+   * logical ID always maps to the same point and upserts keep overwriting it.
+   */
+  private toPointId(id: string): string {
+    if (VectorMemoryStore.UUID_PATTERN.test(id)) {
+      return id;
+    }
+
+    const hex = createHash('sha1').update(id).digest('hex');
+    // Set the version nibble to 5 and the variant bits to 10xx (RFC 4122 layout).
+    const variant = ((parseInt(hex.charAt(16), 16) & 0x3) | 0x8).toString(16);
+
+    return [
+      hex.slice(0, 8),
+      hex.slice(8, 12),
+      `5${hex.slice(13, 16)}`,
+      `${variant}${hex.slice(17, 20)}`,
+      hex.slice(20, 32),
+    ].join('-');
+  }
+
+  /**
+   * Embeds `entry.content` and upserts the entry into the collection.
+   *
+   * The payload holds `type`, `content`, every metadata field, and the
+   * caller's original ID under `memoryId` (the point ID itself may be a
+   * derived UUID).
+   *
+   * @throws When initialization, embedding or the upsert fails.
+   */
+  async storeMemory(entry: MemoryEntry): Promise<void> {
+    await this.initialize();
+
+    const embedding = await this.getEmbedding(entry.content);
+
+    await this.client.upsert(this.collectionName, {
+      points: [
+        {
+          id: this.toPointId(entry.id),
+          vector: embedding,
+          payload: {
+            type: entry.type,
+            content: entry.content,
+            ...entry.metadata,
+            memoryId: entry.id,
+          },
+        },
+      ],
+    });
+
+    logger.debug('记忆已存储', {
+      id: entry.id,
+      type: entry.type,
+      category: entry.metadata.category,
+    });
+  }
+
+  /**
+   * Finds the stored entries most similar to `query`.
+   *
+   * @param query Text to embed and search with.
+   * @param limit Maximum number of hits (default 5).
+   * @param filter Optional Qdrant filter object, passed through unchanged.
+   * @returns Hits with their score and `distance = 1 - score`.
+   */
+  async searchSimilar(
+    query: string,
+    limit: number = 5,
+    filter?: any
+  ): Promise<MemorySearchResult[]> {
+    await this.initialize();
+
+    const queryEmbedding = await this.getEmbedding(query);
+
+    const results = await this.client.search(this.collectionName, {
+      vector: queryEmbedding,
+      limit,
+      filter,
+      // The mapping below reads everything from the payload.
+      with_payload: true,
+    });
+
+    return results.map((r) => ({
+      entry: {
+        // Prefer the caller's logical ID; entries without it fall back to the
+        // raw point ID.
+        id: typeof r.payload?.memoryId === 'string' ? r.payload.memoryId : String(r.id),
+        type: r.payload?.type as MemoryEntry['type'],
+        content: r.payload?.content as string,
+        metadata: {
+          category: r.payload?.category as string,
+          severity: r.payload?.severity as string,
+          approved: r.payload?.approved as boolean,
+          timestamp: r.payload?.timestamp as string,
+          project: r.payload?.project as string,
+          owner: r.payload?.owner as string,
+          repo: r.payload?.repo as string,
+        },
+      },
+      score: r.score,
+      distance: 1 - r.score,
+    }));
+  }
+
+  /**
+   * Creates an embedding for `text` with `text-embedding-3-small`.
+   *
+   * @throws Rethrows any error from the OpenAI client after logging it.
+   */
+  private async getEmbedding(text: string): Promise<number[]> {
+    try {
+      const response = await this.openai.embeddings.create({
+        model: 'text-embedding-3-small',
+        input: text.slice(0, 8000), // cap the length to stay under the token limit
+      });
+
+      return response.data[0].embedding;
+    } catch (error) {
+      logger.error('生成embedding失败', {
+        error: error instanceof Error ? error.message : String(error),
+      });
+      throw error;
+    }
+  }
+
+  /**
+   * Stores a finding as a memory entry of type `finding`.
+   *
+   * @param finding The finding to store.
+   * @param approved Whether the finding was approved.
+   * @param owner Repository owner.
+   * @param repo Repository name.
+   */
+  async storeFinding(finding: Finding, approved: boolean, owner: string, repo: string): Promise<void> {
+    const content = `${finding.title}\n${finding.detail}\nEvidence: ${finding.evidence}`;
+
+    // Repo-scoped ID so findings from different repositories never overwrite
+    // each other.
+    const scopedId = `${owner}/${repo}:${finding.fingerprint}`;
+
+    await this.storeMemory({
+      id: scopedId,
+      type: 'finding',
+      content,
+      metadata: {
+        category: finding.category,
+        severity: finding.severity,
+        approved,
+        timestamp: new Date().toISOString(),
+        owner,
+        repo,
+        project: `${owner}/${repo}`,
+      },
+    });
+  }
+
+  /**
+   * Builds a short text block listing approved entries similar to a finding.
+   *
+   * Searches the given project first (up to 2 hits); when fewer than 2 are
+   * found, adds hits from any project, for at most 3 distinct entries.
+   *
+   * @param currentFinding Finding being reviewed; title and evidence are used.
+   * @param owner Repository owner.
+   * @param repo Repository name.
+   * @returns The formatted block, or `''` when there is nothing to show.
+   */
+  async getHistoricalContext(
+    currentFinding: Partial<Finding>,
+    owner: string,
+    repo: string
+  ): Promise<string> {
+    // `title` is optional on Partial<Finding>; default it so the query never
+    // contains the literal text "undefined".
+    const query = `${currentFinding.title ?? ''}\n${currentFinding.evidence || ''}`;
+
+    // Nothing to search with: skip the embedding and search calls.
+    if (query.trim() === '') {
+      return '';
+    }
+
+    // Look for similar entries within the same project first.
+    const projectSimilar = await this.searchSimilar(query, 2, {
+      must: [
+        { key: 'approved', match: { value: true } },
+        { key: 'project', match: { value: `${owner}/${repo}` } },
+      ],
+    });
+
+    // Not enough project-local hits: widen the search to every project.
+    let similar = projectSimilar;
+    if (similar.length < 2) {
+      const globalSimilar = await this.searchSimilar(query, 3, {
+        must: [{ key: 'approved', match: { value: true } }],
+      });
+      // The global search also returns this project's entries; drop those that
+      // are already listed so no entry appears twice.
+      const listedIds = new Set(projectSimilar.map((s) => s.entry.id));
+      const additional = globalSimilar.filter((s) => !listedIds.has(s.entry.id));
+      similar = [...projectSimilar, ...additional].slice(0, 3);
+    }
+
+    if (similar.length === 0) {
+      return '';
+    }
+
+    return `\n\n历史相似问题参考：\n${similar
+      .map(
+        (s, i) =>
+          `${i + 1}. ${s.entry.content.split('\n')[0]} (相似度: ${(s.score * 100).toFixed(1)}%, 项目: ${
+            s.entry.metadata.project || '未知'
+          })`
+      )
+      .join('\n')}`;
+  }
+
+  /**
+   * Stores reviewer feedback on a finding as a memory entry of type `feedback`.
+   *
+   * The entry ID includes the current time, so repeated feedback on the same
+   * finding is stored as separate entries.
+   *
+   * @param findingId Identifier of the finding the feedback refers to.
+   * @param approved Whether the finding was approved.
+   * @param reason Explanation given for the decision.
+   * @param owner Repository owner.
+   * @param repo Repository name.
+   */
+  async storeFeedback(
+    findingId: string,
+    approved: boolean,
+    reason: string,
+    owner: string,
+    repo: string
+  ): Promise<void> {
+    const content = `Feedback: ${approved ? 'Approved' : 'Rejected'}\nReason: ${reason}\nFinding ID: ${findingId}`;
+
+    await this.storeMemory({
+      id: `feedback-${findingId}-${Date.now()}`,
+      type: 'feedback',
+      content,
+      metadata: {
+        approved,
+        timestamp: new Date().toISOString(),
+        owner,
+        repo,
+        project: `${owner}/${repo}`,
+      },
+    });
+  }
+}