diff --git a/bun.lock b/bun.lock index 88ef8ce..0163a65 100644 --- a/bun.lock +++ b/bun.lock @@ -14,6 +14,7 @@ "hono": "^4.11.9", "lodash-es": "^4.17.21", "openai": "^4.87.3", + "tokenlens": "^1.3.1", "zod": "^3.25.1", "zod-to-json-schema": "^3.25.1", }, @@ -142,6 +143,14 @@ "@sindresorhus/merge-streams": ["@sindresorhus/merge-streams@4.0.0", "", {}, "sha512-tlqY9xq5ukxTUZBmoOp+m61cqwQD5pHJtFY3Mn8CA8ps6yghLH/Hw8UPdqg4OLmFW3IFlcXnQNmo/dh8HzXYIQ=="], + "@tokenlens/core": ["@tokenlens/core@1.3.0", "", {}, "sha512-d8YNHNC+q10bVpi95fELJwJyPVf1HfvBEI18eFQxRSZTdByXrP+f/ZtlhSzkx0Jl0aEmYVeBA5tPeeYRioLViQ=="], + + "@tokenlens/fetch": ["@tokenlens/fetch@1.3.0", "", { "dependencies": { "@tokenlens/core": "1.3.0" } }, "sha512-RONDRmETYly9xO8XMKblmrZjKSwCva4s5ebJwQNfNlChZoA5kplPoCgnWceHnn1J1iRjLVlrCNB43ichfmGBKQ=="], + + "@tokenlens/helpers": ["@tokenlens/helpers@1.3.1", "", { "dependencies": { "@tokenlens/core": "1.3.0", "@tokenlens/fetch": "1.3.0" } }, "sha512-t6yL8N6ES8337E6eVSeH4hCKnPdWkZRFpupy9w5E66Q9IeqQ9IO7XQ6gh12JKjvWiRHuyyJ8MBP5I549Cr41EQ=="], + + "@tokenlens/models": ["@tokenlens/models@1.3.0", "", { "dependencies": { "@tokenlens/core": "1.3.0" } }, "sha512-9mx7ZGeewW4ndXAiD7AT1bbCk4OpJeortbjHHyNkgap+pMPPn1chY6R5zqe1ggXIUzZ2l8VOAKfPqOvpcrisJw=="], + "@types/bun": ["@types/bun@1.3.10", "", { "dependencies": { "bun-types": "1.3.10" } }, "sha512-0+rlrUrOrTSskibryHbvQkDOWRJwJZqZlxrUs1u4oOoTln8+WIXBPmAuCF35SWB2z4Zl3E84Nl/D0P7803nigQ=="], "@types/lodash": ["@types/lodash@4.17.24", "", {}, "sha512-gIW7lQLZbue7lRSWEFql49QJJWThrTFFeIMJdp3eH4tKoxm1OvEPg02rm4wCCSHS0cL3/Fizimb35b7k8atwsQ=="], @@ -680,6 +689,8 @@ "to-regex-range": ["to-regex-range@5.0.1", "", { "dependencies": { "is-number": "^7.0.0" } }, "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ=="], + "tokenlens": ["tokenlens@1.3.1", "", { "dependencies": { "@tokenlens/core": "1.3.0", "@tokenlens/fetch": "1.3.0", "@tokenlens/helpers": "1.3.1", "@tokenlens/models": "1.3.0" } }, 
"sha512-7oxmsS5PNCX3z+b+z07hL5vCzlgHKkCGrEQjQmWl5l+v5cUrtL7S1cuST4XThaL1XyjbTX8J5hfP0cjDJRkaLA=="], + "tr46": ["tr46@0.0.3", "", {}, "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="], "traverse": ["traverse@0.6.8", "", {}, "sha512-aXJDbk6SnumuaZSANd21XAo15ucCDE38H4fkqiGsc3MhCK+wOlZvLP9cB/TvpHT0mOyWgC4Z8EwRlzqYSUzdsA=="], @@ -778,6 +789,8 @@ "env-ci/execa": ["execa@8.0.1", "", { "dependencies": { "cross-spawn": "^7.0.3", "get-stream": "^8.0.1", "human-signals": "^5.0.0", "is-stream": "^3.0.0", "merge-stream": "^2.0.0", "npm-run-path": "^5.1.0", "onetime": "^6.0.0", "signal-exit": "^4.1.0", "strip-final-newline": "^3.0.0" } }, "sha512-VyhnebXciFV2DESc+p6B+y0LjSm0krU4OgJN44qFAhBY0TJ+1V61tYD2+wHusZ6F9n5K+vl8k0sTy7PEfV4qpg=="], + "fdir/picomatch": ["picomatch@4.0.3", "", {}, "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q=="], + "fetch-blob/web-streams-polyfill": ["web-streams-polyfill@3.3.3", "", {}, "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw=="], "foreground-child/signal-exit": ["signal-exit@4.1.0", "", {}, "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw=="], diff --git a/package.json b/package.json index 0767f50..b15aa79 100644 --- a/package.json +++ b/package.json @@ -15,6 +15,7 @@ "hono": "^4.11.9", "lodash-es": "^4.17.21", "openai": "^4.87.3", + "tokenlens": "^1.3.1", "zod": "^3.25.1", "zod-to-json-schema": "^3.25.1" }, diff --git a/src/review/agents/critic-agent.ts b/src/review/agents/critic-agent.ts index db2220d..f90322c 100644 --- a/src/review/agents/critic-agent.ts +++ b/src/review/agents/critic-agent.ts @@ -3,6 +3,7 @@ import type { LLMGateway } from '../../llm/gateway'; import type { LLMMessage } from '../../llm/types'; import { withGlobalPrompt } from '../../utils/global-prompt'; import { logger } from '../../utils/logger'; +import { tokenCounter } from 
'../context/token-counter'; import { Finding, ReviewContext } from '../types'; export interface CritiqueResult { @@ -41,7 +42,7 @@ export class CriticAgent { ${JSON.stringify(findings, null, 2)} 原始代码变更片段(供参考): -${context.diff.slice(0, 3000)} +${tokenCounter.clip(context.diff, 1000)} 评估标准: 1. **Evidence充分性**: 证据是否充分支持结论?是否引用了具体代码? @@ -147,7 +148,7 @@ Finding: - Confidence: ${finding.confidence} 代码上下文: -${context.diff.slice(0, 2000)} +${tokenCounter.clip(context.diff, 700)} 判断: 1. 这个finding是否有效(不是误报)? diff --git a/src/review/agents/reflexion-agent.ts b/src/review/agents/reflexion-agent.ts index 9445c7f..d75edf4 100644 --- a/src/review/agents/reflexion-agent.ts +++ b/src/review/agents/reflexion-agent.ts @@ -4,6 +4,7 @@ import type { LLMGateway } from '../../llm/gateway'; import type { LLMMessage } from '../../llm/types'; import { withGlobalPrompt } from '../../utils/global-prompt'; import { logger } from '../../utils/logger'; +import { tokenCounter } from '../context/token-counter'; import { LearningSystem } from '../learning/learning-system'; import { findingResponseSchema } from '../schema/finding-schema'; import { ToolRegistry } from '../tools/registry'; @@ -128,7 +129,7 @@ ${critique.missedIssues.map((missed) => `- ${missed}`).join('\n')} 总体评估: ${critique.overallAssessment} 代码上下文: -${context.diff.slice(0, 3000)} +${tokenCounter.clip(context.diff, 1000)} 任务: 1. 
修正有问题的findings(根据批评意见) diff --git a/src/review/agents/specialist-agent.ts b/src/review/agents/specialist-agent.ts index 6df7500..685d7d1 100644 --- a/src/review/agents/specialist-agent.ts +++ b/src/review/agents/specialist-agent.ts @@ -9,6 +9,7 @@ import { findingResponseSchema } from '../schema/finding-schema'; import { ToolRegistry } from '../tools/registry'; import type { ToolExecutionContext, ToolResult } from '../tools/types'; import { AgentResult, Finding, FindingCategory, ReviewContext, ReviewRun } from '../types'; +import { tokenCounter } from '../context/token-counter'; function buildFingerprint(category: string, path: string, line: number, title: string): string { return createHash('sha256') @@ -18,8 +19,8 @@ function buildFingerprint(category: string, path: string, line: number, title: s } function toCompactContext(context: ReviewContext): string { - // 全局上下文大小限制:100k chars(约33k tokens),为系统prompt、few-shot、响应留空间 - const MAX_CONTEXT_CHARS = 100_000; + // Token-based budget: 25k tokens for context, leaving room for system prompt + few-shot + response + const MAX_CONTEXT_TOKENS = 25_000; const files = context.changedFiles.map((file) => ({ path: file.path, @@ -28,7 +29,7 @@ function toCompactContext(context: ReviewContext): string { deletions: file.deletions, })); - // 策略:逐步缩减直到满足限制 + // 策略:逐步缩减直到满足 token 限制 // 1. changedFiles元数据(小且必需) // 2. parsedDiff(关键,逐步减少每个文件的changes数量) // 3. 
fileContents(最大,按需截断或移除部分文件) @@ -36,7 +37,6 @@ function toCompactContext(context: ReviewContext): string { let maxChangesPerFile = 200; let maxFileContentsEntries = Object.keys(context.fileContents).length; - // 尝试构建并测量大小 const tryBuild = (changesLimit: number, contentEntriesLimit: number): string => { const snippets = context.parsedDiff.map((file) => ({ path: file.path, @@ -63,9 +63,9 @@ function toCompactContext(context: ReviewContext): string { let result = tryBuild(maxChangesPerFile, maxFileContentsEntries); - // 如果超过限制,逐步缩减 + // 如果超过 token 限制,逐步缩减 while ( - result.length > MAX_CONTEXT_CHARS && + tokenCounter.count(result) > MAX_CONTEXT_TOKENS && (maxChangesPerFile > 20 || maxFileContentsEntries > 0) ) { if (maxChangesPerFile > 20) { @@ -77,13 +77,13 @@ function toCompactContext(context: ReviewContext): string { result = tryBuild(maxChangesPerFile, maxFileContentsEntries); } - // 如果仍然超限,强制截断(保留前N个字符) - if (result.length > MAX_CONTEXT_CHARS) { - logger.warn('Context size still exceeds limit after reduction, truncating', { - originalSize: result.length, - limit: MAX_CONTEXT_CHARS, + // 如果仍然超限,强制截断 + if (tokenCounter.count(result) > MAX_CONTEXT_TOKENS) { + logger.warn('Context size still exceeds token limit after reduction, truncating', { + estimatedTokens: tokenCounter.count(result), + limit: MAX_CONTEXT_TOKENS, }); - result = `${result.slice(0, MAX_CONTEXT_CHARS)}\n... 
/**
 * Unit tests for TokenCounter.
 *
 * Each case builds its own counter so no state leaks between tests; the
 * shared singleton is only touched by the export check.
 */

/** Ratio the default counter is expected to use (characters per token). */
const EXPECTED_DEFAULT_RATIO = 3.5;

/** Builds a counter with default settings. */
const freshCounter = (): TokenCounter => new TokenCounter();

describe('TokenCounter.count', () => {
  test('returns 0 for empty string', () => {
    expect(freshCounter().count('')).toBe(0);
  });

  test('uses ceil(length / 3.5) for known string', () => {
    // 5 chars / 3.5 = 1.43 -> rounds up to 2
    expect(freshCounter().count('hello')).toBe(2);
  });

  test('handles longer text using same formula', () => {
    const sample = 'a'.repeat(36);
    const expected = Math.ceil(36 / EXPECTED_DEFAULT_RATIO);
    expect(freshCounter().count(sample)).toBe(expected);
  });
});

describe('TokenCounter.clip', () => {
  test('returns text as-is when within budget', () => {
    const input = 'short text';
    expect(freshCounter().clip(input, 100)).toBe(input);
  });

  test('truncates with message when exceeding budget', () => {
    const budget = 4;
    const alphabet = 'abcdefghijklmnopqrstuvwxyz';
    const keptPrefix = alphabet.slice(0, Math.floor(budget * EXPECTED_DEFAULT_RATIO));

    const output = freshCounter().clip(alphabet, budget);

    expect(output.startsWith(keptPrefix)).toBe(true);
    expect(output).toContain('[truncated, exceeded 4 token budget]');
    expect(output).toBe(`${keptPrefix}\n... [truncated, exceeded 4 token budget]`);
  });
});

describe('TokenCounter.getContextWindow', () => {
  test('returns known context window for gpt-4o', () => {
    expect(freshCounter().getContextWindow('gpt-4o')).toBe(128000);
  });

  test('returns known context window for claude-sonnet-4-20250514', () => {
    expect(freshCounter().getContextWindow('claude-sonnet-4-20250514')).toBe(200000);
  });

  test('returns default context window for versioned model not in registry', () => {
    // The static tokenlens registry may lack dated variants, in which case
    // the counter falls back to its default window.
    expect(freshCounter().getContextWindow('gpt-4o-2024-08-06')).toBe(128000);
  });

  test('returns default context window for unknown models', () => {
    expect(freshCounter().getContextWindow('unknown-model-xyz')).toBe(128000);
  });
});

describe('TokenCounter.getUsableBudget', () => {
  test('returns contextWindow - 4000 for known model', () => {
    expect(freshCounter().getUsableBudget('gpt-4o')).toBe(124000);
  });

  test('never returns less than 1000 for tiny context window', () => {
    // Force a window smaller than the reserve so the floor is what gets returned.
    class TinyWindowTokenCounter extends TokenCounter {
      getContextWindow(_model: string): number {
        return 500;
      }
    }

    const tiny = new TinyWindowTokenCounter();
    expect(tiny.getUsableBudget('tiny-model')).toBe(1000);
  });
});

describe('TokenCounter exports and constructor options', () => {
  test('exports singleton tokenCounter instance', () => {
    expect(tokenCounter).toBeInstanceOf(TokenCounter);
  });

  test('supports custom charsPerToken in constructor', () => {
    // 5 chars / 2 = 2.5 -> rounds up to 3
    const coarse = new TokenCounter(2);
    expect(coarse.count('hello')).toBe(3);
  });
});

describe('TokenCounter dynamic catalog', () => {
  test('hasCatalog is false before refreshCatalog', () => {
    expect(freshCounter().hasCatalog).toBe(false);
  });

  test('getContextWindow works without catalog (static fallback)', () => {
    // With no catalog loaded the lookup must still answer from static data.
    expect(freshCounter().getContextWindow('gpt-4o')).toBe(128000);
  });

  test('stopRefresh is safe to call without active timer', () => {
    // Passing means the call returned without throwing.
    freshCounter().stopRefresh();
  });
});
// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

/**
 * Default chars-per-token ratio.
 * English text: ~4 chars/token. Code: ~3.5 chars/token.
 * 3.5 is used so token counts are slightly over-estimated rather than under-estimated.
 */
const DEFAULT_CHARS_PER_TOKEN = 3.5;

/** Context window assumed when neither the dynamic catalog nor the static registry knows the model. */
const DEFAULT_CONTEXT_WINDOW = 128_000;

/**
 * Tokens reserved for system prompt, output, and overhead.
 * Subtracted from the total context window to get the usable budget.
 */
const RESERVED_TOKENS = 4_000;

/** Floor applied to the usable budget so callers never receive a tiny or negative value. */
const MIN_USABLE_BUDGET = 1_000;

/** How often to refresh the dynamic catalog (24 hours). */
const CATALOG_TTL_MS = 24 * 60 * 60 * 1000;

/** Sort key used for catalog entries that carry no release date. */
const UNDATED_RELEASE = '1970-01';

/**
 * Substrings of a lower-cased model id that mark it as a non-chat model
 * (embeddings, image, audio, search, TTS, ...). Such ids are excluded from suggestions.
 */
const NON_CHAT_MODEL_MARKERS: readonly string[] = [
  'embed',
  'tts',
  'whisper',
  'dall',
  'image',
  'audio',
  'search',
  'transcribe',
  'moderat',
  'realtime',
  'computer-use',
];

/**
 * True when cutting `text` at `index` would separate a UTF-16 high surrogate
 * from the low surrogate that follows it.
 */
function splitsSurrogatePair(text: string, index: number): boolean {
  // Written as a positive range check so a NaN index yields false.
  if (!(index > 0 && index < text.length)) return false;
  const before = text.charCodeAt(index - 1);
  const after = text.charCodeAt(index);
  return before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
}

// ---------------------------------------------------------------------------
// TokenCounter
// ---------------------------------------------------------------------------

/**
 * Estimates token counts for context budget management and resolves model
 * context windows.
 *
 * Token counts are a plain `chars / charsPerToken` estimate, not a real tokenizer.
 *
 * Context window lookup order:
 *   1. Dynamic catalog loaded by {@link TokenCounter.refreshCatalog}
 *   2. Static tokenlens registry
 *   3. `DEFAULT_CONTEXT_WINDOW`
 */
export class TokenCounter {
  private readonly charsPerToken: number;

  /** Dynamic catalog fetched from models.dev. null until the first successful refresh. */
  private catalog: ModelCatalog | null = null;

  /** Handle of the pending background refresh, or null when none is scheduled. */
  private refreshTimer: ReturnType<typeof setTimeout> | null = null;

  /**
   * Incremented by every stopRefresh() call. A refresh that started under an
   * older generation must not re-arm the timer once its fetch settles.
   */
  private refreshGeneration = 0;

  /**
   * @param charsPerToken Characters assumed per token; must be a positive finite number.
   * @throws RangeError when `charsPerToken` is zero, negative, NaN or infinite,
   *   since every estimate would then be Infinity, NaN or negative.
   */
  constructor(charsPerToken = DEFAULT_CHARS_PER_TOKEN) {
    if (!Number.isFinite(charsPerToken) || charsPerToken <= 0) {
      throw new RangeError(`charsPerToken must be a positive finite number, got ${charsPerToken}`);
    }
    this.charsPerToken = charsPerToken;
  }

  // -------------------------------------------------------------------------
  // Token estimation
  // -------------------------------------------------------------------------

  /**
   * Estimate the token count for a string.
   *
   * @returns `ceil(text.length / charsPerToken)`; 0 for the empty string.
   */
  count(text: string): number {
    return Math.ceil(text.length / this.charsPerToken);
  }

  /**
   * Clip text to fit within a token budget.
   *
   * @param text Text to clip.
   * @param maxTokens Token budget. Zero or negative budgets keep none of the text.
   * @returns `text` unchanged when it fits; otherwise its leading
   *   `floor(maxTokens * charsPerToken)` characters followed by a truncation
   *   marker. The marker is appended on top of the budget, so a clipped result
   *   is slightly longer than the budget itself.
   */
  clip(text: string, maxTokens: number): string {
    // Clamp to 0: a negative end index would make slice() count from the END
    // of the string and keep almost all of the text.
    const budgetChars = Math.max(0, Math.floor(maxTokens * this.charsPerToken));
    if (text.length <= budgetChars) {
      return text;
    }

    // Back off one code unit rather than leave a lone surrogate in the output.
    const cut = splitsSurrogatePair(text, budgetChars) ? budgetChars - 1 : budgetChars;
    return `${text.slice(0, cut)}\n... [truncated, exceeded ${maxTokens} token budget]`;
  }

  // -------------------------------------------------------------------------
  // Context window lookup
  // -------------------------------------------------------------------------

  /**
   * Get the usable token budget for a given model.
   *
   * @returns `contextWindow - RESERVED_TOKENS`, but never less than `MIN_USABLE_BUDGET`.
   */
  getUsableBudget(model: string): number {
    const budget = this.getContextWindow(model) - RESERVED_TOKENS;
    return Math.max(budget, MIN_USABLE_BUDGET);
  }

  /**
   * Get the context window size for a model.
   *
   * Lookup order:
   *   1. Dynamic catalog (when one has been loaded)
   *   2. Static tokenlens registry
   *   3. `DEFAULT_CONTEXT_WINDOW`
   */
  getContextWindow(model: string): number {
    // 1. Dynamic catalog first
    const dynamicResult = this.lookupFromCatalog(model);
    if (dynamicResult && dynamicResult > 0) {
      return dynamicResult;
    }

    // 2. Static tokenlens registry
    const staticResult = tlGetContextWindow(model);
    const staticWindow = staticResult?.combinedMax ?? staticResult?.totalMax;
    if (staticWindow && staticWindow > 0) {
      return staticWindow;
    }

    // 3. Default
    logger.debug(
      `Unknown model "${model}", using default context window ${DEFAULT_CONTEXT_WINDOW}`
    );
    return DEFAULT_CONTEXT_WINDOW;
  }

  // -------------------------------------------------------------------------
  // Dynamic catalog management
  // -------------------------------------------------------------------------

  /**
   * Fetch the latest model catalog and cache it.
   *
   * A failed fetch is logged and otherwise ignored, leaving any previously
   * loaded catalog in place. Afterwards the next refresh is scheduled in
   * `CATALOG_TTL_MS`, unless stopRefresh() was called while the fetch was in
   * flight.
   */
  async refreshCatalog(): Promise<void> {
    const generation = this.refreshGeneration;

    try {
      const catalog = await fetchModels();
      this.catalog = catalog;

      let modelCount = 0;
      for (const provider of Object.values(catalog)) {
        modelCount += Object.keys(provider.models ?? {}).length;
      }

      logger.info('Model catalog refreshed from models.dev', {
        providers: Object.keys(catalog).length,
        models: modelCount,
      });
    } catch (error) {
      logger.warn('Failed to fetch model catalog from models.dev, using static data', {
        error: error instanceof Error ? error.message : String(error),
      });
    }

    // A stopRefresh() issued during the fetch bumped the generation; honoring
    // it here keeps a stopped counter from resurrecting its timer.
    if (generation === this.refreshGeneration) {
      this.scheduleNextRefresh();
    }
  }

  /**
   * Stop the background refresh timer (for clean shutdown).
   * Safe to call when no timer is active. Also prevents a refresh that is
   * currently in flight from scheduling a follow-up.
   */
  stopRefresh(): void {
    this.refreshGeneration += 1;
    this.clearRefreshTimer();
  }

  /**
   * Whether a dynamic catalog has been loaded.
   */
  get hasCatalog(): boolean {
    return this.catalog !== null;
  }

  /**
   * Get chat model ids for a given catalog provider key.
   *
   * Entries are dropped when their id contains one of `NON_CHAT_MODEL_MARKERS`
   * or when they declare no context limit. The remainder is sorted by release
   * date (newest first), then by context window (largest first).
   *
   * @returns Model ids, or an empty array when no catalog is loaded or the
   *   provider is unknown.
   */
  getModelSuggestions(providerKey: string): string[] {
    const models = this.catalog?.[providerKey]?.models;
    if (!models) return [];

    return Object.entries(models)
      .filter(([id, info]) => {
        if (info.limit?.context === undefined) return false;
        const lower = id.toLowerCase();
        return !NON_CHAT_MODEL_MARKERS.some((marker) => lower.includes(marker));
      })
      .sort((a, b) => {
        // `||` on purpose: an empty release_date string also falls back.
        const dateA = a[1].release_date || UNDATED_RELEASE;
        const dateB = b[1].release_date || UNDATED_RELEASE;
        if (dateB !== dateA) return dateB.localeCompare(dateA);
        return (b[1].limit?.context || 0) - (a[1].limit?.context || 0);
      })
      .map(([id]) => id);
  }

  // -------------------------------------------------------------------------
  // Private helpers
  // -------------------------------------------------------------------------

  /**
   * Look up a model's context window from the dynamic catalog.
   * Scans every provider and returns the first truthy `limit.context` found
   * for the model id; undefined when there is no catalog or no match.
   */
  private lookupFromCatalog(model: string): number | undefined {
    if (!this.catalog) return undefined;

    for (const provider of Object.values(this.catalog)) {
      const entry: ProviderModel | undefined = provider.models?.[model];
      if (entry?.limit?.context) {
        return entry.limit.context;
      }
    }

    return undefined;
  }

  /** Cancel the pending refresh timer, if any, without touching the generation. */
  private clearRefreshTimer(): void {
    if (this.refreshTimer !== null) {
      clearTimeout(this.refreshTimer);
      this.refreshTimer = null;
    }
  }

  /** Arm a single timer that runs refreshCatalog() again after `CATALOG_TTL_MS`. */
  private scheduleNextRefresh(): void {
    this.clearRefreshTimer();

    const timer = setTimeout(() => {
      this.refreshTimer = null;
      this.refreshCatalog().catch((error: unknown) => {
        logger.warn('Background catalog refresh failed', {
          error: error instanceof Error ? error.message : String(error),
        });
      });
    }, CATALOG_TTL_MS);
    this.refreshTimer = timer;

    // Don't prevent process exit. The handle is an object with unref() on
    // Node/Bun but a plain number in browser typings, so probe at runtime.
    const handle: unknown = timer;
    if (
      typeof handle === 'object' &&
      handle !== null &&
      'unref' in handle &&
      typeof handle.unref === 'function'
    ) {
      handle.unref();
    }
  }
}

// Singleton instance
export const tokenCounter = new TokenCounter();
(non-blocking) + tokenCounter.refreshCatalog().catch((error) => { + logger.warn('Model catalog preload failed, using static data', { + error: error instanceof Error ? error.message : String(error), + }); + }); + await this.store.init(); const recovered = await this.store.recoverInterruptedRuns(); if (recovered > 0) { @@ -78,7 +96,10 @@ class ReviewEngine { }, 1000); this.started = true; - logger.info('Agent Review Engine 已启动'); + logger.info('Agent Review Engine 已启动', { + llmMaxConcurrent: config.review.llmMaxConcurrentCalls, + llmRetryMaxAttempts: config.review.llmRetryMaxAttempts, + }); } async stop(): Promise { @@ -86,6 +107,7 @@ class ReviewEngine { clearInterval(this.timer); this.timer = null; } + tokenCounter.stopRefresh(); this.started = false; }