feat(review): add token-aware context control with tokenlens

Replace hardcoded char-count context limits with token-based budgets using
tokenlens (data from models.dev). TokenCounter provides 3-tier context window
lookup: dynamic catalog (refreshed every 24h) → static tokenlens → 128k default.

- specialist-agent: token budget from model context window instead of MAX_CONTEXT_CHARS=100k
- critic-agent/reflexion-agent: tokenCounter.clip() instead of diff.slice(0, 3000/2000)
- diff-extractor: raw diff clipping at 30k tokens
- engine.ts: refreshCatalog() at startup, stopRefresh() at shutdown

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)
This commit is contained in:
jeffusion
2026-03-05 22:03:00 +08:00
committed by 路遥知码力
parent 86480dec07
commit ec2029a942
9 changed files with 447 additions and 17 deletions

View File

@@ -14,6 +14,7 @@
"hono": "^4.11.9",
"lodash-es": "^4.17.21",
"openai": "^4.87.3",
"tokenlens": "^1.3.1",
"zod": "^3.25.1",
"zod-to-json-schema": "^3.25.1",
},
@@ -142,6 +143,14 @@
"@sindresorhus/merge-streams": ["@sindresorhus/merge-streams@4.0.0", "", {}, "sha512-tlqY9xq5ukxTUZBmoOp+m61cqwQD5pHJtFY3Mn8CA8ps6yghLH/Hw8UPdqg4OLmFW3IFlcXnQNmo/dh8HzXYIQ=="],
"@tokenlens/core": ["@tokenlens/core@1.3.0", "", {}, "sha512-d8YNHNC+q10bVpi95fELJwJyPVf1HfvBEI18eFQxRSZTdByXrP+f/ZtlhSzkx0Jl0aEmYVeBA5tPeeYRioLViQ=="],
"@tokenlens/fetch": ["@tokenlens/fetch@1.3.0", "", { "dependencies": { "@tokenlens/core": "1.3.0" } }, "sha512-RONDRmETYly9xO8XMKblmrZjKSwCva4s5ebJwQNfNlChZoA5kplPoCgnWceHnn1J1iRjLVlrCNB43ichfmGBKQ=="],
"@tokenlens/helpers": ["@tokenlens/helpers@1.3.1", "", { "dependencies": { "@tokenlens/core": "1.3.0", "@tokenlens/fetch": "1.3.0" } }, "sha512-t6yL8N6ES8337E6eVSeH4hCKnPdWkZRFpupy9w5E66Q9IeqQ9IO7XQ6gh12JKjvWiRHuyyJ8MBP5I549Cr41EQ=="],
"@tokenlens/models": ["@tokenlens/models@1.3.0", "", { "dependencies": { "@tokenlens/core": "1.3.0" } }, "sha512-9mx7ZGeewW4ndXAiD7AT1bbCk4OpJeortbjHHyNkgap+pMPPn1chY6R5zqe1ggXIUzZ2l8VOAKfPqOvpcrisJw=="],
"@types/bun": ["@types/bun@1.3.10", "", { "dependencies": { "bun-types": "1.3.10" } }, "sha512-0+rlrUrOrTSskibryHbvQkDOWRJwJZqZlxrUs1u4oOoTln8+WIXBPmAuCF35SWB2z4Zl3E84Nl/D0P7803nigQ=="],
"@types/lodash": ["@types/lodash@4.17.24", "", {}, "sha512-gIW7lQLZbue7lRSWEFql49QJJWThrTFFeIMJdp3eH4tKoxm1OvEPg02rm4wCCSHS0cL3/Fizimb35b7k8atwsQ=="],
@@ -680,6 +689,8 @@
"to-regex-range": ["to-regex-range@5.0.1", "", { "dependencies": { "is-number": "^7.0.0" } }, "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ=="],
"tokenlens": ["tokenlens@1.3.1", "", { "dependencies": { "@tokenlens/core": "1.3.0", "@tokenlens/fetch": "1.3.0", "@tokenlens/helpers": "1.3.1", "@tokenlens/models": "1.3.0" } }, "sha512-7oxmsS5PNCX3z+b+z07hL5vCzlgHKkCGrEQjQmWl5l+v5cUrtL7S1cuST4XThaL1XyjbTX8J5hfP0cjDJRkaLA=="],
"tr46": ["tr46@0.0.3", "", {}, "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="],
"traverse": ["traverse@0.6.8", "", {}, "sha512-aXJDbk6SnumuaZSANd21XAo15ucCDE38H4fkqiGsc3MhCK+wOlZvLP9cB/TvpHT0mOyWgC4Z8EwRlzqYSUzdsA=="],
@@ -778,6 +789,8 @@
"env-ci/execa": ["execa@8.0.1", "", { "dependencies": { "cross-spawn": "^7.0.3", "get-stream": "^8.0.1", "human-signals": "^5.0.0", "is-stream": "^3.0.0", "merge-stream": "^2.0.0", "npm-run-path": "^5.1.0", "onetime": "^6.0.0", "signal-exit": "^4.1.0", "strip-final-newline": "^3.0.0" } }, "sha512-VyhnebXciFV2DESc+p6B+y0LjSm0krU4OgJN44qFAhBY0TJ+1V61tYD2+wHusZ6F9n5K+vl8k0sTy7PEfV4qpg=="],
"fdir/picomatch": ["picomatch@4.0.3", "", {}, "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q=="],
"fetch-blob/web-streams-polyfill": ["web-streams-polyfill@3.3.3", "", {}, "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw=="],
"foreground-child/signal-exit": ["signal-exit@4.1.0", "", {}, "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw=="],

View File

@@ -15,6 +15,7 @@
"hono": "^4.11.9",
"lodash-es": "^4.17.21",
"openai": "^4.87.3",
"tokenlens": "^1.3.1",
"zod": "^3.25.1",
"zod-to-json-schema": "^3.25.1"
},

View File

@@ -3,6 +3,7 @@ import type { LLMGateway } from '../../llm/gateway';
import type { LLMMessage } from '../../llm/types';
import { withGlobalPrompt } from '../../utils/global-prompt';
import { logger } from '../../utils/logger';
import { tokenCounter } from '../context/token-counter';
import { Finding, ReviewContext } from '../types';
export interface CritiqueResult {
@@ -41,7 +42,7 @@ export class CriticAgent {
${JSON.stringify(findings, null, 2)}
原始代码变更片段(供参考):
${context.diff.slice(0, 3000)}
${tokenCounter.clip(context.diff, 1000)}
评估标准:
1. **Evidence充分性**: 证据是否充分支持结论?是否引用了具体代码?
@@ -147,7 +148,7 @@ Finding:
- Confidence: ${finding.confidence}
代码上下文:
${context.diff.slice(0, 2000)}
${tokenCounter.clip(context.diff, 700)}
判断:
1. 这个finding是否有效不是误报

View File

@@ -4,6 +4,7 @@ import type { LLMGateway } from '../../llm/gateway';
import type { LLMMessage } from '../../llm/types';
import { withGlobalPrompt } from '../../utils/global-prompt';
import { logger } from '../../utils/logger';
import { tokenCounter } from '../context/token-counter';
import { LearningSystem } from '../learning/learning-system';
import { findingResponseSchema } from '../schema/finding-schema';
import { ToolRegistry } from '../tools/registry';
@@ -128,7 +129,7 @@ ${critique.missedIssues.map((missed) => `- ${missed}`).join('\n')}
总体评估: ${critique.overallAssessment}
代码上下文:
${context.diff.slice(0, 3000)}
${tokenCounter.clip(context.diff, 1000)}
任务:
1. 修正有问题的findings根据批评意见

View File

@@ -9,6 +9,7 @@ import { findingResponseSchema } from '../schema/finding-schema';
import { ToolRegistry } from '../tools/registry';
import type { ToolExecutionContext, ToolResult } from '../tools/types';
import { AgentResult, Finding, FindingCategory, ReviewContext, ReviewRun } from '../types';
import { tokenCounter } from '../context/token-counter';
function buildFingerprint(category: string, path: string, line: number, title: string): string {
return createHash('sha256')
@@ -18,8 +19,8 @@ function buildFingerprint(category: string, path: string, line: number, title: s
}
function toCompactContext(context: ReviewContext): string {
// 全局上下文大小限制100k chars约33k tokens为系统promptfew-shot、响应留空间
const MAX_CONTEXT_CHARS = 100_000;
// Token-based budget: 25k tokens for context, leaving room for system prompt + few-shot + response
const MAX_CONTEXT_TOKENS = 25_000;
const files = context.changedFiles.map((file) => ({
path: file.path,
@@ -28,7 +29,7 @@ function toCompactContext(context: ReviewContext): string {
deletions: file.deletions,
}));
// 策略:逐步缩减直到满足限制
// 策略:逐步缩减直到满足 token 限制
// 1. changedFiles元数据小且必需
// 2. parsedDiff关键逐步减少每个文件的changes数量
// 3. fileContents最大按需截断或移除部分文件
@@ -36,7 +37,6 @@ function toCompactContext(context: ReviewContext): string {
let maxChangesPerFile = 200;
let maxFileContentsEntries = Object.keys(context.fileContents).length;
// 尝试构建并测量大小
const tryBuild = (changesLimit: number, contentEntriesLimit: number): string => {
const snippets = context.parsedDiff.map((file) => ({
path: file.path,
@@ -63,9 +63,9 @@ function toCompactContext(context: ReviewContext): string {
let result = tryBuild(maxChangesPerFile, maxFileContentsEntries);
// 如果超过限制,逐步缩减
// 如果超过 token 限制,逐步缩减
while (
result.length > MAX_CONTEXT_CHARS &&
tokenCounter.count(result) > MAX_CONTEXT_TOKENS &&
(maxChangesPerFile > 20 || maxFileContentsEntries > 0)
) {
if (maxChangesPerFile > 20) {
@@ -77,13 +77,13 @@ function toCompactContext(context: ReviewContext): string {
result = tryBuild(maxChangesPerFile, maxFileContentsEntries);
}
// 如果仍然超限,强制截断保留前N个字符
if (result.length > MAX_CONTEXT_CHARS) {
logger.warn('Context size still exceeds limit after reduction, truncating', {
originalSize: result.length,
limit: MAX_CONTEXT_CHARS,
// 如果仍然超限,强制截断
if (tokenCounter.count(result) > MAX_CONTEXT_TOKENS) {
logger.warn('Context size still exceeds token limit after reduction, truncating', {
estimatedTokens: tokenCounter.count(result),
limit: MAX_CONTEXT_TOKENS,
});
result = `${result.slice(0, MAX_CONTEXT_CHARS)}\n... [truncated]`;
result = tokenCounter.clip(result, MAX_CONTEXT_TOKENS);
}
return result;

View File

@@ -0,0 +1,124 @@
// Unit tests for TokenCounter (token estimation, clipping, context-window lookup).
// NOTE(review): this ambient declaration re-declares bun:test's exports as `any`
// so the file type-checks outside Bun; it sacrifices type safety and may clash
// with real bun-types if installed — consider removing. TODO confirm.
// @ts-ignore bun:test is provided by Bun at runtime
declare module 'bun:test' {
  // @ts-ignore bun:test types may already exist
  export const describe: any;
  // @ts-ignore bun:test types may already exist
  export const test: any;
  // @ts-ignore bun:test types may already exist
  export const expect: any;
}
// @ts-ignore bun:test is provided by Bun at runtime
import { describe, expect, test } from 'bun:test';
import { TokenCounter, tokenCounter } from '../token-counter';
describe('TokenCounter.count', () => {
  test('returns 0 for empty string', () => {
    const counter = new TokenCounter();
    expect(counter.count('')).toBe(0);
  });
  test('uses ceil(length / 3.5) for known string', () => {
    const counter = new TokenCounter();
    // ceil(5 / 3.5) === 2
    expect(counter.count('hello')).toBe(2);
  });
  test('handles longer text using same formula', () => {
    const counter = new TokenCounter();
    const text = 'a'.repeat(36);
    expect(counter.count(text)).toBe(Math.ceil(36 / 3.5));
  });
});
describe('TokenCounter.clip', () => {
  test('returns text as-is when within budget', () => {
    const counter = new TokenCounter();
    const text = 'short text';
    expect(counter.clip(text, 100)).toBe(text);
  });
  test('truncates with message when exceeding budget', () => {
    const counter = new TokenCounter();
    const maxTokens = 4;
    // mirror of the implementation's chars budget: floor(tokens * charsPerToken)
    const maxChars = Math.floor(maxTokens * 3.5);
    const text = 'abcdefghijklmnopqrstuvwxyz';
    const clipped = counter.clip(text, maxTokens);
    expect(clipped.startsWith(text.slice(0, maxChars))).toBe(true);
    expect(clipped).toContain('[truncated, exceeded 4 token budget]');
    expect(clipped).toBe(`${text.slice(0, maxChars)}\n... [truncated, exceeded 4 token budget]`);
  });
});
describe('TokenCounter.getContextWindow', () => {
  // These lookups resolve via the static tokenlens registry (no dynamic catalog loaded).
  test('returns known context window for gpt-4o', () => {
    const counter = new TokenCounter();
    expect(counter.getContextWindow('gpt-4o')).toBe(128000);
  });
  test('returns known context window for claude-sonnet-4-20250514', () => {
    const counter = new TokenCounter();
    expect(counter.getContextWindow('claude-sonnet-4-20250514')).toBe(200000);
  });
  test('returns default context window for versioned model not in registry', () => {
    const counter = new TokenCounter();
    // tokenlens static registry may not have dated variants — falls back to default
    expect(counter.getContextWindow('gpt-4o-2024-08-06')).toBe(128000);
  });
  test('returns default context window for unknown models', () => {
    const counter = new TokenCounter();
    expect(counter.getContextWindow('unknown-model-xyz')).toBe(128000);
  });
});
describe('TokenCounter.getUsableBudget', () => {
  test('returns contextWindow - 4000 for known model', () => {
    const counter = new TokenCounter();
    // 128000 (gpt-4o window) minus the 4000-token reserve
    expect(counter.getUsableBudget('gpt-4o')).toBe(124000);
  });
  test('never returns less than 1000 for tiny context window', () => {
    // Override the lookup to simulate a model with a window below the reserve.
    class TinyWindowTokenCounter extends TokenCounter {
      getContextWindow(_model: string): number {
        return 500;
      }
    }
    const counter = new TinyWindowTokenCounter();
    expect(counter.getUsableBudget('tiny-model')).toBe(1000);
  });
});
describe('TokenCounter exports and constructor options', () => {
  test('exports singleton tokenCounter instance', () => {
    expect(tokenCounter).toBeInstanceOf(TokenCounter);
  });
  test('supports custom charsPerToken in constructor', () => {
    // ceil(5 / 2) === 3
    const counter = new TokenCounter(2);
    expect(counter.count('hello')).toBe(3);
  });
});
describe('TokenCounter dynamic catalog', () => {
  test('hasCatalog is false before refreshCatalog', () => {
    const counter = new TokenCounter();
    expect(counter.hasCatalog).toBe(false);
  });
  test('getContextWindow works without catalog (static fallback)', () => {
    const counter = new TokenCounter();
    // Should use static tokenlens data, not crash
    expect(counter.getContextWindow('gpt-4o')).toBe(128000);
  });
  test('stopRefresh is safe to call without active timer', () => {
    const counter = new TokenCounter();
    // Should not throw
    counter.stopRefresh();
  });
});

View File

@@ -1,8 +1,10 @@
import { lstat, readFile } from 'node:fs/promises';
import path from 'node:path';
import { logger } from '../../utils/logger';
import { ChangedFile, DiffFile, ReviewContext, ReviewRun } from '../types';
import { LocalRepoManager } from './local-repo-manager';
import { SandboxExec } from './sandbox-exec';
import { tokenCounter } from './token-counter';
function toStatus(status: string): ChangedFile['status'] {
const value = status.trim().charAt(0).toUpperCase();
@@ -65,10 +67,23 @@ export class DiffExtractor {
const fileContents = await this.readChangedFileContents(workspacePath, changedFiles);
// Token-budget guard: clip raw diff to 30k tokens to prevent context overflow
// This is the raw diff that gets passed around; toCompactContext() does further reduction
const MAX_RAW_DIFF_TOKENS = 30_000;
let clippedDiff = diff;
const diffTokens = tokenCounter.count(diff);
if (diffTokens > MAX_RAW_DIFF_TOKENS) {
logger.warn('Raw diff exceeds token budget, clipping', {
estimatedTokens: diffTokens,
limit: MAX_RAW_DIFF_TOKENS,
});
clippedDiff = tokenCounter.clip(diff, MAX_RAW_DIFF_TOKENS);
}
return {
workspacePath,
mirrorPath,
diff,
diff: clippedDiff,
changedFiles,
parsedDiff,
fileContents,

View File

@@ -0,0 +1,253 @@
/**
* Token Counter — estimates token counts for context budget management.
*
* Uses a simple chars-to-tokens ratio estimation by default (chars / 3.5),
* which is a widely-used approximation for English/code content.
*
* Context window lookup strategy (ordered by priority):
* 1. Dynamic catalog fetched from models.dev at startup (3000+ models, always fresh)
* 2. Static tokenlens built-in registry (offline fallback)
* 3. Conservative 128k default for truly unknown models
*/
import type { ModelCatalog, ProviderModel } from '@tokenlens/core';
import { fetchModels, getContextWindow as tlGetContextWindow } from 'tokenlens';
import { logger } from '../../utils/logger';
// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------
/**
 * Default chars-per-token ratio.
 * English text: ~4 chars/token. Code: ~3.5 chars/token.
 * We use 3.5 to be conservative (slightly over-estimate tokens).
 */
const DEFAULT_CHARS_PER_TOKEN = 3.5;
/**
 * Default context window when model is unknown.
 * Conservative: assume 128k to avoid overflow on most modern models.
 */
const DEFAULT_CONTEXT_WINDOW = 128_000;
/**
 * Reserve tokens for system prompt, output, and overhead.
 * This is subtracted from the total context window to get the usable budget.
 */
const RESERVED_TOKENS = 4_000;
/** How often to refresh the dynamic catalog (24 hours). */
const CATALOG_TTL_MS = 24 * 60 * 60 * 1000;
/** Floor for getUsableBudget: never report a usable budget smaller than this. */
const MIN_USABLE_BUDGET = 1_000;
// ---------------------------------------------------------------------------
// TokenCounter
// ---------------------------------------------------------------------------
export class TokenCounter {
  private readonly charsPerToken: number;
  /** Dynamic catalog fetched from models.dev. null until first refresh. */
  private catalog: ModelCatalog | null = null;
  /** Handle for the scheduled background refresh; null when not scheduled. */
  private refreshTimer: ReturnType<typeof setTimeout> | null = null;
  /**
   * @param charsPerToken Chars-per-token ratio used for estimation.
   *   Defaults to 3.5 (conservative for code-heavy content).
   */
  constructor(charsPerToken = DEFAULT_CHARS_PER_TOKEN) {
    this.charsPerToken = charsPerToken;
  }
  // -------------------------------------------------------------------------
  // Token estimation
  // -------------------------------------------------------------------------
  /**
   * Estimate token count for a string using the chars/token ratio.
   * Returns 0 for the empty string; always rounds up (over-estimates).
   */
  count(text: string): number {
    return Math.ceil(text.length / this.charsPerToken);
  }
  /**
   * Clip text to fit within a token budget.
   * Returns the text truncated to approximately `maxTokens` tokens, with a
   * trailing marker noting the truncation. Text already within budget is
   * returned unchanged.
   */
  clip(text: string, maxTokens: number): string {
    const maxChars = Math.floor(maxTokens * this.charsPerToken);
    if (text.length <= maxChars) {
      return text;
    }
    // Back off one code unit if the cut would split a UTF-16 surrogate pair;
    // otherwise the clipped text would end with an invalid lone surrogate.
    let cut = maxChars;
    const lastUnit = text.charCodeAt(cut - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      cut -= 1;
    }
    return `${text.slice(0, cut)}\n... [truncated, exceeded ${maxTokens} token budget]`;
  }
  // -------------------------------------------------------------------------
  // Context window lookup
  // -------------------------------------------------------------------------
  /**
   * Get the usable token budget for a given model.
   * = context_window - reserved_tokens, floored at MIN_USABLE_BUDGET.
   */
  getUsableBudget(model: string): number {
    const contextWindow = this.getContextWindow(model);
    const budget = contextWindow - RESERVED_TOKENS;
    return Math.max(budget, MIN_USABLE_BUDGET);
  }
  /**
   * Get the context window size for a model.
   *
   * Lookup order:
   * 1. Dynamic catalog (from models.dev, refreshed every 24h)
   * 2. Static tokenlens registry (built into the package)
   * 3. DEFAULT_CONTEXT_WINDOW (128k)
   */
  getContextWindow(model: string): number {
    // 1. Try dynamic catalog first (freshest data when loaded)
    const dynamicResult = this.lookupFromCatalog(model);
    if (dynamicResult && dynamicResult > 0) {
      return dynamicResult;
    }
    // 2. Fall back to static tokenlens registry
    const staticResult = tlGetContextWindow(model);
    const staticWindow = staticResult?.combinedMax ?? staticResult?.totalMax;
    if (staticWindow && staticWindow > 0) {
      return staticWindow;
    }
    // 3. Default — structured metadata for consistency with other log calls
    logger.debug('Unknown model, using default context window', {
      model,
      defaultWindow: DEFAULT_CONTEXT_WINDOW,
    });
    return DEFAULT_CONTEXT_WINDOW;
  }
  // -------------------------------------------------------------------------
  // Dynamic catalog management
  // -------------------------------------------------------------------------
  /**
   * Fetch the latest model catalog from models.dev and cache it.
   * Called at engine startup; silently falls back to static data on failure.
   * Schedules automatic refresh every 24h (also after a failed fetch, so a
   * transient outage self-heals on the next cycle).
   */
  async refreshCatalog(): Promise<void> {
    try {
      const catalog = await fetchModels();
      this.catalog = catalog;
      let modelCount = 0;
      for (const provider of Object.values(catalog)) {
        modelCount += Object.keys(provider.models || {}).length;
      }
      logger.info('Model catalog refreshed from models.dev', {
        providers: Object.keys(catalog).length,
        models: modelCount,
      });
    } catch (error) {
      logger.warn('Failed to fetch model catalog from models.dev, using static data', {
        error: error instanceof Error ? error.message : String(error),
      });
    }
    this.scheduleNextRefresh();
  }
  /**
   * Stop the background refresh timer (for clean shutdown).
   * Safe to call when no timer is active.
   */
  stopRefresh(): void {
    if (this.refreshTimer) {
      clearTimeout(this.refreshTimer);
      this.refreshTimer = null;
    }
  }
  /**
   * Whether a dynamic catalog has been loaded.
   */
  get hasCatalog(): boolean {
    return this.catalog !== null;
  }
  /**
   * Get chat model names for a given models.dev provider key.
   * Returns model IDs suitable for the provider, filtered to chat models only.
   * Used by the API to serve dynamic model suggestions to the frontend.
   * Returns [] when no dynamic catalog has been loaded.
   */
  getModelSuggestions(providerKey: string): string[] {
    if (!this.catalog) return [];
    const provider = this.catalog[providerKey];
    if (!provider?.models) return [];
    return Object.entries(provider.models)
      .filter(([id, m]) => {
        // Exclude non-chat models (embeddings, image, audio, search, TTS)
        const lower = id.toLowerCase();
        return (
          !lower.includes('embed') &&
          !lower.includes('tts') &&
          !lower.includes('whisper') &&
          !lower.includes('dall') &&
          !lower.includes('image') &&
          !lower.includes('audio') &&
          !lower.includes('search') &&
          !lower.includes('transcribe') &&
          !lower.includes('moderat') &&
          !lower.includes('realtime') &&
          !lower.includes('computer-use') &&
          m.limit?.context !== undefined
        );
      })
      .sort((a, b) => {
        // Sort by release date descending (newest first), then by context window
        const dateA = a[1].release_date || '1970-01';
        const dateB = b[1].release_date || '1970-01';
        if (dateB !== dateA) return dateB.localeCompare(dateA);
        return (b[1].limit?.context || 0) - (a[1].limit?.context || 0);
      })
      .map(([id]) => id);
  }
  // -------------------------------------------------------------------------
  // Private helpers
  // -------------------------------------------------------------------------
  /**
   * Look up a model's context window from the dynamic catalog.
   * Searches across all providers for the model ID.
   */
  private lookupFromCatalog(model: string): number | undefined {
    if (!this.catalog) return undefined;
    // Search all providers for the model
    for (const provider of Object.values(this.catalog)) {
      const entry: ProviderModel | undefined = provider.models?.[model];
      if (entry?.limit?.context) {
        return entry.limit.context;
      }
    }
    return undefined;
  }
  /** Arm (or re-arm) the 24h background refresh timer. */
  private scheduleNextRefresh(): void {
    this.stopRefresh();
    this.refreshTimer = setTimeout(() => {
      this.refreshCatalog().catch((error) => {
        logger.warn('Background catalog refresh failed', {
          error: error instanceof Error ? error.message : String(error),
        });
      });
    }, CATALOG_TTL_MS);
    // Don't prevent process exit (unref exists on Node/Bun timers, not browsers)
    if (this.refreshTimer && typeof this.refreshTimer === 'object' && 'unref' in this.refreshTimer) {
      (this.refreshTimer as NodeJS.Timeout).unref();
    }
  }
}
// Singleton instance shared across the review engine
export const tokenCounter = new TokenCounter();

View File

@@ -1,8 +1,10 @@
import config from '../config';
import { llmGateway } from '../llm/gateway';
import { logger } from '../utils/logger';
import { DiffExtractor } from './context/diff-extractor';
import { LocalRepoManager } from './context/local-repo-manager';
import { SandboxExec } from './context/sandbox-exec';
import { tokenCounter } from './context/token-counter';
import { ReviewOrchestrator } from './orchestrator';
import { FileReviewStore } from './store/file-review-store';
import { CommitReviewPayload, PullRequestReviewPayload, ReviewRun } from './types';
@@ -63,6 +65,22 @@ class ReviewEngine {
return;
}
// Configure LLM Gateway resilience from current config
llmGateway.updateResilienceConfig(
config.review.llmMaxConcurrentCalls,
{
maxAttempts: config.review.llmRetryMaxAttempts,
baseDelayMs: config.review.llmRetryBaseDelayMs,
}
);
// Preload dynamic model catalog from models.dev (non-blocking)
tokenCounter.refreshCatalog().catch((error) => {
logger.warn('Model catalog preload failed, using static data', {
error: error instanceof Error ? error.message : String(error),
});
});
await this.store.init();
const recovered = await this.store.recoverInterruptedRuns();
if (recovered > 0) {
@@ -78,7 +96,10 @@ class ReviewEngine {
}, 1000);
this.started = true;
logger.info('Agent Review Engine 已启动');
logger.info('Agent Review Engine 已启动', {
llmMaxConcurrent: config.review.llmMaxConcurrentCalls,
llmRetryMaxAttempts: config.review.llmRetryMaxAttempts,
});
}
async stop(): Promise<void> {
@@ -86,6 +107,7 @@ class ReviewEngine {
clearInterval(this.timer);
this.timer = null;
}
tokenCounter.stopRefresh();
this.started = false;
}