diff --git a/src/config/config-manager.ts b/src/config/config-manager.ts
index 2f7d0d8..82ee16e 100644
--- a/src/config/config-manager.ts
+++ b/src/config/config-manager.ts
@@ -37,6 +37,10 @@ export interface AppConfig {
   enableHumanGate: boolean;
   allowedCommands: string[];
   commandTimeoutMs: number;
+  llmMaxConcurrentCalls: number;
+  llmRetryMaxAttempts: number;
+  llmRetryBaseDelayMs: number;
+  enableTriage: boolean;
   qdrantUrl: string | undefined;
   enableMemory: boolean;
   fewShotExamplesCount: number;
@@ -153,6 +157,10 @@ class ConfigManager {
         'wc',
       ]),
       commandTimeoutMs: toNumber('REVIEW_COMMAND_TIMEOUT_MS', 10000),
+      llmMaxConcurrentCalls: toNumber('LLM_MAX_CONCURRENT_CALLS', 4),
+      llmRetryMaxAttempts: toNumber('LLM_RETRY_MAX_ATTEMPTS', 3),
+      llmRetryBaseDelayMs: toNumber('LLM_RETRY_BASE_DELAY_MS', 1000),
+      enableTriage: toBoolean('ENABLE_TRIAGE', true),
       qdrantUrl: values.QDRANT_URL,
       enableMemory: toBoolean('ENABLE_MEMORY', false),
       fewShotExamplesCount: toNumber('FEW_SHOT_EXAMPLES_COUNT', 10),
diff --git a/src/config/config-schema.ts b/src/config/config-schema.ts
index 1f85012..735ee1a 100644
--- a/src/config/config-schema.ts
+++ b/src/config/config-schema.ts
@@ -266,6 +266,48 @@ export const CONFIG_FIELDS: ConfigFieldMeta[] = [
     max: 300000,
     defaultValue: 10000,
   },
+  {
+    envKey: 'LLM_MAX_CONCURRENT_CALLS',
+    group: 'review',
+    label: 'LLM 最大并发调用',
+    description: '同时在飞的 LLM API 调用上限(防止触发 Provider 并发/RPM 限制)',
+    type: 'number',
+    sensitive: false,
+    min: 1,
+    max: 20,
+    defaultValue: 4,
+  },
+  {
+    envKey: 'LLM_RETRY_MAX_ATTEMPTS',
+    group: 'review',
+    label: 'LLM 重试次数',
+    description: 'LLM 调用失败后的最大重试次数(仅对 429/网络错误生效)',
+    type: 'number',
+    sensitive: false,
+    min: 1,
+    max: 10,
+    defaultValue: 3,
+  },
+  {
+    envKey: 'LLM_RETRY_BASE_DELAY_MS',
+    group: 'review',
+    label: 'LLM 重试基础延迟(ms)',
+    description: '重试时的基础延迟(指数退避,实际延迟 = baseDelay × 2^(attempt-1))',
+    type: 'number',
+    sensitive: false,
+    min: 100,
+    max: 30000,
+    defaultValue: 1000,
+  },
+  {
+    envKey: 'ENABLE_TRIAGE',
+    group: 'review',
+    label: '启用变更分流',
+    description: '是否启用 Triage 分流(用 Planner 模型先评估变更复杂度,再按需派发 Specialist)',
+    type: 'boolean',
+    sensitive: false,
+    defaultValue: true,
+  },
 
   // ── 记忆与学习 ──────────────────────────────────────────────────────────
   {
diff --git a/src/llm/__tests__/resilience.test.ts b/src/llm/__tests__/resilience.test.ts
new file mode 100644
index 0000000..9680ee0
--- /dev/null
+++ b/src/llm/__tests__/resilience.test.ts
@@ -0,0 +1,586 @@
+import { describe, expect, test } from 'bun:test';
+import { LLMAuthError, LLMConnectionError, LLMRateLimitError } from '../errors';
+import { LLMSemaphore, retryWithBackoff, withResilience } from '../resilience';
+
+describe('LLMSemaphore', () => {
+  test('constructor throws for maxConcurrent < 1', () => {
+    try {
+      new LLMSemaphore(0);
+      expect(true).toBe(false);
+    } catch (e: any) {
+      expect(e.message).toContain('maxConcurrent must be >= 1');
+    }
+  });
+
+  test('constructor throws for negative maxConcurrent', () => {
+    try {
+      new LLMSemaphore(-5);
+      expect(true).toBe(false);
+    } catch (e: any) {
+      expect(e.message).toContain('maxConcurrent must be >= 1');
+    }
+  });
+
+  test('constructor accepts valid maxConcurrent', () => {
+    const sem = new LLMSemaphore(5);
+    expect(sem.activeCount).toBe(0);
+    expect(sem.pendingCount).toBe(0);
+  });
+
+  test('acquire() resolves immediately when under limit', async () => {
+    const sem = new LLMSemaphore(2);
+    await sem.acquire();
+    expect(sem.activeCount).toBe(1);
+    expect(sem.pendingCount).toBe(0);
+  });
+
+  test('acquire() resolves multiple times when under limit', async () => {
+    const sem = new LLMSemaphore(3);
+    await sem.acquire();
+    await sem.acquire();
+    expect(sem.activeCount).toBe(2);
+    expect(sem.pendingCount).toBe(0);
+  });
+
+  test('acquire() blocks when at limit', async () => {
+    const sem = new LLMSemaphore(1);
+    await sem.acquire();
+    expect(sem.activeCount).toBe(1);
+
+    let secondAcquireResolved = false;
+    sem.acquire().then(() => {
+      secondAcquireResolved = true;
+    });
+
+    await new Promise((resolve) => setTimeout(resolve, 10));
+    expect(secondAcquireResolved).toBe(false);
+    expect(sem.pendingCount).toBe(1);
+  });
+
+  test('release() allows blocked acquire() to resume', async () => {
+    const sem = new LLMSemaphore(1);
+    await sem.acquire();
+
+    let secondAcquireResolved = false;
+    const secondPromise = sem.acquire().then(() => {
+      secondAcquireResolved = true;
+    });
+
+    await new Promise((resolve) => setTimeout(resolve, 10));
+    expect(secondAcquireResolved).toBe(false);
+
+    sem.release();
+
+    await secondPromise;
+    expect(secondAcquireResolved).toBe(true);
+    expect(sem.activeCount).toBe(1);
+  });
+
+  test('activeCount reflects correct state after acquire/release cycle', async () => {
+    const sem = new LLMSemaphore(2);
+    expect(sem.activeCount).toBe(0);
+
+    await sem.acquire();
+    expect(sem.activeCount).toBe(1);
+
+    await sem.acquire();
+    expect(sem.activeCount).toBe(2);
+
+    sem.release();
+    expect(sem.activeCount).toBe(1);
+
+    sem.release();
+    expect(sem.activeCount).toBe(0);
+  });
+
+  test('pendingCount reflects queued requests', async () => {
+    const sem = new LLMSemaphore(1);
+    await sem.acquire();
+    expect(sem.pendingCount).toBe(0);
+
+    sem.acquire();
+    sem.acquire();
+    sem.acquire();
+
+    await new Promise((resolve) => setTimeout(resolve, 10));
+    expect(sem.pendingCount).toBe(3);
+  });
+
+  test('multiple concurrent acquires with interleaved releases', async () => {
+    const sem = new LLMSemaphore(2);
+    const sequence: string[] = [];
+
+    await sem.acquire();
+    sequence.push('acquire1');
+    await sem.acquire();
+    sequence.push('acquire2');
+
+    expect(sem.activeCount).toBe(2);
+    expect(sem.pendingCount).toBe(0);
+
+    const p3 = sem.acquire().then(() => sequence.push('acquire3'));
+    const p4 = sem.acquire().then(() => sequence.push('acquire4'));
+    const p5 = sem.acquire().then(() => sequence.push('acquire5'));
+
+    await new Promise((resolve) => setTimeout(resolve, 10));
+    expect(sem.pendingCount).toBe(3);
+
+    sem.release();
+    expect(sem.activeCount).toBe(2);
+    await new Promise((resolve) => setTimeout(resolve, 10));
+    expect(sequence).toContain('acquire3');
+
+    sem.release();
+    expect(sem.activeCount).toBe(2);
+    await new Promise((resolve) => setTimeout(resolve, 10));
+    expect(sequence).toContain('acquire4');
+
+    sem.release();
+    await p5;
+    expect(sequence).toContain('acquire5');
+  });
+});
+
+describe('retryWithBackoff', () => {
+  test('succeeds on first attempt (no retry)', async () => {
+    let callCount = 0;
+    const fn = async () => {
+      callCount++;
+      return 'success';
+    };
+
+    const result = await retryWithBackoff(fn, { maxAttempts: 3 });
+    expect(result).toBe('success');
+    expect(callCount).toBe(1);
+  });
+
+  test('retries on LLMRateLimitError, succeeds on 2nd attempt', async () => {
+    let callCount = 0;
+    const fn = async () => {
+      callCount++;
+      if (callCount === 1) {
+        throw new LLMRateLimitError('openai', 1);
+      }
+      return 'success';
+    };
+
+    const result = await retryWithBackoff(fn, {
+      maxAttempts: 3,
+      baseDelayMs: 10,
+      maxDelayMs: 100,
+      jitter: false,
+    });
+    expect(result).toBe('success');
+    expect(callCount).toBe(2);
+  });
+
+  test('retries on LLMConnectionError, succeeds on 2nd attempt', async () => {
+    let callCount = 0;
+    const fn = async () => {
+      callCount++;
+      if (callCount === 1) {
+        throw new LLMConnectionError('openai', 'network timeout');
+      }
+      return 'success';
+    };
+
+    const result = await retryWithBackoff(fn, {
+      maxAttempts: 3,
+      baseDelayMs: 10,
+      maxDelayMs: 100,
+      jitter: false,
+    });
+    expect(result).toBe('success');
+    expect(callCount).toBe(2);
+  });
+
+  test('throws immediately on LLMAuthError (non-retryable)', async () => {
+    let callCount = 0;
+    const fn = async () => {
+      callCount++;
+      throw new LLMAuthError('openai', 'invalid api key');
+    };
+
+    try {
+      await retryWithBackoff(fn, { maxAttempts: 3 });
+      expect(true).toBe(false);
+    } catch (e: any) {
+      expect(e.name).toBe('LLMAuthError');
+      expect(callCount).toBe(1);
+    }
+  });
+
+  test('throws immediately on generic Error (non-retryable)', async () => {
+    let callCount = 0;
+    const fn = async () => {
+      callCount++;
+      throw new Error('some other error');
+    };
+
+    try {
+      await retryWithBackoff(fn, { maxAttempts: 3 });
+      expect(true).toBe(false);
+    } catch (e: any) {
+      expect(e.message).toBe('some other error');
+      expect(callCount).toBe(1);
+    }
+  });
+
+  test('exhausts maxAttempts and throws last error', async () => {
+    let callCount = 0;
+    const fn = async () => {
+      callCount++;
+      throw new LLMRateLimitError('openai', undefined, 'rate limited');
+    };
+
+    try {
+      await retryWithBackoff(fn, {
+        maxAttempts: 3,
+        baseDelayMs: 10,
+        maxDelayMs: 100,
+        jitter: false,
+      });
+      expect(true).toBe(false);
+    } catch (e: any) {
+      expect(e.name).toBe('LLMRateLimitError');
+      expect(callCount).toBe(3);
+    }
+  });
+
+  test('respects retryAfterSeconds from LLMRateLimitError', async () => {
+    let callCount = 0;
+    let delayStartTime = 0;
+    let delayEndTime = 0;
+
+    const fn = async () => {
+      callCount++;
+      if (callCount === 1) {
+        delayStartTime = Date.now();
+        throw new LLMRateLimitError('openai', 0.1);
+      }
+      delayEndTime = Date.now();
+      return 'success';
+    };
+
+    const result = await retryWithBackoff(fn, {
+      maxAttempts: 3,
+      baseDelayMs: 10,
+      maxDelayMs: 100,
+      jitter: false,
+    });
+
+    expect(result).toBe('success');
+    expect(callCount).toBe(2);
+
+    const actualDelay = delayEndTime - delayStartTime;
+    expect(actualDelay).toBeGreaterThanOrEqual(50);
+    expect(actualDelay).toBeLessThan(300);
+  });
+
+  test('exponential backoff without jitter: attempt 1 delay ≈ baseDelayMs', async () => {
+    let callCount = 0;
+    let delayStartTime = 0;
+    let delayEndTime = 0;
+
+    const fn = async () => {
+      callCount++;
+      if (callCount === 1) {
+        delayStartTime = Date.now();
+        throw new LLMConnectionError('openai');
+      }
+      delayEndTime = Date.now();
+      return 'success';
+    };
+
+    await retryWithBackoff(fn, {
+      maxAttempts: 3,
+      baseDelayMs: 20,
+      maxDelayMs: 1000,
+      jitter: false,
+    });
+
+    const actualDelay = delayEndTime - delayStartTime;
+    expect(actualDelay).toBeGreaterThanOrEqual(10);
+    expect(actualDelay).toBeLessThan(100);
+  });
+
+  test('exponential backoff attempt 2: baseDelayMs * 2', async () => {
+    let callCount = 0;
+    let delayStartTime = 0;
+    let delayEndTime = 0;
+
+    const fn = async () => {
+      callCount++;
+      if (callCount === 1) {
+        throw new LLMConnectionError('openai');
+      }
+      if (callCount === 2) {
+        delayStartTime = Date.now();
+        throw new LLMConnectionError('openai');
+      }
+      delayEndTime = Date.now();
+      return 'success';
+    };
+
+    await retryWithBackoff(fn, {
+      maxAttempts: 4,
+      baseDelayMs: 20,
+      maxDelayMs: 1000,
+      jitter: false,
+    });
+
+    const actualDelay = delayEndTime - delayStartTime;
+    expect(actualDelay).toBeGreaterThanOrEqual(30);
+    expect(actualDelay).toBeLessThan(150);
+  });
+
+  test('custom options: maxAttempts respected', async () => {
+    let callCount = 0;
+    const fn = async () => {
+      callCount++;
+      throw new LLMConnectionError('openai');
+    };
+
+    try {
+      await retryWithBackoff(fn, { maxAttempts: 2 });
+      expect(true).toBe(false);
+    } catch (e: any) {
+      expect(callCount).toBe(2);
+    }
+  });
+
+  test('custom options: baseDelayMs respected', async () => {
+    let callCount = 0;
+    let delayStartTime = 0;
+    let delayEndTime = 0;
+
+    const fn = async () => {
+      callCount++;
+      if (callCount === 1) {
+        delayStartTime = Date.now();
+        throw new LLMConnectionError('openai');
+      }
+      delayEndTime = Date.now();
+      return 'success';
+    };
+
+    await retryWithBackoff(fn, {
+      maxAttempts: 3,
+      baseDelayMs: 50,
+      maxDelayMs: 1000,
+      jitter: false,
+    });
+
+    const actualDelay = delayEndTime - delayStartTime;
+    expect(actualDelay).toBeGreaterThanOrEqual(30);
+    expect(actualDelay).toBeLessThan(200);
+  });
+
+  test('jitter disabled: delay is exactly exponential', async () => {
+    let callCount = 0;
+    const fn = async () => {
+      callCount++;
+      if (callCount < 3) {
+        throw new LLMConnectionError('openai');
+      }
+      return 'success';
+    };
+
+    const start = Date.now();
+    await retryWithBackoff(fn, {
+      maxAttempts: 4,
+      baseDelayMs: 10,
+      maxDelayMs: 100,
+      jitter: false,
+    });
+    const total = Date.now() - start;
+
+    expect(total).toBeLessThan(200);
+  });
+
+  test('jitter enabled: adds randomness to delays', async () => {
+    let callCount = 0;
+    let delayStartTime = 0;
+    let delayEndTime = 0;
+
+    const fn = async () => {
+      callCount++;
+      if (callCount === 1) {
+        delayStartTime = Date.now();
+        throw new LLMConnectionError('openai');
+      }
+      delayEndTime = Date.now();
+      return 'success';
+    };
+
+    await retryWithBackoff(fn, {
+      maxAttempts: 3,
+      baseDelayMs: 10,
+      maxDelayMs: 100,
+      jitter: true,
+    });
+
+    const actualDelay = delayEndTime - delayStartTime;
+    expect(actualDelay).toBeGreaterThanOrEqual(5);
+    expect(actualDelay).toBeLessThan(100);
+  });
+});
+
+describe('withResilience', () => {
+  test('acquires semaphore, calls fn, releases semaphore on success', async () => {
+    const sem = new LLMSemaphore(1);
+    let fnCalled = false;
+
+    await withResilience(sem, async () => {
+      fnCalled = true;
+      expect(sem.activeCount).toBe(1);
+      return 'result';
+    });
+
+    expect(fnCalled).toBe(true);
+    expect(sem.activeCount).toBe(0);
+  });
+
+  test('releases semaphore on failure', async () => {
+    const sem = new LLMSemaphore(1);
+
+    try {
+      await withResilience(sem, async () => {
+        expect(sem.activeCount).toBe(1);
+        throw new LLMAuthError('openai');
+      });
+      expect(true).toBe(false);
+    } catch (e: any) {
+      expect(e.name).toBe('LLMAuthError');
+      expect(sem.activeCount).toBe(0);
+    }
+  });
+
+  test('combines semaphore + retry: respects semaphore then retries', async () => {
+    const sem = new LLMSemaphore(1);
+    let callCount = 0;
+
+    const result = await withResilience(
+      sem,
+      async () => {
+        callCount++;
+        expect(sem.activeCount).toBe(1);
+        if (callCount === 1) {
+          throw new LLMRateLimitError('openai', undefined);
+        }
+        return 'success';
+      },
+      { maxAttempts: 2, baseDelayMs: 10, jitter: false }
+    );
+
+    expect(result).toBe('success');
+    expect(callCount).toBe(2);
+    expect(sem.activeCount).toBe(0);
+  });
+
+  test('releases semaphore even if retry fails', async () => {
+    const sem = new LLMSemaphore(1);
+
+    try {
+      await withResilience(
+        sem,
+        async () => {
+          throw new LLMConnectionError('openai');
+        },
+        { maxAttempts: 2, baseDelayMs: 10, jitter: false }
+      );
+      expect(true).toBe(false);
+    } catch (e: any) {
+      expect(e.name).toBe('LLMConnectionError');
+      expect(sem.activeCount).toBe(0);
+    }
+  });
+
+  test('respects custom retry options passed to withResilience', async () => {
+    const sem = new LLMSemaphore(1);
+    let callCount = 0;
+
+    try {
+      await withResilience(
+        sem,
+        async () => {
+          callCount++;
+          throw new LLMConnectionError('openai');
+        },
+        { maxAttempts: 3, baseDelayMs: 10, jitter: false }
+      );
+    } catch (e: any) {
+      expect(callCount).toBe(3);
+    }
+  });
+
+  test('concurrent requests respect semaphore limit during retries', async () => {
+    const sem = new LLMSemaphore(1);
+    const sequence: string[] = [];
+    let callCount1 = 0;
+    let callCount2 = 0;
+
+    const promise1 = withResilience(
+      sem,
+      async () => {
+        callCount1++;
+        sequence.push('call1');
+        if (callCount1 === 1) {
+          throw new LLMConnectionError('openai');
+        }
+        return 'result1';
+      },
+      { maxAttempts: 2, baseDelayMs: 10, jitter: false }
+    );
+
+    const promise2 = withResilience(
+      sem,
+      async () => {
+        callCount2++;
+        sequence.push('call2');
+        return 'result2';
+      },
+      { maxAttempts: 2, baseDelayMs: 10, jitter: false }
+    );
+
+    const [result1, result2] = await Promise.all([promise1, promise2]);
+    expect(result1).toBe('result1');
+    expect(result2).toBe('result2');
+
+    expect(callCount1).toBe(2);
+    expect(callCount2).toBe(1);
+
+    expect(sem.activeCount).toBe(0);
+  });
+
+  test('passes label through to retry logging', async () => {
+    const sem = new LLMSemaphore(1);
+    let callCount = 0;
+
+    const result = await withResilience(
+      sem,
+      async () => {
+        callCount++;
+        if (callCount === 1) {
+          throw new LLMConnectionError('openai');
+        }
+        return 'success';
+      },
+      { maxAttempts: 2, baseDelayMs: 10, jitter: false },
+      'test-label'
+    );
+
+    expect(result).toBe('success');
+  });
+
+  test('returns function result correctly', async () => {
+    const sem = new LLMSemaphore(2);
+
+    const result = await withResilience(sem, async () => {
+      return { data: 'test', count: 42 };
+    });
+
+    expect(result.data).toBe('test');
+    expect(result.count).toBe(42);
+  });
+});
diff --git a/src/llm/gateway.ts b/src/llm/gateway.ts
index 2f40cde..58d615f 100644
--- a/src/llm/gateway.ts
+++ b/src/llm/gateway.ts
@@ -6,6 +6,7 @@
  * 2. Load (or cache) LLMProvider instances with decrypted API keys
  * 3. Route chat() calls to the correct adapter
  * 4. Invalidate cache when provider config changes via UI
+ * 5. Concurrency control + retry-with-backoff for resilience
  */
 
 import { type ModelRole, modelRoleRepo } from '../db/repositories/model-role-repo';
@@ -17,6 +18,7 @@ import type { LLMProvider } from './providers/base';
 import { createGeminiProvider } from './providers/gemini';
 import { createOpenAICompatibleProvider } from './providers/openai-compatible';
 import { createOpenAIResponsesProvider } from './providers/openai-responses';
+import { LLMSemaphore, type RetryOptions, withResilience } from './resilience';
 import type { LLMChatRequest, LLMChatResponse, ProviderType } from './types';
 
 type ProviderFactoryFn = (config: {
@@ -35,6 +37,24 @@ const PROVIDER_FACTORIES: Record<ProviderType, ProviderFactoryFn> = {
 
 export class LLMGateway {
   private cache = new Map<string, LLMProvider>();
+  private semaphore: LLMSemaphore;
+  private retryOptions: Partial<RetryOptions>;
+
+  constructor(
+    maxConcurrent = 4,
+    retryOptions?: Partial<RetryOptions>
+  ) {
+    this.semaphore = new LLMSemaphore(maxConcurrent);
+    this.retryOptions = retryOptions ?? {};
+  }
+
+  /**
+   * Reconfigure resilience settings (called when admin changes config via UI).
+   */
+  updateResilienceConfig(maxConcurrent: number, retryOptions?: Partial<RetryOptions>): void {
+    this.semaphore = new LLMSemaphore(maxConcurrent);
+    this.retryOptions = retryOptions ?? this.retryOptions;
+  }
 
   /**
    * Call LLM by business role. The role determines which provider + model to use.
@@ -47,16 +67,30 @@ export class LLMGateway {
     const assignment = modelRoleRepo.getByRole(role);
     if (!assignment) throw new LLMNoProviderError(role);
 
-    const provider = this.getOrCreateProvider(assignment.provider_id);
-    return provider.chat({ ...request, model: assignment.model });
+    return withResilience(
+      this.semaphore,
+      () => {
+        const provider = this.getOrCreateProvider(assignment.provider_id);
+        return provider.chat({ ...request, model: assignment.model });
+      },
+      this.retryOptions,
+      role
+    );
   }
 
   /**
    * Direct call to a specific provider (used for connection testing).
    */
   async chatDirect(providerId: string, request: LLMChatRequest): Promise<LLMChatResponse> {
-    const provider = this.getOrCreateProvider(providerId);
-    return provider.chat(request);
+    return withResilience(
+      this.semaphore,
+      () => {
+        const provider = this.getOrCreateProvider(providerId);
+        return provider.chat(request);
+      },
+      this.retryOptions,
+      `direct:${providerId}`
+    );
   }
 
   /**
@@ -66,11 +100,18 @@ export class LLMGateway {
     const assignment = modelRoleRepo.getByRole('embedding');
     if (!assignment) throw new LLMNoProviderError('embedding');
 
-    const provider = this.getOrCreateProvider(assignment.provider_id);
-    if (!provider.embed) {
-      throw new LLMError(`Provider '${provider.type}' does not support embeddings`, provider.type);
-    }
-    return provider.embed(texts);
+    return withResilience(
+      this.semaphore,
+      () => {
+        const provider = this.getOrCreateProvider(assignment.provider_id);
+        if (!provider.embed) {
+          throw new LLMError(`Provider '${provider.type}' does not support embeddings`, provider.type);
+        }
+        return provider.embed(texts);
+      },
+      this.retryOptions,
+      'embedding'
+    );
   }
 
   /**
diff --git a/src/llm/resilience.ts b/src/llm/resilience.ts
new file mode 100644
index 0000000..76e8593
--- /dev/null
+++ b/src/llm/resilience.ts
@@ -0,0 +1,173 @@
+/**
+ * LLM Resilience Layer — Semaphore + Retry-with-Backoff.
+ *
+ * Provides concurrency control and automatic retry for LLM API calls.
+ * Designed to be integrated into the LLMGateway as a transparent wrapper.
+ */
+
+import { logger } from '../utils/logger';
+import { LLMConnectionError, LLMRateLimitError } from './errors';
+
+// ---------------------------------------------------------------------------
+// Semaphore — limits concurrent in-flight LLM requests
+// ---------------------------------------------------------------------------
+
+export class LLMSemaphore {
+  private current = 0;
+  private readonly queue: Array<() => void> = [];
+
+  constructor(private readonly maxConcurrent: number) {
+    if (maxConcurrent < 1) {
+      throw new Error(`maxConcurrent must be >= 1, got ${maxConcurrent}`);
+    }
+  }
+
+  async acquire(): Promise<void> {
+    if (this.current < this.maxConcurrent) {
+      this.current++;
+      return;
+    }
+    return new Promise((resolve) => {
+      this.queue.push(() => {
+        this.current++;
+        resolve();
+      });
+    });
+  }
+
+  release(): void {
+    this.current--;
+    const next = this.queue.shift();
+    if (next) {
+      next();
+    }
+  }
+
+  /** Current number of in-flight requests. */
+  get activeCount(): number {
+    return this.current;
+  }
+
+  /** Number of requests waiting for a slot. */
+  get pendingCount(): number {
+    return this.queue.length;
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Retry with Exponential Backoff
+// ---------------------------------------------------------------------------
+
+export interface RetryOptions {
+  /** Maximum number of attempts (including the first). Default: 3 */
+  maxAttempts: number;
+  /** Base delay in milliseconds. Default: 1000 */
+  baseDelayMs: number;
+  /** Maximum delay cap in milliseconds. Default: 60000 */
+  maxDelayMs: number;
+  /** Whether to add jitter to delays. Default: true */
+  jitter: boolean;
+}
+
+const DEFAULT_RETRY_OPTIONS: RetryOptions = {
+  maxAttempts: 3,
+  baseDelayMs: 1000,
+  maxDelayMs: 60000,
+  jitter: true,
+};
+
+/**
+ * Determines whether an error is retryable.
+ * Only rate limit (429) and connection errors (5xx, network) are retried.
+ */
+function isRetryableError(error: unknown): boolean {
+  return error instanceof LLMRateLimitError || error instanceof LLMConnectionError;
+}
+
+/**
+ * Calculates the delay for a given attempt.
+ * Respects `retryAfterSeconds` from rate limit responses when available.
+ */
+function calculateDelay(attempt: number, error: unknown, options: RetryOptions): number {
+  // If the provider told us when to retry, respect that
+  if (error instanceof LLMRateLimitError && error.retryAfterSeconds) {
+    return error.retryAfterSeconds * 1000;
+  }
+
+  // Exponential backoff: baseDelay * 2^(attempt-1), capped at maxDelay
+  const exponential = Math.min(options.baseDelayMs * 2 ** (attempt - 1), options.maxDelayMs);
+
+  // Add jitter to prevent thundering herd
+  const jitter = options.jitter ? Math.random() * options.baseDelayMs : 0;
+
+  return exponential + jitter;
+}
+
+function sleep(ms: number): Promise<void> {
+  return new Promise((resolve) => setTimeout(resolve, ms));
+}
+
+/**
+ * Retries an async function with exponential backoff.
+ * Only retries on LLMRateLimitError and LLMConnectionError.
+ * All other errors are thrown immediately.
+ */
+export async function retryWithBackoff<T>(
+  fn: () => Promise<T>,
+  options?: Partial<RetryOptions>,
+  label?: string
+): Promise<T> {
+  const opts = { ...DEFAULT_RETRY_OPTIONS, ...options };
+
+  for (let attempt = 1; attempt <= opts.maxAttempts; attempt++) {
+    try {
+      return await fn();
+    } catch (error) {
+      const isLast = attempt === opts.maxAttempts;
+      const isRetryable = isRetryableError(error);
+
+      if (isLast || !isRetryable) {
+        throw error;
+      }
+
+      const delayMs = calculateDelay(attempt, error, opts);
+      const errorName = error instanceof Error ? error.constructor.name : 'UnknownError';
+
+      logger.warn(
+        `LLM call${label ? ` [${label}]` : ''} failed (${errorName}), retrying in ${Math.round(delayMs)}ms`,
+        {
+          attempt,
+          maxAttempts: opts.maxAttempts,
+          delayMs: Math.round(delayMs),
+          error: error instanceof Error ? error.message : String(error),
+        }
+      );
+
+      await sleep(delayMs);
+    }
+  }
+
+  // TypeScript exhaustiveness — should never reach here
+  throw new Error('retryWithBackoff: unreachable');
+}
+
+// ---------------------------------------------------------------------------
+// Combined: withResilience wraps a call with semaphore + retry
+// ---------------------------------------------------------------------------
+
+/**
+ * Wraps an async function with semaphore-based concurrency control and retry logic.
+ */
+export async function withResilience<T>(
+  semaphore: LLMSemaphore,
+  fn: () => Promise<T>,
+  retryOptions?: Partial<RetryOptions>,
+  label?: string
+): Promise<T> {
+  await semaphore.acquire();
+  try {
+    return await retryWithBackoff(fn, retryOptions, label);
+  } finally {
+    semaphore.release();
+  }
+}
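
Usage sketch (editorial note, not part of the patch): a minimal example of composing the new resilience pieces outside the gateway, assuming one shared semaphore per process. The `fetchCompletion` helper and the hard-coded numbers below are hypothetical stand-ins; in the real wiring they would come from the `AppConfig` fields added above (`llmMaxConcurrentCalls`, `llmRetryMaxAttempts`, `llmRetryBaseDelayMs`).

```typescript
import { LLMSemaphore, withResilience } from './resilience';

// One shared semaphore per process, sized from LLM_MAX_CONCURRENT_CALLS (hard-coded for the sketch).
const semaphore = new LLMSemaphore(4);

// Hypothetical helper: any async call can be wrapped the same way the gateway wraps chat().
async function fetchCompletion(prompt: string): Promise<string> {
  return withResilience(
    semaphore,
    async () => {
      // A real caller would invoke provider.chat() here; this stub just echoes the prompt.
      return `echo: ${prompt}`;
    },
    { maxAttempts: 3, baseDelayMs: 1000, jitter: true }, // mirrors the LLM_RETRY_* defaults
    'example' // label appears in the warnings logged by retryWithBackoff
  );
}
```

Calls beyond the concurrency limit queue on the semaphore; only LLMRateLimitError and LLMConnectionError are retried, and every other error surfaces on the first attempt.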