From c9a2db3df2c73ef22cb2c32fd80bdf36bfa1e697 Mon Sep 17 00:00:00 2001 From: jeffusion Date: Thu, 5 Mar 2026 00:31:14 +0800 Subject: [PATCH] feat(llm): add pluggable multi-provider LLM architecture Introduce provider-agnostic LLM gateway supporting 4 provider types: OpenAI Compatible, OpenAI Responses API, Anthropic Messages API, and Google Gemini API. Each provider normalizes to a unified LLMChatResponse format with tool call support. Includes AES-256-GCM encrypted secret management for API keys and a tool-converter for cross-provider tool format translation. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) --- src/crypto/secrets.ts | 136 +++++++++++++ src/llm/capabilities.ts | 63 +++++++ src/llm/errors.ts | 92 +++++++++ src/llm/gateway.ts | 140 ++++++++++++++ src/llm/providers/anthropic.ts | 252 +++++++++++++++++++++++++ src/llm/providers/base.ts | 50 +++++ src/llm/providers/gemini.ts | 234 +++++++++++++++++++++++ src/llm/providers/openai-compatible.ts | 172 +++++++++++++++++ src/llm/providers/openai-responses.ts | 211 +++++++++++++++++++++ src/llm/tool-converter.ts | 37 ++++ src/llm/types.ts | 104 ++++++++++ 11 files changed, 1491 insertions(+) create mode 100644 src/crypto/secrets.ts create mode 100644 src/llm/capabilities.ts create mode 100644 src/llm/errors.ts create mode 100644 src/llm/gateway.ts create mode 100644 src/llm/providers/anthropic.ts create mode 100644 src/llm/providers/base.ts create mode 100644 src/llm/providers/gemini.ts create mode 100644 src/llm/providers/openai-compatible.ts create mode 100644 src/llm/providers/openai-responses.ts create mode 100644 src/llm/tool-converter.ts create mode 100644 src/llm/types.ts diff --git a/src/crypto/secrets.ts b/src/crypto/secrets.ts new file mode 100644 index 0000000..dcdf0e6 --- /dev/null +++ b/src/crypto/secrets.ts @@ -0,0 +1,136 @@ +/** + * AES-256-GCM encryption module for API key storage. + * + * Master key lifecycle: + * 1. On first startup, generate 32 random bytes → write to DATA_DIR/master.key (chmod 600) + * 2. On subsequent startups, read existing master.key + * 3. Key stays in memory for process lifetime; never logged or exposed via API + */ + +import { chmodSync, existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs'; +import { dirname, resolve } from 'node:path'; + +// --------------------------------------------------------------------------- +// Constants +// --------------------------------------------------------------------------- + +const KEY_LENGTH = 32; // AES-256 +const IV_LENGTH = 12; // GCM recommended nonce size + + +// --------------------------------------------------------------------------- +// Master Key Management +// --------------------------------------------------------------------------- + +let masterKey: Buffer | null = null; + +/** + * Resolve the master key file path. + * Defaults to `data/master.key` relative to CWD, overridable via `MASTER_KEY_PATH` env. + */ +function getMasterKeyPath(): string { + return resolve(process.env.MASTER_KEY_PATH || './data/master.key'); +} + +/** + * Initialize (load or generate) the master encryption key. + * MUST be called once at application startup before any encrypt/decrypt operations. + */ +export function initMasterKey(): void { + const keyPath = getMasterKeyPath(); + + if (existsSync(keyPath)) { + const raw = readFileSync(keyPath); + if (raw.length !== KEY_LENGTH) { + throw new Error( + `Master key at ${keyPath} is ${raw.length} bytes, expected ${KEY_LENGTH}. 
Delete the file and restart to generate a new key (all encrypted API keys will need to be re-entered).` + ); + } + masterKey = Buffer.from(raw); + console.log(`🔑 Master key loaded from ${keyPath}`); + } else { + const dir = dirname(keyPath); + mkdirSync(dir, { recursive: true }); + + const key = Buffer.from(crypto.getRandomValues(new Uint8Array(KEY_LENGTH))); + writeFileSync(keyPath, key, { mode: 0o600 }); + + try { + chmodSync(keyPath, 0o600); + } catch { + // chmod may fail on some filesystems (e.g. Windows); non-fatal + } + + masterKey = key; + console.log(`🔑 Generated new master key at ${keyPath}`); + } +} + +/** + * Get the current master key. Throws if not initialized. + */ +function getKey(): Buffer { + if (!masterKey) { + throw new Error('Master key not initialized. Call initMasterKey() at startup.'); + } + return masterKey; +} + +// --------------------------------------------------------------------------- +// Encryption / Decryption +// --------------------------------------------------------------------------- + +export interface EncryptedPayload { + /** AES-256-GCM ciphertext */ + ciphertext: Buffer; + /** 12-byte initialization vector / nonce */ + iv: Buffer; + /** 16-byte GCM authentication tag */ + authTag: Buffer; +} + +/** + * Encrypt a plaintext string (e.g. API key) with AES-256-GCM. + * + * @returns Encrypted payload containing ciphertext, IV, and auth tag + */ +export function encrypt(plaintext: string): EncryptedPayload { + const key = getKey(); + const iv = Buffer.from(crypto.getRandomValues(new Uint8Array(IV_LENGTH))); + + // Use Web Crypto–style via Node's built-in crypto + const { createCipheriv } = require('node:crypto') as typeof import('node:crypto'); + const cipher = createCipheriv('aes-256-gcm', key, iv); + + const encrypted = Buffer.concat([cipher.update(plaintext, 'utf8'), cipher.final()]); + const authTag = cipher.getAuthTag(); + + return { + ciphertext: encrypted, + iv, + authTag, + }; +} + +/** + * Decrypt an AES-256-GCM encrypted payload back to plaintext. + * + * @throws If auth tag verification fails (tampered data or wrong key) + */ +export function decrypt(payload: EncryptedPayload): string { + const key = getKey(); + + const { createDecipheriv } = require('node:crypto') as typeof import('node:crypto'); + const decipher = createDecipheriv('aes-256-gcm', key, payload.iv); + decipher.setAuthTag(payload.authTag); + + const decrypted = Buffer.concat([decipher.update(payload.ciphertext), decipher.final()]); + return decrypted.toString('utf8'); +} + +/** + * Check if the master key has been initialized. + */ +export function isMasterKeyReady(): boolean { + return masterKey !== null; +} diff --git a/src/llm/capabilities.ts b/src/llm/capabilities.ts new file mode 100644 index 0000000..8437ba4 --- /dev/null +++ b/src/llm/capabilities.ts @@ -0,0 +1,63 @@ +/** + * Provider capability declarations. + * + * Each provider type declares what features it supports natively. + * The Gateway and adapters use this to make runtime decisions + * (e.g. prompt-inject JSON mode for Anthropic, skip embedding calls for non-supporting providers). + */ + +import type { ProviderType } from './types'; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +export interface ProviderCapabilities { + /** Supports tool/function calling */ + supportsTools: boolean; + /** Supports native JSON mode (vs. 
prompt injection + manual parsing) */ + supportsJsonMode: boolean; + /** Supports SSE streaming */ + supportsStreaming: boolean; + /** Supports embedding API */ + supportsEmbeddings: boolean; + /** Supports image/multimodal input */ + supportsMultimodal: boolean; + /** Max input token count (for pre-validation, avoids wasted API calls) */ + maxInputTokens?: number; +} + +// --------------------------------------------------------------------------- +// Default capabilities per provider type +// --------------------------------------------------------------------------- + +export const DEFAULT_CAPABILITIES: Record = { + openai_compatible: { + supportsTools: true, + supportsJsonMode: true, + supportsStreaming: true, + supportsEmbeddings: true, + supportsMultimodal: false, // depends on actual model behind the compatible endpoint + }, + openai_responses: { + supportsTools: true, + supportsJsonMode: true, + supportsStreaming: true, + supportsEmbeddings: true, + supportsMultimodal: true, + }, + anthropic: { + supportsTools: true, + supportsJsonMode: false, // no native JSON mode; adapter uses prompt injection + supportsStreaming: true, + supportsEmbeddings: false, + supportsMultimodal: true, + }, + gemini: { + supportsTools: true, + supportsJsonMode: true, // responseMimeType: 'application/json' + supportsStreaming: true, + supportsEmbeddings: true, + supportsMultimodal: true, + }, +}; diff --git a/src/llm/errors.ts b/src/llm/errors.ts new file mode 100644 index 0000000..e480cc9 --- /dev/null +++ b/src/llm/errors.ts @@ -0,0 +1,92 @@ +/** + * Standardized error types for the LLM layer. + * + * All provider adapters catch native errors and re-throw as one of these. + * Business code only needs to handle LLMError subtypes. + */ + +// --------------------------------------------------------------------------- +// Base error +// --------------------------------------------------------------------------- + +export class LLMError extends Error { + /** Provider type that caused the error */ + readonly provider: string; + /** HTTP status code from the provider, if available */ + readonly statusCode?: number; + + constructor(message: string, provider: string, statusCode?: number) { + super(message); + this.name = 'LLMError'; + this.provider = provider; + this.statusCode = statusCode; + } +} + +// --------------------------------------------------------------------------- +// Specific error types +// --------------------------------------------------------------------------- + +/** Authentication failed (invalid/expired API key). */ +export class LLMAuthError extends LLMError { + constructor(provider: string, message?: string) { + super(message || 'Authentication failed: invalid or expired API key', provider, 401); + this.name = 'LLMAuthError'; + } +} + +/** Rate limit exceeded. */ +export class LLMRateLimitError extends LLMError { + /** Seconds until the rate limit resets, if provided by the provider. */ + readonly retryAfterSeconds?: number; + + constructor(provider: string, retryAfterSeconds?: number, message?: string) { + super( + message || + `Rate limit exceeded${retryAfterSeconds ? ` (retry after ${retryAfterSeconds}s)` : ''}`, + provider, + 429 + ); + this.name = 'LLMRateLimitError'; + this.retryAfterSeconds = retryAfterSeconds; + } +} + +/** Input too long / context window exceeded. 
*/
+export class LLMContextLengthError extends LLMError {
+  constructor(provider: string, message?: string) {
+    super(message || 'Input exceeds model context length', provider, 400);
+    this.name = 'LLMContextLengthError';
+  }
+}
+
+/** Provider returned an invalid or unparseable response. */
+export class LLMResponseError extends LLMError {
+  /** The raw response body that couldn't be parsed */
+  readonly rawResponse?: unknown;
+
+  constructor(provider: string, message?: string, rawResponse?: unknown) {
+    super(message || 'Invalid response from provider', provider);
+    this.name = 'LLMResponseError';
+    this.rawResponse = rawResponse;
+  }
+}
+
+/** Provider is unreachable (network error, timeout, 5xx). */
+export class LLMConnectionError extends LLMError {
+  constructor(provider: string, message?: string, statusCode?: number) {
+    super(message || 'Failed to connect to provider', provider, statusCode);
+    this.name = 'LLMConnectionError';
+  }
+}
+
+/** No provider is configured for the requested role. */
+export class LLMNoProviderError extends LLMError {
+  readonly role: string;
+
+  constructor(role: string) {
+    super(`No provider configured for role '${role}'`, 'gateway');
+    this.name = 'LLMNoProviderError';
+    this.role = role;
+  }
+}
diff --git a/src/llm/gateway.ts b/src/llm/gateway.ts
new file mode 100644
index 0000000..2f40cde
--- /dev/null
+++ b/src/llm/gateway.ts
@@ -0,0 +1,140 @@
+/**
+ * LLM Gateway — the sole entry point for all business-layer LLM calls.
+ *
+ * Responsibilities:
+ * 1. Look up model_role_assignments → provider_id + model for a given role
+ * 2. Load (or cache) LLMProvider instances with decrypted API keys
+ * 3. Route chat() calls to the correct adapter
+ * 4. Invalidate cache when provider config changes via UI
+ */
+
+import { type ModelRole, modelRoleRepo } from '../db/repositories/model-role-repo';
+import { providerRepo } from '../db/repositories/provider-repo';
+import { secretRepo } from '../db/repositories/secret-repo';
+import { LLMAuthError, LLMError, LLMNoProviderError } from './errors';
+import { createAnthropicProvider } from './providers/anthropic';
+import type { LLMProvider } from './providers/base';
+import { createGeminiProvider } from './providers/gemini';
+import { createOpenAICompatibleProvider } from './providers/openai-compatible';
+import { createOpenAIResponsesProvider } from './providers/openai-responses';
+import type { LLMChatRequest, LLMChatResponse, ProviderType } from './types';
+
+type ProviderFactoryFn = (config: {
+  baseUrl?: string;
+  apiKey: string;
+  defaultModel: string;
+  extraConfig: Record<string, unknown>;
+}) => LLMProvider;
+
+const PROVIDER_FACTORIES: Record<ProviderType, ProviderFactoryFn> = {
+  openai_compatible: createOpenAICompatibleProvider,
+  openai_responses: createOpenAIResponsesProvider,
+  anthropic: createAnthropicProvider,
+  gemini: createGeminiProvider,
+};
+
+export class LLMGateway {
+  private cache = new Map<string, LLMProvider>();
+
+  /**
+   * Call LLM by business role. The role determines which provider + model to use.
+   * The `model` field in the request is ignored — it's resolved from the DB assignment.
+   */
+  async chatForRole(
+    role: ModelRole,
+    request: Omit<LLMChatRequest, 'model'>
+  ): Promise<LLMChatResponse> {
+    const assignment = modelRoleRepo.getByRole(role);
+    if (!assignment) throw new LLMNoProviderError(role);
+
+    const provider = this.getOrCreateProvider(assignment.provider_id);
+    return provider.chat({ ...request, model: assignment.model });
+  }
+
+  /**
+   * Direct call to a specific provider (used for connection testing).
+   */
+  async chatDirect(providerId: string, request: LLMChatRequest): Promise<LLMChatResponse> {
+    const provider = this.getOrCreateProvider(providerId);
+    return provider.chat(request);
+  }
+
+  /**
+   * Embedding via the provider assigned to the 'embedding' role.
+   */
+  async embedForRole(texts: string[]): Promise<number[][]> {
+    const assignment = modelRoleRepo.getByRole('embedding');
+    if (!assignment) throw new LLMNoProviderError('embedding');
+
+    const provider = this.getOrCreateProvider(assignment.provider_id);
+    if (!provider.embed) {
+      throw new LLMError(`Provider '${provider.type}' does not support embeddings`, provider.type);
+    }
+    return provider.embed(texts);
+  }
+
+  /**
+   * Invalidate cached provider instance (call when config/key changes via UI).
+   */
+  invalidateProvider(providerId: string): void {
+    this.cache.delete(providerId);
+  }
+
+  /**
+   * Clear all cached providers (call on global config change).
+   */
+  invalidateAll(): void {
+    this.cache.clear();
+  }
+
+  /** Get or create a provider instance (for direct access like listing models). */
+  getProviderInstance(providerId: string): LLMProvider {
+    return this.getOrCreateProvider(providerId);
+  }
+
+  private getOrCreateProvider(providerId: string): LLMProvider {
+    const cached = this.cache.get(providerId);
+    if (cached) return cached;
+
+    const providerRow = providerRepo.getById(providerId);
+    if (!providerRow) {
+      throw new LLMError(`Provider '${providerId}' not found in database`, 'gateway');
+    }
+    if (!providerRow.is_enabled) {
+      throw new LLMError(`Provider '${providerRow.name}' is disabled`, 'gateway');
+    }
+
+    const apiKey = secretRepo.get(providerId);
+    if (!apiKey) {
+      throw new LLMAuthError(
+        providerRow.type,
+        `No API key configured for provider '${providerRow.name}'`
+      );
+    }
+
+    const factory = PROVIDER_FACTORIES[providerRow.type as ProviderType];
+    if (!factory) {
+      throw new LLMError(`Unknown provider type '${providerRow.type}'`, providerRow.type);
+    }
+
+    let extraConfig: Record<string, unknown> = {};
+    try {
+      extraConfig = JSON.parse(providerRow.extra_config || '{}');
+    } catch {
+      // Invalid JSON in extra_config — use empty
+    }
+
+    const provider = factory({
+      baseUrl: providerRow.base_url || undefined,
+      apiKey,
+      defaultModel: providerRow.default_model,
+      extraConfig,
+    });
+
+    this.cache.set(providerId, provider);
+    return provider;
+  }
+}
+
+// Singleton instance
+export const llmGateway = new LLMGateway();
diff --git a/src/llm/providers/anthropic.ts b/src/llm/providers/anthropic.ts
new file mode 100644
index 0000000..78adf4d
--- /dev/null
+++ b/src/llm/providers/anthropic.ts
@@ -0,0 +1,252 @@
+/**
+ * Anthropic Messages API adapter.
+ * + * Key differences from OpenAI: + * - System message → `system` top-level param (not in messages array) + * - No native JSON mode → prompt injection + JSON.parse with regex fallback + * - Tool results → role='user' with tool_result content blocks + * - Finish reasons: end_turn→stop, tool_use→tool_calls, max_tokens→length + */ + +import Anthropic from '@anthropic-ai/sdk'; +import { DEFAULT_CAPABILITIES, type ProviderCapabilities } from '../capabilities'; +import { + LLMAuthError, + LLMConnectionError, + LLMContextLengthError, + LLMRateLimitError, + LLMResponseError, +} from '../errors'; +import { toAnthropicTools } from '../tool-converter'; +import type { + LLMChatRequest, + LLMChatResponse, + LLMFinishReason, + LLMMessage, + LLMToolCall, +} from '../types'; +import type { LLMProvider, ProviderFactory } from './base'; + +const TYPE = 'anthropic'; + +const JSON_MODE_SUFFIX = + '\n\nYou MUST respond with valid JSON only. No markdown, no explanation, no other text outside the JSON object.'; + +function mapStopReason(reason: string | null | undefined): LLMFinishReason { + switch (reason) { + case 'end_turn': + return 'stop'; + case 'tool_use': + return 'tool_calls'; + case 'max_tokens': + return 'length'; + default: + return 'stop'; + } +} + +/** + * Separate system messages from conversation messages. + * Anthropic requires system as a top-level param, not inside the messages array. + */ +function convertMessages( + messages: LLMMessage[], + jsonMode: boolean +): { system: string | undefined; anthropicMessages: Anthropic.MessageParam[] } { + let system: string | undefined; + const anthropicMessages: Anthropic.MessageParam[] = []; + + for (const msg of messages) { + if (msg.role === 'system') { + const text = jsonMode ? msg.content + JSON_MODE_SUFFIX : msg.content; + system = system ? `${system}\n${text}` : text; + continue; + } + + if (msg.role === 'tool') { + // Tool results go as user messages with tool_result content block + anthropicMessages.push({ + role: 'user', + content: [ + { + type: 'tool_result', + tool_use_id: msg.toolCallId!, + content: msg.content, + }, + ], + }); + continue; + } + + if (msg.role === 'assistant' && msg.toolCalls?.length) { + const content: Anthropic.ContentBlockParam[] = []; + if (msg.content) { + content.push({ type: 'text', text: msg.content }); + } + for (const tc of msg.toolCalls) { + content.push({ + type: 'tool_use', + id: tc.id, + name: tc.name, + input: JSON.parse(tc.arguments), + }); + } + anthropicMessages.push({ role: 'assistant', content }); + continue; + } + + anthropicMessages.push({ + role: msg.role as 'user' | 'assistant', + content: msg.content, + }); + } + + // If JSON mode requested but no system message existed, create one + if (jsonMode && !system) { + system = JSON_MODE_SUFFIX.trim(); + } + + return { system, anthropicMessages }; +} + +/** + * Try to extract JSON from content that might have markdown wrapping. + * Falls back to returning the raw content. + */ +function extractJson(content: string): string { + // Try direct parse first + try { + JSON.parse(content); + return content; + } catch { + // Noop — try regex extraction + } + + // Try extracting from ```json ... ``` blocks + const jsonBlockMatch = content.match(/```(?:json)?\s*\n?([\s\S]*?)\n?\s*```/); + if (jsonBlockMatch?.[1]) { + try { + JSON.parse(jsonBlockMatch[1].trim()); + return jsonBlockMatch[1].trim(); + } catch { + // Noop + } + } + + // Try finding first { ... } or [ ... 
] + const objectMatch = content.match(/\{[\s\S]*\}/); + if (objectMatch?.[0]) { + try { + JSON.parse(objectMatch[0]); + return objectMatch[0]; + } catch { + // Noop + } + } + + return content; +} + +class AnthropicProvider implements LLMProvider { + readonly type = TYPE; + readonly capabilities: ProviderCapabilities; + private client: Anthropic; + private defaultModel: string; + + constructor(config: { + baseUrl?: string; + apiKey: string; + defaultModel: string; + extraConfig: Record; + }) { + this.client = new Anthropic({ + apiKey: config.apiKey, + ...(config.baseUrl ? { baseURL: config.baseUrl } : {}), + }); + this.defaultModel = config.defaultModel; + this.capabilities = { ...DEFAULT_CAPABILITIES.anthropic }; + } + + async chat(request: LLMChatRequest): Promise { + const model = request.model || this.defaultModel; + const jsonMode = request.responseFormat === 'json'; + const { system, anthropicMessages } = convertMessages(request.messages, jsonMode); + + const params: Anthropic.MessageCreateParamsNonStreaming = { + model, + messages: anthropicMessages, + max_tokens: request.maxTokens ?? 4096, + ...(system ? { system } : {}), + ...(request.temperature !== undefined ? { temperature: request.temperature } : {}), + ...(request.tools?.length ? { tools: toAnthropicTools(request.tools) as any } : {}), + }; + + try { + const response = await this.client.messages.create(params); + + let content: string | null = null; + const toolCalls: LLMToolCall[] = []; + + for (const block of response.content) { + if (block.type === 'text') { + content = (content || '') + block.text; + } else if (block.type === 'tool_use') { + toolCalls.push({ + id: block.id, + name: block.name, + arguments: JSON.stringify(block.input), + }); + } + } + + // JSON mode fallback: extract JSON from content + if (jsonMode && content) { + content = extractJson(content); + } + + return { + content, + toolCalls, + finishReason: mapStopReason(response.stop_reason), + usage: { + promptTokens: response.usage.input_tokens, + completionTokens: response.usage.output_tokens, + totalTokens: response.usage.input_tokens + response.usage.output_tokens, + }, + raw: response, + }; + } catch (error) { + throw this.wrapError(error); + } + } + + async listModels(): Promise { + const response = await this.client.models.list(); + const models: string[] = []; + for await (const page of response.iterPages()) { + models.push(...page.data.map((model) => model.id)); + } + return models; + } + + private wrapError(error: unknown): Error { + if (error instanceof Anthropic.APIError) { + if (error.status === 401) return new LLMAuthError(TYPE, error.message); + if (error.status === 429) return new LLMRateLimitError(TYPE); + if (error.status === 400 && error.message?.includes('too long')) + return new LLMContextLengthError(TYPE, error.message); + if (error.status && error.status >= 500) + return new LLMConnectionError(TYPE, error.message, error.status); + return new LLMResponseError(TYPE, error.message); + } + if ( + error instanceof Error && + (error.message.includes('ECONNREFUSED') || error.message.includes('fetch')) + ) { + return new LLMConnectionError(TYPE, error.message); + } + return error instanceof Error ? 
error : new Error(String(error));
+  }
+}
+
+export const createAnthropicProvider: ProviderFactory = (config) => new AnthropicProvider(config);
diff --git a/src/llm/providers/base.ts b/src/llm/providers/base.ts
new file mode 100644
index 0000000..73fc093
--- /dev/null
+++ b/src/llm/providers/base.ts
@@ -0,0 +1,50 @@
+/**
+ * LLMProvider abstract interface.
+ *
+ * Each adapter (openai-compatible, openai-responses, anthropic, gemini)
+ * implements this interface. The Gateway only interacts with providers
+ * through this contract.
+ */
+
+import type { ProviderCapabilities } from '../capabilities';
+import type { LLMChatRequest, LLMChatResponse } from '../types';
+
+// ---------------------------------------------------------------------------
+// Provider Interface
+// ---------------------------------------------------------------------------
+
+export interface LLMProvider {
+  /** Provider type identifier (e.g. 'openai_compatible', 'anthropic'). */
+  readonly type: string;
+
+  /** Capability declaration for this provider. */
+  readonly capabilities: ProviderCapabilities;
+
+  /**
+   * Core chat method. The Gateway only calls this method.
+   *
+   * Each adapter is responsible for:
+   * 1. Converting LLMChatRequest to provider-native format
+   * 2. Making the HTTP/SDK call
+   * 3. Converting the native response to LLMChatResponse
+   */
+  chat(request: LLMChatRequest): Promise<LLMChatResponse>;
+
+  /** Optional: embedding interface (only for providers that support it). */
+  embed?(texts: string[]): Promise<number[][]>;
+
+  /** Optional: list available models for this provider. */
+  listModels?(): Promise<string[]>;
+}
+
+// ---------------------------------------------------------------------------
+// Provider Factory
+// ---------------------------------------------------------------------------
+
+/** Factory function signature: create an LLMProvider from DB config + decrypted API key. */
+export type ProviderFactory = (config: {
+  baseUrl?: string;
+  apiKey: string;
+  defaultModel: string;
+  extraConfig: Record<string, unknown>;
+}) => LLMProvider;
diff --git a/src/llm/providers/gemini.ts b/src/llm/providers/gemini.ts
new file mode 100644
index 0000000..1c6a30d
--- /dev/null
+++ b/src/llm/providers/gemini.ts
@@ -0,0 +1,234 @@
+/**
+ * Google Gemini generateContent API adapter.
+ * + * Key differences from OpenAI: + * - System message → systemInstruction param + * - Role 'assistant' → 'model' + * - Tool calls → functionCall parts; tool results → functionResponse parts + * - Finish reasons: STOP, FUNCTION_CALL (unofficial—actually STOP), MAX_TOKENS, SAFETY + * - Token usage: usageMetadata.{promptTokenCount, candidatesTokenCount} + */ + +import { type Content, GoogleGenAI, type Part } from '@google/genai'; +import { DEFAULT_CAPABILITIES, type ProviderCapabilities } from '../capabilities'; +import { + LLMAuthError, + LLMConnectionError, + LLMContextLengthError, + LLMRateLimitError, + LLMResponseError, +} from '../errors'; +import { toGeminiTools } from '../tool-converter'; +import type { + LLMChatRequest, + LLMChatResponse, + LLMFinishReason, + LLMMessage, + LLMToolCall, +} from '../types'; +import type { LLMProvider, ProviderFactory } from './base'; + +const TYPE = 'gemini'; + +function mapFinishReason(reason: string | undefined): LLMFinishReason { + switch (reason) { + case 'STOP': + return 'stop'; + case 'MAX_TOKENS': + return 'length'; + case 'SAFETY': + return 'content_filter'; + case 'RECITATION': + return 'content_filter'; + default: + return 'stop'; + } +} + +function convertMessages(messages: LLMMessage[]): { + systemInstruction: string | undefined; + contents: Content[]; +} { + let systemInstruction: string | undefined; + const contents: Content[] = []; + + for (const msg of messages) { + if (msg.role === 'system') { + systemInstruction = systemInstruction ? `${systemInstruction}\n${msg.content}` : msg.content; + continue; + } + + if (msg.role === 'tool') { + // Tool results → function role with functionResponse part + contents.push({ + role: 'function', + parts: [ + { + functionResponse: { + name: msg.toolCallId || 'unknown', + response: safeParseJson(msg.content), + }, + } as Part, + ], + }); + continue; + } + + if (msg.role === 'assistant' && msg.toolCalls?.length) { + const parts: Part[] = []; + if (msg.content) { + parts.push({ text: msg.content }); + } + for (const tc of msg.toolCalls) { + parts.push({ + functionCall: { + name: tc.name, + args: safeParseJson(tc.arguments), + }, + } as Part); + } + contents.push({ role: 'model', parts }); + continue; + } + + // Gemini uses 'model' instead of 'assistant' + const role = msg.role === 'assistant' ? 'model' : 'user'; + contents.push({ + role, + parts: [{ text: msg.content }], + }); + } + + return { systemInstruction, contents }; +} + +function safeParseJson(str: string): any { + try { + return JSON.parse(str); + } catch { + return { result: str }; + } +} + +class GeminiProvider implements LLMProvider { + readonly type = TYPE; + readonly capabilities: ProviderCapabilities; + private genAI: GoogleGenAI; + private defaultModel: string; + + constructor(config: { + baseUrl?: string; + apiKey: string; + defaultModel: string; + extraConfig: Record; + }) { + this.genAI = new GoogleGenAI({ apiKey: config.apiKey }); + this.defaultModel = config.defaultModel; + this.capabilities = { ...DEFAULT_CAPABILITIES.gemini }; + } + + async chat(request: LLMChatRequest): Promise { + const modelId = request.model || this.defaultModel; + const { systemInstruction, contents } = convertMessages(request.messages); + + try { + const response = await this.genAI.models.generateContent({ + model: modelId, + contents, + config: { + ...(systemInstruction ? { systemInstruction } : {}), + ...(request.temperature !== undefined ? { temperature: request.temperature } : {}), + ...(request.maxTokens !== undefined ? 
{ maxOutputTokens: request.maxTokens } : {}), + ...(request.responseFormat === 'json' ? { responseMimeType: 'application/json' } : {}), + ...(request.tools?.length ? { tools: toGeminiTools(request.tools) as any } : {}), + }, + }); + const candidate = response.candidates?.[0]; + + let content: string | null = null; + const toolCalls: LLMToolCall[] = []; + + if (candidate?.content?.parts) { + for (const part of candidate.content.parts) { + if ('text' in part && part.text) { + content = (content || '') + part.text; + } + if ('functionCall' in part && part.functionCall) { + const toolName = String(part.functionCall.name ?? 'unknown'); + toolCalls.push({ + id: `call_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`, + name: toolName, + arguments: JSON.stringify(part.functionCall.args || {}), + }); + } + } + } + + const hasFunctionCalls = toolCalls.length > 0; + const finishReason = hasFunctionCalls + ? 'tool_calls' + : mapFinishReason(candidate?.finishReason as string | undefined); + + const usage = response.usageMetadata; + + return { + content, + toolCalls, + finishReason, + usage: { + promptTokens: usage?.promptTokenCount ?? 0, + completionTokens: usage?.candidatesTokenCount ?? 0, + totalTokens: usage?.totalTokenCount ?? 0, + }, + raw: response, + }; + } catch (error) { + throw this.wrapError(error); + } + } + + async embed(texts: string[]): Promise { + try { + const results: number[][] = []; + for (const text of texts) { + const result = await this.genAI.models.embedContent({ + model: this.defaultModel, + contents: text, + }); + results.push(result.embeddings?.[0]?.values ?? []); + } + return results; + } catch (error) { + throw this.wrapError(error); + } + } + + async listModels(): Promise { + const pager = await this.genAI.models.list(); + const models: string[] = []; + for await (const model of pager) { + if (model.name) { + models.push(model.name.replace('models/', '')); + } + } + return models; + } + + private wrapError(error: unknown): Error { + if (error instanceof Error) { + const msg = error.message; + if (msg.includes('API_KEY_INVALID') || msg.includes('401')) + return new LLMAuthError(TYPE, msg); + if (msg.includes('429') || msg.includes('RESOURCE_EXHAUSTED')) + return new LLMRateLimitError(TYPE); + if (msg.includes('context') || msg.includes('too long')) + return new LLMContextLengthError(TYPE, msg); + if (msg.includes('ECONNREFUSED') || msg.includes('fetch') || msg.includes('500')) + return new LLMConnectionError(TYPE, msg); + return new LLMResponseError(TYPE, msg); + } + return new Error(String(error)); + } +} + +export const createGeminiProvider: ProviderFactory = (config) => new GeminiProvider(config); diff --git a/src/llm/providers/openai-compatible.ts b/src/llm/providers/openai-compatible.ts new file mode 100644 index 0000000..e40ff60 --- /dev/null +++ b/src/llm/providers/openai-compatible.ts @@ -0,0 +1,172 @@ +/** + * OpenAI Compatible adapter — for any service implementing the OpenAI chat.completions API. + * This is the existing format used by the codebase today. 
+ */ + +import OpenAI from 'openai'; +import { DEFAULT_CAPABILITIES, type ProviderCapabilities } from '../capabilities'; +import { + LLMAuthError, + LLMConnectionError, + LLMContextLengthError, + LLMRateLimitError, + LLMResponseError, +} from '../errors'; +import { toOpenAITools } from '../tool-converter'; +import type { + LLMChatRequest, + LLMChatResponse, + LLMFinishReason, + LLMMessage, + LLMToolCall, +} from '../types'; +import type { LLMProvider, ProviderFactory } from './base'; + +const TYPE = 'openai_compatible'; + +function mapFinishReason(reason: string | null | undefined): LLMFinishReason { + switch (reason) { + case 'stop': + return 'stop'; + case 'tool_calls': + return 'tool_calls'; + case 'length': + return 'length'; + case 'content_filter': + return 'content_filter'; + default: + return 'stop'; + } +} + +function toOpenAIMessages(messages: LLMMessage[]): OpenAI.ChatCompletionMessageParam[] { + return messages.map((msg) => { + if (msg.role === 'tool') { + return { + role: 'tool' as const, + content: msg.content, + tool_call_id: msg.toolCallId!, + }; + } + if (msg.role === 'assistant' && msg.toolCalls?.length) { + return { + role: 'assistant' as const, + content: msg.content || null, + tool_calls: msg.toolCalls.map((tc) => ({ + id: tc.id, + type: 'function' as const, + function: { name: tc.name, arguments: tc.arguments }, + })), + }; + } + return { + role: msg.role as 'system' | 'user' | 'assistant', + content: msg.content, + }; + }); +} + +function extractToolCalls(choice: OpenAI.ChatCompletion.Choice): LLMToolCall[] { + const toolCalls = choice.message.tool_calls; + if (!toolCalls?.length) return []; + return toolCalls.map((tc) => ({ + id: tc.id, + name: tc.function.name, + arguments: tc.function.arguments, + })); +} + +class OpenAICompatibleProvider implements LLMProvider { + readonly type = TYPE; + readonly capabilities: ProviderCapabilities; + private client: OpenAI; + private defaultModel: string; + + constructor(config: { + baseUrl?: string; + apiKey: string; + defaultModel: string; + extraConfig: Record; + }) { + this.client = new OpenAI({ + baseURL: config.baseUrl || 'https://api.openai.com/v1', + apiKey: config.apiKey, + ...(config.extraConfig.organization + ? { organization: config.extraConfig.organization as string } + : {}), + }); + this.defaultModel = config.defaultModel; + this.capabilities = { ...DEFAULT_CAPABILITIES.openai_compatible }; + } + + async chat(request: LLMChatRequest): Promise { + const model = request.model || this.defaultModel; + + const params: OpenAI.ChatCompletionCreateParamsNonStreaming = { + model, + messages: toOpenAIMessages(request.messages), + ...(request.temperature !== undefined ? { temperature: request.temperature } : {}), + ...(request.maxTokens !== undefined ? { max_tokens: request.maxTokens } : {}), + ...(request.responseFormat === 'json' ? { response_format: { type: 'json_object' } } : {}), + ...(request.tools?.length ? { tools: toOpenAITools(request.tools) as any } : {}), + }; + + try { + const response = await this.client.chat.completions.create(params); + const choice = response.choices[0]; + + return { + content: choice?.message?.content ?? null, + toolCalls: choice ? extractToolCalls(choice) : [], + finishReason: mapFinishReason(choice?.finish_reason), + usage: { + promptTokens: response.usage?.prompt_tokens ?? 0, + completionTokens: response.usage?.completion_tokens ?? 0, + totalTokens: response.usage?.total_tokens ?? 
0, + }, + raw: response, + }; + } catch (error) { + throw this.wrapError(error); + } + } + + async embed(texts: string[]): Promise { + try { + const response = await this.client.embeddings.create({ + model: this.defaultModel, + input: texts, + }); + return response.data.map((d) => d.embedding); + } catch (error) { + throw this.wrapError(error); + } + } + + async listModels(): Promise { + const response = await this.client.models.list(); + return response.data.map((model) => model.id); + } + + private wrapError(error: unknown): Error { + if (error instanceof OpenAI.APIError) { + if (error.status === 401) return new LLMAuthError(TYPE, error.message); + if (error.status === 429) return new LLMRateLimitError(TYPE); + if (error.status === 400 && error.message?.includes('context_length')) + return new LLMContextLengthError(TYPE, error.message); + if (error.status && error.status >= 500) + return new LLMConnectionError(TYPE, error.message, error.status); + return new LLMResponseError(TYPE, error.message); + } + if ( + error instanceof Error && + (error.message.includes('ECONNREFUSED') || error.message.includes('fetch')) + ) { + return new LLMConnectionError(TYPE, error.message); + } + return error instanceof Error ? error : new Error(String(error)); + } +} + +export const createOpenAICompatibleProvider: ProviderFactory = (config) => + new OpenAICompatibleProvider(config); diff --git a/src/llm/providers/openai-responses.ts b/src/llm/providers/openai-responses.ts new file mode 100644 index 0000000..26a6079 --- /dev/null +++ b/src/llm/providers/openai-responses.ts @@ -0,0 +1,211 @@ +/** + * OpenAI Responses API adapter — uses the newer responses.create endpoint. + * System instructions are passed as the `instructions` parameter instead of a system message. + */ + +import OpenAI from 'openai'; +import { DEFAULT_CAPABILITIES, type ProviderCapabilities } from '../capabilities'; +import { + LLMAuthError, + LLMConnectionError, + LLMContextLengthError, + LLMRateLimitError, + LLMResponseError, +} from '../errors'; +import { toOpenAITools } from '../tool-converter'; +import type { + LLMChatRequest, + LLMChatResponse, + LLMFinishReason, + LLMMessage, + LLMToolCall, +} from '../types'; +import type { LLMProvider, ProviderFactory } from './base'; + +const TYPE = 'openai_responses'; + +function mapStatus(status: string | undefined): LLMFinishReason { + switch (status) { + case 'completed': + return 'stop'; + case 'incomplete': + return 'length'; + default: + return 'stop'; + } +} + +/** + * Extract system instructions and build input items from unified messages. + * System messages → `instructions` parameter. + * Remaining messages → `input` array. + */ +function convertMessages(messages: LLMMessage[]): { + instructions: string | undefined; + input: any[]; +} { + let instructions: string | undefined; + const input: any[] = []; + + for (const msg of messages) { + if (msg.role === 'system') { + instructions = instructions ? 
`${instructions}\n${msg.content}` : msg.content; + continue; + } + + if (msg.role === 'tool') { + input.push({ + type: 'function_call_output', + call_id: msg.toolCallId, + output: msg.content, + }); + continue; + } + + if (msg.role === 'assistant' && msg.toolCalls?.length) { + // Emit function_call items for each tool call + for (const tc of msg.toolCalls) { + input.push({ + type: 'function_call', + id: tc.id, + name: tc.name, + arguments: tc.arguments, + }); + } + // Also emit text if present + if (msg.content) { + input.push({ role: 'assistant', content: msg.content }); + } + continue; + } + + input.push({ + role: msg.role, + content: msg.content, + }); + } + + return { instructions, input }; +} + +class OpenAIResponsesProvider implements LLMProvider { + readonly type = TYPE; + readonly capabilities: ProviderCapabilities; + private client: OpenAI; + private defaultModel: string; + + constructor(config: { + baseUrl?: string; + apiKey: string; + defaultModel: string; + extraConfig: Record; + }) { + this.client = new OpenAI({ + baseURL: config.baseUrl || undefined, + apiKey: config.apiKey, + }); + this.defaultModel = config.defaultModel; + this.capabilities = { ...DEFAULT_CAPABILITIES.openai_responses }; + } + + async chat(request: LLMChatRequest): Promise { + const model = request.model || this.defaultModel; + const { instructions, input } = convertMessages(request.messages); + + const params: any = { + model, + input, + ...(instructions ? { instructions } : {}), + ...(request.temperature !== undefined ? { temperature: request.temperature } : {}), + ...(request.maxTokens !== undefined ? { max_output_tokens: request.maxTokens } : {}), + ...(request.responseFormat === 'json' ? { text: { format: { type: 'json_object' } } } : {}), + ...(request.tools?.length + ? { + tools: toOpenAITools(request.tools).map((t: any) => ({ + type: 'function', + ...t.function, + })), + } + : {}), + }; + + try { + const response = await this.client.responses.create(params); + + // Extract content and tool calls from output + let content: string | null = null; + const toolCalls: LLMToolCall[] = []; + + for (const item of response.output || []) { + if (item.type === 'message') { + for (const block of (item as any).content || []) { + if (block.type === 'output_text') { + content = (content || '') + block.text; + } + } + } else if (item.type === 'function_call') { + toolCalls.push({ + id: (item as any).id || (item as any).call_id || '', + name: (item as any).name, + arguments: (item as any).arguments || '{}', + }); + } + } + + return { + content, + toolCalls, + finishReason: toolCalls.length > 0 ? 'tool_calls' : mapStatus(response.status), + usage: { + promptTokens: (response.usage as any)?.input_tokens ?? 0, + completionTokens: (response.usage as any)?.output_tokens ?? 0, + totalTokens: + ((response.usage as any)?.input_tokens ?? 0) + + ((response.usage as any)?.output_tokens ?? 
0), + }, + raw: response, + }; + } catch (error) { + throw this.wrapError(error); + } + } + + async embed(texts: string[]): Promise { + try { + const response = await this.client.embeddings.create({ + model: this.defaultModel, + input: texts, + }); + return response.data.map((d) => d.embedding); + } catch (error) { + throw this.wrapError(error); + } + } + + async listModels(): Promise { + const response = await this.client.models.list(); + return response.data.map((model) => model.id); + } + + private wrapError(error: unknown): Error { + if (error instanceof OpenAI.APIError) { + if (error.status === 401) return new LLMAuthError(TYPE, error.message); + if (error.status === 429) return new LLMRateLimitError(TYPE); + if (error.status === 400 && error.message?.includes('context_length')) + return new LLMContextLengthError(TYPE, error.message); + if (error.status && error.status >= 500) + return new LLMConnectionError(TYPE, error.message, error.status); + return new LLMResponseError(TYPE, error.message); + } + if ( + error instanceof Error && + (error.message.includes('ECONNREFUSED') || error.message.includes('fetch')) + ) { + return new LLMConnectionError(TYPE, error.message); + } + return error instanceof Error ? error : new Error(String(error)); + } +} + +export const createOpenAIResponsesProvider: ProviderFactory = (config) => + new OpenAIResponsesProvider(config); diff --git a/src/llm/tool-converter.ts b/src/llm/tool-converter.ts new file mode 100644 index 0000000..f351cf6 --- /dev/null +++ b/src/llm/tool-converter.ts @@ -0,0 +1,37 @@ +/** + * Converts internal LLMToolDefinition to provider-native tool formats. + * Called by each adapter in their chat() method. + */ + +import type { LLMToolDefinition } from './types'; + +export function toOpenAITools(tools: LLMToolDefinition[]): object[] { + return tools.map((tool) => ({ + type: 'function' as const, + function: { + name: tool.name, + description: tool.description, + parameters: tool.parameters, + }, + })); +} + +export function toAnthropicTools(tools: LLMToolDefinition[]): object[] { + return tools.map((tool) => ({ + name: tool.name, + description: tool.description, + input_schema: tool.parameters, + })); +} + +export function toGeminiTools(tools: LLMToolDefinition[]): object[] { + return [ + { + functionDeclarations: tools.map((tool) => ({ + name: tool.name, + description: tool.description, + parameters: tool.parameters, + })), + }, + ]; +} diff --git a/src/llm/types.ts b/src/llm/types.ts new file mode 100644 index 0000000..c37c02d --- /dev/null +++ b/src/llm/types.ts @@ -0,0 +1,104 @@ +/** + * Unified LLM types — provider-agnostic request/response format. + * + * All business code uses these types exclusively. + * Provider adapters translate to/from these types. + */ + +// --------------------------------------------------------------------------- +// Model Role +// --------------------------------------------------------------------------- + +/** Business role that maps to a specific provider + model via DB config. */ +export type ModelRole = 'legacy' | 'planner' | 'specialist' | 'judge' | 'embedding'; + +/** All valid model roles. 
*/
+export const MODEL_ROLES: readonly ModelRole[] = [
+  'legacy',
+  'planner',
+  'specialist',
+  'judge',
+  'embedding',
+] as const;
+
+// ---------------------------------------------------------------------------
+// Provider Type
+// ---------------------------------------------------------------------------
+
+export type ProviderType = 'openai_compatible' | 'openai_responses' | 'anthropic' | 'gemini';
+
+export const PROVIDER_TYPES: readonly ProviderType[] = [
+  'openai_compatible',
+  'openai_responses',
+  'anthropic',
+  'gemini',
+] as const;
+
+// ---------------------------------------------------------------------------
+// Messages
+// ---------------------------------------------------------------------------
+
+/** Unified message format (internal representation, hides provider differences). */
+export interface LLMMessage {
+  role: 'system' | 'user' | 'assistant' | 'tool';
+  content: string;
+  /** When role='tool', the ID of the tool call this result corresponds to. */
+  toolCallId?: string;
+  /** When role='assistant', any tool calls the model wants to make. */
+  toolCalls?: LLMToolCall[];
+}
+
+/** A single tool call from the model. */
+export interface LLMToolCall {
+  id: string;
+  name: string;
+  /** JSON-serialized arguments */
+  arguments: string;
+}
+
+/** Tool definition in provider-agnostic format. Converted to native format by tool-converter. */
+export interface LLMToolDefinition {
+  name: string;
+  description: string;
+  /** JSON Schema for the tool's parameters */
+  parameters: Record<string, unknown>;
+}
+
+// ---------------------------------------------------------------------------
+// Request
+// ---------------------------------------------------------------------------
+
+/** Unified chat request — provider adapters translate this to native format. */
+export interface LLMChatRequest {
+  messages: LLMMessage[];
+  model: string;
+  temperature?: number;
+  maxTokens?: number;
+  /** Abstract JSON mode: adapters handle native support or prompt injection fallback. */
+  responseFormat?: 'text' | 'json';
+  tools?: LLMToolDefinition[];
+  /** Provider-specific passthrough config (e.g. Anthropic thinking, Gemini safetySettings). */
+  providerOptions?: Record<string, unknown>;
+}
+
+// ---------------------------------------------------------------------------
+// Response
+// ---------------------------------------------------------------------------
+
+/** Unified chat response — provider adapters normalize to this format. */
+export interface LLMChatResponse {
+  content: string | null;
+  toolCalls: LLMToolCall[];
+  finishReason: LLMFinishReason;
+  usage: LLMUsage;
+  /** Raw provider response preserved for debugging. */
+  raw?: unknown;
+}
+
+export type LLMFinishReason = 'stop' | 'tool_calls' | 'length' | 'content_filter' | 'error';
+
+export interface LLMUsage {
+  promptTokens: number;
+  completionTokens: number;
+  totalTokens: number;
+}
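
Usage sketch (illustrative, not part of the diff): how business code would call the gateway introduced above via chatForRole, pass a tool definition, and handle a 'tool_calls' finish reason. This assumes a 'planner' role assignment already exists in model_role_assignments; the search_docs tool and the import paths are made up for illustration.

// usage-sketch.ts (hypothetical file at the repo root)
import { llmGateway } from './src/llm/gateway';
import type { LLMMessage, LLMToolDefinition } from './src/llm/types';

const searchDocs: LLMToolDefinition = {
  name: 'search_docs', // hypothetical tool, not defined in this patch
  description: 'Search internal documentation and return matching snippets',
  parameters: {
    type: 'object',
    properties: { query: { type: 'string' } },
    required: ['query'],
  },
};

async function planWithTools(question: string): Promise<void> {
  const messages: LLMMessage[] = [
    { role: 'system', content: 'You are a planning assistant.' },
    { role: 'user', content: question },
  ];

  // `model` is omitted: chatForRole resolves it from the role assignment in the DB.
  const response = await llmGateway.chatForRole('planner', {
    messages,
    temperature: 0.2,
    tools: [searchDocs],
  });

  if (response.finishReason === 'tool_calls') {
    for (const call of response.toolCalls) {
      // Arguments are a JSON string in the unified format, regardless of provider.
      const args = JSON.parse(call.arguments) as { query: string };
      console.log(`model requested ${call.name} with query: ${args.query}`);
      // Execute the tool, then continue the conversation with a role='tool' message
      // carrying the same toolCallId so the adapter can route the result correctly.
    }
  } else {
    console.log(response.content);
  }
}

planWithTools('Outline a migration plan for the billing service.').catch(console.error);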
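
Encryption roundtrip sketch (illustrative, not part of the diff): exercising src/crypto/secrets.ts end to end. How the three payload fields are persisted is an assumption here; the secret repository's storage format is not shown in this patch.

// secrets-sketch.ts (hypothetical file at the repo root)
import { decrypt, encrypt, initMasterKey, type EncryptedPayload } from './src/crypto/secrets';

// Loads or generates DATA_DIR/master.key (chmod 600). Must run before any encrypt/decrypt call.
initMasterKey();

const payload: EncryptedPayload = encrypt('sk-test-1234567890');

// Assumed persistence shape: the repo could store the three buffers as base64 columns.
const stored = {
  ciphertext: payload.ciphertext.toString('base64'),
  iv: payload.iv.toString('base64'),
  authTag: payload.authTag.toString('base64'),
};

// Decryption verifies the GCM auth tag; a tampered ciphertext or a different master key throws.
const restored = decrypt({
  ciphertext: Buffer.from(stored.ciphertext, 'base64'),
  iv: Buffer.from(stored.iv, 'base64'),
  authTag: Buffer.from(stored.authTag, 'base64'),
});

console.log(restored === 'sk-test-1234567890'); // true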
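
Tool conversion sketch (illustrative, not part of the diff): the same provider-agnostic definition rendered through src/llm/tool-converter.ts into each provider's native shape. The get_weather tool is a made-up example; the expected shapes in the comments follow directly from toOpenAITools, toAnthropicTools, and toGeminiTools above.

// tool-converter-sketch.ts (hypothetical file at the repo root)
import { toAnthropicTools, toGeminiTools, toOpenAITools } from './src/llm/tool-converter';
import type { LLMToolDefinition } from './src/llm/types';

const getWeather: LLMToolDefinition = {
  name: 'get_weather', // hypothetical example tool
  description: 'Look up the current weather for a city',
  parameters: {
    type: 'object',
    properties: { city: { type: 'string' } },
    required: ['city'],
  },
};

// OpenAI chat.completions shape: { type: 'function', function: { name, description, parameters } }
console.log(JSON.stringify(toOpenAITools([getWeather]), null, 2));

// Anthropic Messages shape: { name, description, input_schema }
console.log(JSON.stringify(toAnthropicTools([getWeather]), null, 2));

// Gemini generateContent shape: [{ functionDeclarations: [{ name, description, parameters }] }]
console.log(JSON.stringify(toGeminiTools([getWeather]), null, 2));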