feat(llm): add pluggable multi-provider LLM architecture

Introduce provider-agnostic LLM gateway supporting 4 provider types:
OpenAI Compatible, OpenAI Responses API, Anthropic Messages API, and
Google Gemini API. Each provider normalizes to a unified LLMChatResponse
format with tool call support. Includes AES-256-GCM encrypted secret
management for API keys and a tool-converter for cross-provider tool
format translation.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)
Author: jeffusion
Date: 2026-03-05 00:31:14 +08:00
Committed by: 路遥知码力
Parent: f0d981ad00
Commit: c9a2db3df2
11 changed files with 1491 additions and 0 deletions

src/crypto/secrets.ts (new file)

@@ -0,0 +1,136 @@
/**
* AES-256-GCM encryption module for API key storage.
*
* Master key lifecycle:
* 1. On first startup, generate 32 random bytes → write to DATA_DIR/master.key (chmod 600)
* 2. On subsequent startups, read existing master.key
* 3. Key stays in memory for process lifetime; never logged or exposed via API
*/
import { createCipheriv, createDecipheriv, randomBytes } from 'node:crypto';
import { chmodSync, existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
import { dirname, resolve } from 'node:path';
// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------
const KEY_LENGTH = 32; // AES-256
const IV_LENGTH = 12; // GCM recommended nonce size
// ---------------------------------------------------------------------------
// Master Key Management
// ---------------------------------------------------------------------------
let masterKey: Buffer | null = null;
/**
* Resolve the master key file path.
* Defaults to `data/master.key` relative to CWD, overridable via `MASTER_KEY_PATH` env.
*/
function getMasterKeyPath(): string {
return resolve(process.env.MASTER_KEY_PATH || './data/master.key');
}
/**
* Initialize (load or generate) the master encryption key.
* MUST be called once at application startup before any encrypt/decrypt operations.
*/
export function initMasterKey(): void {
const keyPath = getMasterKeyPath();
if (existsSync(keyPath)) {
const raw = readFileSync(keyPath);
if (raw.length !== KEY_LENGTH) {
throw new Error(
`Master key at ${keyPath} is ${raw.length} bytes, expected ${KEY_LENGTH}. Delete the file and restart to generate a new key (all encrypted API keys will need to be re-entered).`
);
}
masterKey = Buffer.from(raw);
console.log(`🔑 Master key loaded from ${keyPath}`);
} else {
const dir = dirname(keyPath);
mkdirSync(dir, { recursive: true });
const key = randomBytes(KEY_LENGTH);
writeFileSync(keyPath, key, { mode: 0o600 });
try {
chmodSync(keyPath, 0o600);
} catch {
// chmod may fail on some filesystems (e.g. Windows); non-fatal
}
masterKey = key;
console.log(`🔑 Generated new master key at ${keyPath}`);
}
}
/**
* Get the current master key. Throws if not initialized.
*/
function getKey(): Buffer {
if (!masterKey) {
throw new Error('Master key not initialized. Call initMasterKey() at startup.');
}
return masterKey;
}
// ---------------------------------------------------------------------------
// Encryption / Decryption
// ---------------------------------------------------------------------------
export interface EncryptedPayload {
/** AES-256-GCM ciphertext */
ciphertext: Buffer;
/** 12-byte initialization vector / nonce */
iv: Buffer;
/** 16-byte GCM authentication tag */
authTag: Buffer;
}
/**
* Encrypt a plaintext string (e.g. API key) with AES-256-GCM.
*
* @returns Encrypted payload containing ciphertext, IV, and auth tag
*/
export function encrypt(plaintext: string): EncryptedPayload {
const key = getKey();
const iv = randomBytes(IV_LENGTH);
const cipher = createCipheriv('aes-256-gcm', key, iv);
const encrypted = Buffer.concat([cipher.update(plaintext, 'utf8'), cipher.final()]);
const authTag = cipher.getAuthTag();
return {
ciphertext: encrypted,
iv,
authTag,
};
}
/**
* Decrypt an AES-256-GCM encrypted payload back to plaintext.
*
* @throws If auth tag verification fails (tampered data or wrong key)
*/
export function decrypt(payload: EncryptedPayload): string {
const key = getKey();
const decipher = createDecipheriv('aes-256-gcm', key, payload.iv);
decipher.setAuthTag(payload.authTag);
const decrypted = Buffer.concat([decipher.update(payload.ciphertext), decipher.final()]);
return decrypted.toString('utf8');
}
/**
* Check if the master key has been initialized.
*/
export function isMasterKeyReady(): boolean {
return masterKey !== null;
}

src/llm/capabilities.ts (new file)

@@ -0,0 +1,63 @@
/**
* Provider capability declarations.
*
* Each provider type declares what features it supports natively.
* The Gateway and adapters use this to make runtime decisions
* (e.g. prompt-inject JSON mode for Anthropic, skip embedding calls for non-supporting providers).
*/
import type { ProviderType } from './types';
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
export interface ProviderCapabilities {
/** Supports tool/function calling */
supportsTools: boolean;
/** Supports native JSON mode (vs. prompt injection + manual parsing) */
supportsJsonMode: boolean;
/** Supports SSE streaming */
supportsStreaming: boolean;
/** Supports embedding API */
supportsEmbeddings: boolean;
/** Supports image/multimodal input */
supportsMultimodal: boolean;
/** Max input token count (for pre-validation, avoids wasted API calls) */
maxInputTokens?: number;
}
// ---------------------------------------------------------------------------
// Default capabilities per provider type
// ---------------------------------------------------------------------------
export const DEFAULT_CAPABILITIES: Record<ProviderType, ProviderCapabilities> = {
openai_compatible: {
supportsTools: true,
supportsJsonMode: true,
supportsStreaming: true,
supportsEmbeddings: true,
supportsMultimodal: false, // depends on actual model behind the compatible endpoint
},
openai_responses: {
supportsTools: true,
supportsJsonMode: true,
supportsStreaming: true,
supportsEmbeddings: true,
supportsMultimodal: true,
},
anthropic: {
supportsTools: true,
supportsJsonMode: false, // no native JSON mode; adapter uses prompt injection
supportsStreaming: true,
supportsEmbeddings: false,
supportsMultimodal: true,
},
gemini: {
supportsTools: true,
supportsJsonMode: true, // responseMimeType: 'application/json'
supportsStreaming: true,
supportsEmbeddings: true,
supportsMultimodal: true,
},
};
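
A small sketch of how a caller might branch on these flags; the jsonStrategy helper is illustrative, not part of this commit:

import { DEFAULT_CAPABILITIES } from './src/llm/capabilities';
import type { ProviderType } from './src/llm/types';

// Decide whether JSON output can rely on native JSON mode or needs prompt injection.
function jsonStrategy(type: ProviderType): 'native' | 'prompt-injection' {
  return DEFAULT_CAPABILITIES[type].supportsJsonMode ? 'native' : 'prompt-injection';
}

console.log(jsonStrategy('anthropic')); // 'prompt-injection'
console.log(jsonStrategy('gemini')); // 'native'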

src/llm/errors.ts (new file)

@@ -0,0 +1,92 @@
/**
* Standardized error types for the LLM layer.
*
* All provider adapters catch native errors and re-throw as one of these.
* Business code only needs to handle LLMError subtypes.
*/
// ---------------------------------------------------------------------------
// Base error
// ---------------------------------------------------------------------------
export class LLMError extends Error {
/** Provider type that caused the error */
readonly provider: string;
/** HTTP status code from the provider, if available */
readonly statusCode?: number;
constructor(message: string, provider: string, statusCode?: number) {
super(message);
this.name = 'LLMError';
this.provider = provider;
this.statusCode = statusCode;
}
}
// ---------------------------------------------------------------------------
// Specific error types
// ---------------------------------------------------------------------------
/** Authentication failed (invalid/expired API key). */
export class LLMAuthError extends LLMError {
constructor(provider: string, message?: string) {
super(message || 'Authentication failed: invalid or expired API key', provider, 401);
this.name = 'LLMAuthError';
}
}
/** Rate limit exceeded. */
export class LLMRateLimitError extends LLMError {
/** Seconds until the rate limit resets, if provided by the provider. */
readonly retryAfterSeconds?: number;
constructor(provider: string, retryAfterSeconds?: number, message?: string) {
super(
message ||
`Rate limit exceeded${retryAfterSeconds ? ` (retry after ${retryAfterSeconds}s)` : ''}`,
provider,
429
);
this.name = 'LLMRateLimitError';
this.retryAfterSeconds = retryAfterSeconds;
}
}
/** Input too long / context window exceeded. */
export class LLMContextLengthError extends LLMError {
constructor(provider: string, message?: string) {
super(message || 'Input exceeds model context length', provider, 400);
this.name = 'LLMContextLengthError';
}
}
/** Provider returned an invalid or unparseable response. */
export class LLMResponseError extends LLMError {
/** The raw response body that couldn't be parsed */
readonly rawResponse?: unknown;
constructor(provider: string, message?: string, rawResponse?: unknown) {
super(message || 'Invalid response from provider', provider);
this.name = 'LLMResponseError';
this.rawResponse = rawResponse;
}
}
/** Provider is unreachable (network error, timeout, 5xx). */
export class LLMConnectionError extends LLMError {
constructor(provider: string, message?: string, statusCode?: number) {
super(message || 'Failed to connect to provider', provider, statusCode);
this.name = 'LLMConnectionError';
}
}
/** No provider is configured for the requested role. */
export class LLMNoProviderError extends LLMError {
readonly role: string;
constructor(role: string) {
super(`No provider configured for role '${role}'`, 'gateway');
this.name = 'LLMNoProviderError';
this.role = role;
}
}
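
A sketch of how business code might handle these subtypes; the retry helper is illustrative, not part of this commit:

import { LLMAuthError, LLMRateLimitError } from './src/llm/errors';

// Retry once on rate limits, surface auth problems immediately, rethrow everything else.
async function withRetry<T>(call: () => Promise<T>): Promise<T> {
  try {
    return await call();
  } catch (err) {
    if (err instanceof LLMRateLimitError) {
      await new Promise((r) => setTimeout(r, (err.retryAfterSeconds ?? 5) * 1000));
      return call();
    }
    if (err instanceof LLMAuthError) {
      throw new Error(`Check the API key for provider '${err.provider}': ${err.message}`);
    }
    throw err;
  }
}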

src/llm/gateway.ts (new file)

@@ -0,0 +1,140 @@
/**
* LLM Gateway — the sole entry point for all business-layer LLM calls.
*
* Responsibilities:
* 1. Look up model_role_assignments → provider_id + model for a given role
* 2. Load (or cache) LLMProvider instances with decrypted API keys
* 3. Route chat() calls to the correct adapter
* 4. Invalidate cache when provider config changes via UI
*/
import { type ModelRole, modelRoleRepo } from '../db/repositories/model-role-repo';
import { providerRepo } from '../db/repositories/provider-repo';
import { secretRepo } from '../db/repositories/secret-repo';
import { LLMAuthError, LLMError, LLMNoProviderError } from './errors';
import { createAnthropicProvider } from './providers/anthropic';
import type { LLMProvider } from './providers/base';
import { createGeminiProvider } from './providers/gemini';
import { createOpenAICompatibleProvider } from './providers/openai-compatible';
import { createOpenAIResponsesProvider } from './providers/openai-responses';
import type { LLMChatRequest, LLMChatResponse, ProviderType } from './types';
type ProviderFactoryFn = (config: {
baseUrl?: string;
apiKey: string;
defaultModel: string;
extraConfig: Record<string, unknown>;
}) => LLMProvider;
const PROVIDER_FACTORIES: Record<ProviderType, ProviderFactoryFn> = {
openai_compatible: createOpenAICompatibleProvider,
openai_responses: createOpenAIResponsesProvider,
anthropic: createAnthropicProvider,
gemini: createGeminiProvider,
};
export class LLMGateway {
private cache = new Map<string, LLMProvider>();
/**
* Call LLM by business role. The role determines which provider + model to use.
* The `model` field in the request is ignored — it's resolved from the DB assignment.
*/
async chatForRole(
role: ModelRole,
request: Omit<LLMChatRequest, 'model'>
): Promise<LLMChatResponse> {
const assignment = modelRoleRepo.getByRole(role);
if (!assignment) throw new LLMNoProviderError(role);
const provider = this.getOrCreateProvider(assignment.provider_id);
return provider.chat({ ...request, model: assignment.model });
}
/**
* Direct call to a specific provider (used for connection testing).
*/
async chatDirect(providerId: string, request: LLMChatRequest): Promise<LLMChatResponse> {
const provider = this.getOrCreateProvider(providerId);
return provider.chat(request);
}
/**
* Embedding via the provider assigned to the 'embedding' role.
*/
async embedForRole(texts: string[]): Promise<number[][]> {
const assignment = modelRoleRepo.getByRole('embedding');
if (!assignment) throw new LLMNoProviderError('embedding');
const provider = this.getOrCreateProvider(assignment.provider_id);
if (!provider.embed) {
throw new LLMError(`Provider '${provider.type}' does not support embeddings`, provider.type);
}
return provider.embed(texts);
}
/**
* Invalidate cached provider instance (call when config/key changes via UI).
*/
invalidateProvider(providerId: string): void {
this.cache.delete(providerId);
}
/**
* Clear all cached providers (call on global config change).
*/
invalidateAll(): void {
this.cache.clear();
}
/** Get or create a provider instance (for direct access like listing models). */
getProviderInstance(providerId: string): LLMProvider {
return this.getOrCreateProvider(providerId);
}
private getOrCreateProvider(providerId: string): LLMProvider {
const cached = this.cache.get(providerId);
if (cached) return cached;
const providerRow = providerRepo.getById(providerId);
if (!providerRow) {
throw new LLMError(`Provider '${providerId}' not found in database`, 'gateway');
}
if (!providerRow.is_enabled) {
throw new LLMError(`Provider '${providerRow.name}' is disabled`, 'gateway');
}
const apiKey = secretRepo.get(providerId);
if (!apiKey) {
throw new LLMAuthError(
providerRow.type,
`No API key configured for provider '${providerRow.name}'`
);
}
const factory = PROVIDER_FACTORIES[providerRow.type as ProviderType];
if (!factory) {
throw new LLMError(`Unknown provider type '${providerRow.type}'`, providerRow.type);
}
let extraConfig: Record<string, unknown> = {};
try {
extraConfig = JSON.parse(providerRow.extra_config || '{}');
} catch {
// Invalid JSON in extra_config — use empty
}
const provider = factory({
baseUrl: providerRow.base_url || undefined,
apiKey,
defaultModel: providerRow.default_model,
extraConfig,
});
this.cache.set(providerId, provider);
return provider;
}
}
// Singleton instance
export const llmGateway = new LLMGateway();
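
A usage sketch; it assumes a 'planner' row exists in model_role_assignments and that 'provider-123' is a placeholder provider id:

import { llmGateway } from './src/llm/gateway';

async function planDeployment(): Promise<void> {
  // Role-based call: the gateway resolves provider + model from model_role_assignments.
  const response = await llmGateway.chatForRole('planner', {
    messages: [
      { role: 'system', content: 'You are a planning assistant.' },
      { role: 'user', content: 'Break this task into steps: deploy the service.' },
    ],
    responseFormat: 'json',
  });
  console.log(response.content, response.usage.totalTokens);

  // After a key or base URL changes in the UI, drop the cached instance so the
  // next call rebuilds the provider with the new configuration.
  llmGateway.invalidateProvider('provider-123');
}

void planDeployment();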

src/llm/providers/anthropic.ts (new file)

@@ -0,0 +1,252 @@
/**
* Anthropic Messages API adapter.
*
* Key differences from OpenAI:
* - System message → `system` top-level param (not in messages array)
* - No native JSON mode → prompt injection + JSON.parse with regex fallback
* - Tool results → role='user' with tool_result content blocks
* - Finish reasons: end_turn→stop, tool_use→tool_calls, max_tokens→length
*/
import Anthropic from '@anthropic-ai/sdk';
import { DEFAULT_CAPABILITIES, type ProviderCapabilities } from '../capabilities';
import {
LLMAuthError,
LLMConnectionError,
LLMContextLengthError,
LLMRateLimitError,
LLMResponseError,
} from '../errors';
import { toAnthropicTools } from '../tool-converter';
import type {
LLMChatRequest,
LLMChatResponse,
LLMFinishReason,
LLMMessage,
LLMToolCall,
} from '../types';
import type { LLMProvider, ProviderFactory } from './base';
const TYPE = 'anthropic';
const JSON_MODE_SUFFIX =
'\n\nYou MUST respond with valid JSON only. No markdown, no explanation, no other text outside the JSON object.';
function mapStopReason(reason: string | null | undefined): LLMFinishReason {
switch (reason) {
case 'end_turn':
return 'stop';
case 'tool_use':
return 'tool_calls';
case 'max_tokens':
return 'length';
default:
return 'stop';
}
}
/**
* Separate system messages from conversation messages.
* Anthropic requires system as a top-level param, not inside the messages array.
*/
function convertMessages(
messages: LLMMessage[],
jsonMode: boolean
): { system: string | undefined; anthropicMessages: Anthropic.MessageParam[] } {
let system: string | undefined;
const anthropicMessages: Anthropic.MessageParam[] = [];
for (const msg of messages) {
if (msg.role === 'system') {
const text = jsonMode ? msg.content + JSON_MODE_SUFFIX : msg.content;
system = system ? `${system}\n${text}` : text;
continue;
}
if (msg.role === 'tool') {
// Tool results go as user messages with tool_result content block
anthropicMessages.push({
role: 'user',
content: [
{
type: 'tool_result',
tool_use_id: msg.toolCallId!,
content: msg.content,
},
],
});
continue;
}
if (msg.role === 'assistant' && msg.toolCalls?.length) {
const content: Anthropic.ContentBlockParam[] = [];
if (msg.content) {
content.push({ type: 'text', text: msg.content });
}
for (const tc of msg.toolCalls) {
content.push({
type: 'tool_use',
id: tc.id,
name: tc.name,
input: JSON.parse(tc.arguments),
});
}
anthropicMessages.push({ role: 'assistant', content });
continue;
}
anthropicMessages.push({
role: msg.role as 'user' | 'assistant',
content: msg.content,
});
}
// If JSON mode requested but no system message existed, create one
if (jsonMode && !system) {
system = JSON_MODE_SUFFIX.trim();
}
return { system, anthropicMessages };
}
/**
* Try to extract JSON from content that might have markdown wrapping.
* Falls back to returning the raw content.
*/
function extractJson(content: string): string {
// Try direct parse first
try {
JSON.parse(content);
return content;
} catch {
// Noop — try regex extraction
}
// Try extracting from ```json ... ``` blocks
const jsonBlockMatch = content.match(/```(?:json)?\s*\n?([\s\S]*?)\n?\s*```/);
if (jsonBlockMatch?.[1]) {
try {
JSON.parse(jsonBlockMatch[1].trim());
return jsonBlockMatch[1].trim();
} catch {
// Noop
}
}
// Try finding first { ... } or [ ... ]
const objectMatch = content.match(/\{[\s\S]*\}/);
if (objectMatch?.[0]) {
try {
JSON.parse(objectMatch[0]);
return objectMatch[0];
} catch {
// Noop
}
}
return content;
}
class AnthropicProvider implements LLMProvider {
readonly type = TYPE;
readonly capabilities: ProviderCapabilities;
private client: Anthropic;
private defaultModel: string;
constructor(config: {
baseUrl?: string;
apiKey: string;
defaultModel: string;
extraConfig: Record<string, unknown>;
}) {
this.client = new Anthropic({
apiKey: config.apiKey,
...(config.baseUrl ? { baseURL: config.baseUrl } : {}),
});
this.defaultModel = config.defaultModel;
this.capabilities = { ...DEFAULT_CAPABILITIES.anthropic };
}
async chat(request: LLMChatRequest): Promise<LLMChatResponse> {
const model = request.model || this.defaultModel;
const jsonMode = request.responseFormat === 'json';
const { system, anthropicMessages } = convertMessages(request.messages, jsonMode);
const params: Anthropic.MessageCreateParamsNonStreaming = {
model,
messages: anthropicMessages,
max_tokens: request.maxTokens ?? 4096,
...(system ? { system } : {}),
...(request.temperature !== undefined ? { temperature: request.temperature } : {}),
...(request.tools?.length ? { tools: toAnthropicTools(request.tools) as any } : {}),
};
try {
const response = await this.client.messages.create(params);
let content: string | null = null;
const toolCalls: LLMToolCall[] = [];
for (const block of response.content) {
if (block.type === 'text') {
content = (content || '') + block.text;
} else if (block.type === 'tool_use') {
toolCalls.push({
id: block.id,
name: block.name,
arguments: JSON.stringify(block.input),
});
}
}
// JSON mode fallback: extract JSON from content
if (jsonMode && content) {
content = extractJson(content);
}
return {
content,
toolCalls,
finishReason: mapStopReason(response.stop_reason),
usage: {
promptTokens: response.usage.input_tokens,
completionTokens: response.usage.output_tokens,
totalTokens: response.usage.input_tokens + response.usage.output_tokens,
},
raw: response,
};
} catch (error) {
throw this.wrapError(error);
}
}
async listModels(): Promise<string[]> {
const response = await this.client.models.list();
const models: string[] = [];
for await (const page of response.iterPages()) {
models.push(...page.data.map((model) => model.id));
}
return models;
}
private wrapError(error: unknown): Error {
if (error instanceof Anthropic.APIError) {
if (error.status === 401) return new LLMAuthError(TYPE, error.message);
if (error.status === 429) return new LLMRateLimitError(TYPE);
if (error.status === 400 && error.message?.includes('too long'))
return new LLMContextLengthError(TYPE, error.message);
if (error.status && error.status >= 500)
return new LLMConnectionError(TYPE, error.message, error.status);
return new LLMResponseError(TYPE, error.message);
}
if (
error instanceof Error &&
(error.message.includes('ECONNREFUSED') || error.message.includes('fetch'))
) {
return new LLMConnectionError(TYPE, error.message);
}
return error instanceof Error ? error : new Error(String(error));
}
}
export const createAnthropicProvider: ProviderFactory = (config) => new AnthropicProvider(config);
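
To make the mapping above concrete, a unified tool round trip and the shape convertMessages produces for it (values are illustrative):

import type { LLMMessage } from './src/llm/types';

// Unified form: the assistant requested a tool, then the tool result came back.
const unified: LLMMessage[] = [
  { role: 'user', content: 'What is the weather in Berlin?' },
  {
    role: 'assistant',
    content: '',
    toolCalls: [{ id: 'toolu_01', name: 'get_weather', arguments: '{"city":"Berlin"}' }],
  },
  { role: 'tool', content: '{"temp_c":18}', toolCallId: 'toolu_01' },
];

// Anthropic form: tool_use on the assistant turn, tool_result inside a user turn
// referencing the same id.
// [
//   { role: 'user', content: 'What is the weather in Berlin?' },
//   { role: 'assistant', content: [{ type: 'tool_use', id: 'toolu_01', name: 'get_weather', input: { city: 'Berlin' } }] },
//   { role: 'user', content: [{ type: 'tool_result', tool_use_id: 'toolu_01', content: '{"temp_c":18}' }] },
// ]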

src/llm/providers/base.ts (new file)

@@ -0,0 +1,50 @@
/**
* LLMProvider abstract interface.
*
* Each adapter (openai-compatible, openai-responses, anthropic, gemini)
* implements this interface. The Gateway only interacts with providers
* through this contract.
*/
import type { ProviderCapabilities } from '../capabilities';
import type { LLMChatRequest, LLMChatResponse } from '../types';
// ---------------------------------------------------------------------------
// Provider Interface
// ---------------------------------------------------------------------------
export interface LLMProvider {
/** Provider type identifier (e.g. 'openai_compatible', 'anthropic'). */
readonly type: string;
/** Capability declaration for this provider. */
readonly capabilities: ProviderCapabilities;
/**
* Core chat method. The Gateway only calls this method.
*
* Each adapter is responsible for:
* 1. Converting LLMChatRequest to provider-native format
* 2. Making the HTTP/SDK call
* 3. Converting the native response to LLMChatResponse
*/
chat(request: LLMChatRequest): Promise<LLMChatResponse>;
/** Optional: embedding interface (only for providers that support it). */
embed?(texts: string[]): Promise<number[][]>;
/** Optional: list available models for this provider. */
listModels?(): Promise<string[]>;
}
// ---------------------------------------------------------------------------
// Provider Factory
// ---------------------------------------------------------------------------
/** Factory function signature: create an LLMProvider from DB config + decrypted API key. */
export type ProviderFactory = (config: {
baseUrl?: string;
apiKey: string;
defaultModel: string;
extraConfig: Record<string, unknown>;
}) => LLMProvider;
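
A minimal stub implementing the contract, useful as a mock in tests; not part of this commit:

import { DEFAULT_CAPABILITIES } from './src/llm/capabilities';
import type { LLMProvider } from './src/llm/providers/base';
import type { LLMChatRequest, LLMChatResponse } from './src/llm/types';

// Echo provider: satisfies the interface without making any network calls.
export const echoProvider: LLMProvider = {
  type: 'openai_compatible',
  capabilities: { ...DEFAULT_CAPABILITIES.openai_compatible },
  async chat(request: LLMChatRequest): Promise<LLMChatResponse> {
    const last = request.messages[request.messages.length - 1];
    return {
      content: last?.content ?? null,
      toolCalls: [],
      finishReason: 'stop',
      usage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
    };
  },
};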

src/llm/providers/gemini.ts (new file)

@@ -0,0 +1,234 @@
/**
* Google Gemini generateContent API adapter.
*
* Key differences from OpenAI:
* - System message → systemInstruction param
* - Role 'assistant' → 'model'
* - Tool calls → functionCall parts; tool results → functionResponse parts
 * - Finish reasons: STOP, MAX_TOKENS, SAFETY/RECITATION; tool calls surface as functionCall parts (the API still reports STOP)
* - Token usage: usageMetadata.{promptTokenCount, candidatesTokenCount}
*/
import { type Content, GoogleGenAI, type Part } from '@google/genai';
import { DEFAULT_CAPABILITIES, type ProviderCapabilities } from '../capabilities';
import {
LLMAuthError,
LLMConnectionError,
LLMContextLengthError,
LLMRateLimitError,
LLMResponseError,
} from '../errors';
import { toGeminiTools } from '../tool-converter';
import type {
LLMChatRequest,
LLMChatResponse,
LLMFinishReason,
LLMMessage,
LLMToolCall,
} from '../types';
import type { LLMProvider, ProviderFactory } from './base';
const TYPE = 'gemini';
function mapFinishReason(reason: string | undefined): LLMFinishReason {
switch (reason) {
case 'STOP':
return 'stop';
case 'MAX_TOKENS':
return 'length';
case 'SAFETY':
return 'content_filter';
case 'RECITATION':
return 'content_filter';
default:
return 'stop';
}
}
function convertMessages(messages: LLMMessage[]): {
systemInstruction: string | undefined;
contents: Content[];
} {
let systemInstruction: string | undefined;
const contents: Content[] = [];
for (const msg of messages) {
if (msg.role === 'system') {
systemInstruction = systemInstruction ? `${systemInstruction}\n${msg.content}` : msg.content;
continue;
}
if (msg.role === 'tool') {
// Tool results → function role with functionResponse part
contents.push({
role: 'function',
parts: [
{
functionResponse: {
name: msg.toolCallId || 'unknown',
response: safeParseJson(msg.content),
},
} as Part,
],
});
continue;
}
if (msg.role === 'assistant' && msg.toolCalls?.length) {
const parts: Part[] = [];
if (msg.content) {
parts.push({ text: msg.content });
}
for (const tc of msg.toolCalls) {
parts.push({
functionCall: {
name: tc.name,
args: safeParseJson(tc.arguments),
},
} as Part);
}
contents.push({ role: 'model', parts });
continue;
}
// Gemini uses 'model' instead of 'assistant'
const role = msg.role === 'assistant' ? 'model' : 'user';
contents.push({
role,
parts: [{ text: msg.content }],
});
}
return { systemInstruction, contents };
}
function safeParseJson(str: string): any {
try {
return JSON.parse(str);
} catch {
return { result: str };
}
}
class GeminiProvider implements LLMProvider {
readonly type = TYPE;
readonly capabilities: ProviderCapabilities;
private genAI: GoogleGenAI;
private defaultModel: string;
constructor(config: {
baseUrl?: string;
apiKey: string;
defaultModel: string;
extraConfig: Record<string, unknown>;
}) {
this.genAI = new GoogleGenAI({ apiKey: config.apiKey });
this.defaultModel = config.defaultModel;
this.capabilities = { ...DEFAULT_CAPABILITIES.gemini };
}
async chat(request: LLMChatRequest): Promise<LLMChatResponse> {
const modelId = request.model || this.defaultModel;
const { systemInstruction, contents } = convertMessages(request.messages);
try {
const response = await this.genAI.models.generateContent({
model: modelId,
contents,
config: {
...(systemInstruction ? { systemInstruction } : {}),
...(request.temperature !== undefined ? { temperature: request.temperature } : {}),
...(request.maxTokens !== undefined ? { maxOutputTokens: request.maxTokens } : {}),
...(request.responseFormat === 'json' ? { responseMimeType: 'application/json' } : {}),
...(request.tools?.length ? { tools: toGeminiTools(request.tools) as any } : {}),
},
});
const candidate = response.candidates?.[0];
let content: string | null = null;
const toolCalls: LLMToolCall[] = [];
if (candidate?.content?.parts) {
for (const part of candidate.content.parts) {
if ('text' in part && part.text) {
content = (content || '') + part.text;
}
if ('functionCall' in part && part.functionCall) {
const toolName = String(part.functionCall.name ?? 'unknown');
toolCalls.push({
id: `call_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
name: toolName,
arguments: JSON.stringify(part.functionCall.args || {}),
});
}
}
}
const hasFunctionCalls = toolCalls.length > 0;
const finishReason = hasFunctionCalls
? 'tool_calls'
: mapFinishReason(candidate?.finishReason as string | undefined);
const usage = response.usageMetadata;
return {
content,
toolCalls,
finishReason,
usage: {
promptTokens: usage?.promptTokenCount ?? 0,
completionTokens: usage?.candidatesTokenCount ?? 0,
totalTokens: usage?.totalTokenCount ?? 0,
},
raw: response,
};
} catch (error) {
throw this.wrapError(error);
}
}
async embed(texts: string[]): Promise<number[][]> {
try {
const results: number[][] = [];
for (const text of texts) {
const result = await this.genAI.models.embedContent({
model: this.defaultModel,
contents: text,
});
results.push(result.embeddings?.[0]?.values ?? []);
}
return results;
} catch (error) {
throw this.wrapError(error);
}
}
async listModels(): Promise<string[]> {
const pager = await this.genAI.models.list();
const models: string[] = [];
for await (const model of pager) {
if (model.name) {
models.push(model.name.replace('models/', ''));
}
}
return models;
}
private wrapError(error: unknown): Error {
if (error instanceof Error) {
const msg = error.message;
if (msg.includes('API_KEY_INVALID') || msg.includes('401'))
return new LLMAuthError(TYPE, msg);
if (msg.includes('429') || msg.includes('RESOURCE_EXHAUSTED'))
return new LLMRateLimitError(TYPE);
if (msg.includes('context') || msg.includes('too long'))
return new LLMContextLengthError(TYPE, msg);
if (msg.includes('ECONNREFUSED') || msg.includes('fetch') || msg.includes('500'))
return new LLMConnectionError(TYPE, msg);
return new LLMResponseError(TYPE, msg);
}
return new Error(String(error));
}
}
export const createGeminiProvider: ProviderFactory = (config) => new GeminiProvider(config);
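
As with the Anthropic adapter, a sketch of the same tool round trip and what convertMessages produces here (values are illustrative):

import type { LLMMessage } from './src/llm/types';

// Unified form with one tool call and its result.
const unified: LLMMessage[] = [
  { role: 'user', content: 'Look up order 42.' },
  {
    role: 'assistant',
    content: '',
    toolCalls: [{ id: 'call_abc', name: 'get_order', arguments: '{"id":42}' }],
  },
  { role: 'tool', content: '{"status":"shipped"}', toolCallId: 'call_abc' },
];

// Gemini form: 'assistant' becomes role 'model' with a functionCall part, and the
// tool result becomes a role 'function' Content carrying a functionResponse part.
// [
//   { role: 'user', parts: [{ text: 'Look up order 42.' }] },
//   { role: 'model', parts: [{ functionCall: { name: 'get_order', args: { id: 42 } } }] },
//   { role: 'function', parts: [{ functionResponse: { name: 'call_abc', response: { status: 'shipped' } } }] },
// ]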

src/llm/providers/openai-compatible.ts (new file)

@@ -0,0 +1,172 @@
/**
* OpenAI Compatible adapter — for any service implementing the OpenAI chat.completions API.
* This is the existing format used by the codebase today.
*/
import OpenAI from 'openai';
import { DEFAULT_CAPABILITIES, type ProviderCapabilities } from '../capabilities';
import {
LLMAuthError,
LLMConnectionError,
LLMContextLengthError,
LLMRateLimitError,
LLMResponseError,
} from '../errors';
import { toOpenAITools } from '../tool-converter';
import type {
LLMChatRequest,
LLMChatResponse,
LLMFinishReason,
LLMMessage,
LLMToolCall,
} from '../types';
import type { LLMProvider, ProviderFactory } from './base';
const TYPE = 'openai_compatible';
function mapFinishReason(reason: string | null | undefined): LLMFinishReason {
switch (reason) {
case 'stop':
return 'stop';
case 'tool_calls':
return 'tool_calls';
case 'length':
return 'length';
case 'content_filter':
return 'content_filter';
default:
return 'stop';
}
}
function toOpenAIMessages(messages: LLMMessage[]): OpenAI.ChatCompletionMessageParam[] {
return messages.map((msg) => {
if (msg.role === 'tool') {
return {
role: 'tool' as const,
content: msg.content,
tool_call_id: msg.toolCallId!,
};
}
if (msg.role === 'assistant' && msg.toolCalls?.length) {
return {
role: 'assistant' as const,
content: msg.content || null,
tool_calls: msg.toolCalls.map((tc) => ({
id: tc.id,
type: 'function' as const,
function: { name: tc.name, arguments: tc.arguments },
})),
};
}
return {
role: msg.role as 'system' | 'user' | 'assistant',
content: msg.content,
};
});
}
function extractToolCalls(choice: OpenAI.ChatCompletion.Choice): LLMToolCall[] {
const toolCalls = choice.message.tool_calls;
if (!toolCalls?.length) return [];
return toolCalls.map((tc) => ({
id: tc.id,
name: tc.function.name,
arguments: tc.function.arguments,
}));
}
class OpenAICompatibleProvider implements LLMProvider {
readonly type = TYPE;
readonly capabilities: ProviderCapabilities;
private client: OpenAI;
private defaultModel: string;
constructor(config: {
baseUrl?: string;
apiKey: string;
defaultModel: string;
extraConfig: Record<string, unknown>;
}) {
this.client = new OpenAI({
baseURL: config.baseUrl || 'https://api.openai.com/v1',
apiKey: config.apiKey,
...(config.extraConfig.organization
? { organization: config.extraConfig.organization as string }
: {}),
});
this.defaultModel = config.defaultModel;
this.capabilities = { ...DEFAULT_CAPABILITIES.openai_compatible };
}
async chat(request: LLMChatRequest): Promise<LLMChatResponse> {
const model = request.model || this.defaultModel;
const params: OpenAI.ChatCompletionCreateParamsNonStreaming = {
model,
messages: toOpenAIMessages(request.messages),
...(request.temperature !== undefined ? { temperature: request.temperature } : {}),
...(request.maxTokens !== undefined ? { max_tokens: request.maxTokens } : {}),
...(request.responseFormat === 'json' ? { response_format: { type: 'json_object' } } : {}),
...(request.tools?.length ? { tools: toOpenAITools(request.tools) as any } : {}),
};
try {
const response = await this.client.chat.completions.create(params);
const choice = response.choices[0];
return {
content: choice?.message?.content ?? null,
toolCalls: choice ? extractToolCalls(choice) : [],
finishReason: mapFinishReason(choice?.finish_reason),
usage: {
promptTokens: response.usage?.prompt_tokens ?? 0,
completionTokens: response.usage?.completion_tokens ?? 0,
totalTokens: response.usage?.total_tokens ?? 0,
},
raw: response,
};
} catch (error) {
throw this.wrapError(error);
}
}
async embed(texts: string[]): Promise<number[][]> {
try {
const response = await this.client.embeddings.create({
model: this.defaultModel,
input: texts,
});
return response.data.map((d) => d.embedding);
} catch (error) {
throw this.wrapError(error);
}
}
async listModels(): Promise<string[]> {
const response = await this.client.models.list();
return response.data.map((model) => model.id);
}
private wrapError(error: unknown): Error {
if (error instanceof OpenAI.APIError) {
if (error.status === 401) return new LLMAuthError(TYPE, error.message);
if (error.status === 429) return new LLMRateLimitError(TYPE);
if (error.status === 400 && error.message?.includes('context_length'))
return new LLMContextLengthError(TYPE, error.message);
if (error.status && error.status >= 500)
return new LLMConnectionError(TYPE, error.message, error.status);
return new LLMResponseError(TYPE, error.message);
}
if (
error instanceof Error &&
(error.message.includes('ECONNREFUSED') || error.message.includes('fetch'))
) {
return new LLMConnectionError(TYPE, error.message);
}
return error instanceof Error ? error : new Error(String(error));
}
}
export const createOpenAICompatibleProvider: ProviderFactory = (config) =>
new OpenAICompatibleProvider(config);

src/llm/providers/openai-responses.ts (new file)

@@ -0,0 +1,211 @@
/**
* OpenAI Responses API adapter — uses the newer responses.create endpoint.
* System instructions are passed as the `instructions` parameter instead of a system message.
*/
import OpenAI from 'openai';
import { DEFAULT_CAPABILITIES, type ProviderCapabilities } from '../capabilities';
import {
LLMAuthError,
LLMConnectionError,
LLMContextLengthError,
LLMRateLimitError,
LLMResponseError,
} from '../errors';
import { toOpenAITools } from '../tool-converter';
import type {
LLMChatRequest,
LLMChatResponse,
LLMFinishReason,
LLMMessage,
LLMToolCall,
} from '../types';
import type { LLMProvider, ProviderFactory } from './base';
const TYPE = 'openai_responses';
function mapStatus(status: string | undefined): LLMFinishReason {
switch (status) {
case 'completed':
return 'stop';
case 'incomplete':
return 'length';
default:
return 'stop';
}
}
/**
* Extract system instructions and build input items from unified messages.
* System messages → `instructions` parameter.
* Remaining messages → `input` array.
*/
function convertMessages(messages: LLMMessage[]): {
instructions: string | undefined;
input: any[];
} {
let instructions: string | undefined;
const input: any[] = [];
for (const msg of messages) {
if (msg.role === 'system') {
instructions = instructions ? `${instructions}\n${msg.content}` : msg.content;
continue;
}
if (msg.role === 'tool') {
input.push({
type: 'function_call_output',
call_id: msg.toolCallId,
output: msg.content,
});
continue;
}
if (msg.role === 'assistant' && msg.toolCalls?.length) {
// Emit function_call items for each tool call
for (const tc of msg.toolCalls) {
input.push({
type: 'function_call',
// call_id pairs this item with the function_call_output emitted for the tool result
call_id: tc.id,
name: tc.name,
arguments: tc.arguments,
});
}
// Also emit text if present
if (msg.content) {
input.push({ role: 'assistant', content: msg.content });
}
continue;
}
input.push({
role: msg.role,
content: msg.content,
});
}
return { instructions, input };
}
class OpenAIResponsesProvider implements LLMProvider {
readonly type = TYPE;
readonly capabilities: ProviderCapabilities;
private client: OpenAI;
private defaultModel: string;
constructor(config: {
baseUrl?: string;
apiKey: string;
defaultModel: string;
extraConfig: Record<string, unknown>;
}) {
this.client = new OpenAI({
baseURL: config.baseUrl || undefined,
apiKey: config.apiKey,
});
this.defaultModel = config.defaultModel;
this.capabilities = { ...DEFAULT_CAPABILITIES.openai_responses };
}
async chat(request: LLMChatRequest): Promise<LLMChatResponse> {
const model = request.model || this.defaultModel;
const { instructions, input } = convertMessages(request.messages);
const params: any = {
model,
input,
...(instructions ? { instructions } : {}),
...(request.temperature !== undefined ? { temperature: request.temperature } : {}),
...(request.maxTokens !== undefined ? { max_output_tokens: request.maxTokens } : {}),
...(request.responseFormat === 'json' ? { text: { format: { type: 'json_object' } } } : {}),
...(request.tools?.length
? {
tools: toOpenAITools(request.tools).map((t: any) => ({
type: 'function',
...t.function,
})),
}
: {}),
};
try {
const response = await this.client.responses.create(params);
// Extract content and tool calls from output
let content: string | null = null;
const toolCalls: LLMToolCall[] = [];
for (const item of response.output || []) {
if (item.type === 'message') {
for (const block of (item as any).content || []) {
if (block.type === 'output_text') {
content = (content || '') + block.text;
}
}
} else if (item.type === 'function_call') {
toolCalls.push({
// Prefer call_id so a later tool-result message can reference the same identifier
id: (item as any).call_id || (item as any).id || '',
name: (item as any).name,
arguments: (item as any).arguments || '{}',
});
}
}
return {
content,
toolCalls,
finishReason: toolCalls.length > 0 ? 'tool_calls' : mapStatus(response.status),
usage: {
promptTokens: (response.usage as any)?.input_tokens ?? 0,
completionTokens: (response.usage as any)?.output_tokens ?? 0,
totalTokens:
((response.usage as any)?.input_tokens ?? 0) +
((response.usage as any)?.output_tokens ?? 0),
},
raw: response,
};
} catch (error) {
throw this.wrapError(error);
}
}
async embed(texts: string[]): Promise<number[][]> {
try {
const response = await this.client.embeddings.create({
model: this.defaultModel,
input: texts,
});
return response.data.map((d) => d.embedding);
} catch (error) {
throw this.wrapError(error);
}
}
async listModels(): Promise<string[]> {
const response = await this.client.models.list();
return response.data.map((model) => model.id);
}
private wrapError(error: unknown): Error {
if (error instanceof OpenAI.APIError) {
if (error.status === 401) return new LLMAuthError(TYPE, error.message);
if (error.status === 429) return new LLMRateLimitError(TYPE);
if (error.status === 400 && error.message?.includes('context_length'))
return new LLMContextLengthError(TYPE, error.message);
if (error.status && error.status >= 500)
return new LLMConnectionError(TYPE, error.message, error.status);
return new LLMResponseError(TYPE, error.message);
}
if (
error instanceof Error &&
(error.message.includes('ECONNREFUSED') || error.message.includes('fetch'))
) {
return new LLMConnectionError(TYPE, error.message);
}
return error instanceof Error ? error : new Error(String(error));
}
}
export const createOpenAIResponsesProvider: ProviderFactory = (config) =>
new OpenAIResponsesProvider(config);
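
A sketch of the same round trip for this adapter; the instructions string and call_id pairing shown in comments reflect what convertMessages emits (values are illustrative):

import type { LLMMessage } from './src/llm/types';

// Unified form: system prompt plus a completed tool round trip.
const unified: LLMMessage[] = [
  { role: 'system', content: 'Answer tersely.' },
  { role: 'user', content: 'What is 2 + 2?' },
  {
    role: 'assistant',
    content: '',
    toolCalls: [{ id: 'call_1', name: 'calculator', arguments: '{"expr":"2+2"}' }],
  },
  { role: 'tool', content: '4', toolCallId: 'call_1' },
];

// Responses form: the system text becomes `instructions`; the tool call and its
// result become function_call / function_call_output items sharing one call_id.
// instructions: 'Answer tersely.'
// input: [
//   { role: 'user', content: 'What is 2 + 2?' },
//   { type: 'function_call', call_id: 'call_1', name: 'calculator', arguments: '{"expr":"2+2"}' },
//   { type: 'function_call_output', call_id: 'call_1', output: '4' },
// ]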

src/llm/tool-converter.ts (new file)

@@ -0,0 +1,37 @@
/**
* Converts internal LLMToolDefinition to provider-native tool formats.
* Called by each adapter in their chat() method.
*/
import type { LLMToolDefinition } from './types';
export function toOpenAITools(tools: LLMToolDefinition[]): object[] {
return tools.map((tool) => ({
type: 'function' as const,
function: {
name: tool.name,
description: tool.description,
parameters: tool.parameters,
},
}));
}
export function toAnthropicTools(tools: LLMToolDefinition[]): object[] {
return tools.map((tool) => ({
name: tool.name,
description: tool.description,
input_schema: tool.parameters,
}));
}
export function toGeminiTools(tools: LLMToolDefinition[]): object[] {
return [
{
functionDeclarations: tools.map((tool) => ({
name: tool.name,
description: tool.description,
parameters: tool.parameters,
})),
},
];
}
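
A quick sketch of one definition converted to all three native shapes (the weather tool is illustrative):

import { toAnthropicTools, toGeminiTools, toOpenAITools } from './src/llm/tool-converter';
import type { LLMToolDefinition } from './src/llm/types';

const tools: LLMToolDefinition[] = [
  {
    name: 'get_weather',
    description: 'Get current weather for a city',
    parameters: {
      type: 'object',
      properties: { city: { type: 'string' } },
      required: ['city'],
    },
  },
];

console.log(toOpenAITools(tools)); // [{ type: 'function', function: { name, description, parameters } }]
console.log(toAnthropicTools(tools)); // [{ name, description, input_schema }]
console.log(toGeminiTools(tools)); // [{ functionDeclarations: [{ name, description, parameters }] }]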

src/llm/types.ts (new file)

@@ -0,0 +1,104 @@
/**
* Unified LLM types — provider-agnostic request/response format.
*
* All business code uses these types exclusively.
* Provider adapters translate to/from these types.
*/
// ---------------------------------------------------------------------------
// Model Role
// ---------------------------------------------------------------------------
/** Business role that maps to a specific provider + model via DB config. */
export type ModelRole = 'legacy' | 'planner' | 'specialist' | 'judge' | 'embedding';
/** All valid model roles. */
export const MODEL_ROLES: readonly ModelRole[] = [
'legacy',
'planner',
'specialist',
'judge',
'embedding',
] as const;
// ---------------------------------------------------------------------------
// Provider Type
// ---------------------------------------------------------------------------
export type ProviderType = 'openai_compatible' | 'openai_responses' | 'anthropic' | 'gemini';
export const PROVIDER_TYPES: readonly ProviderType[] = [
'openai_compatible',
'openai_responses',
'anthropic',
'gemini',
] as const;
// ---------------------------------------------------------------------------
// Messages
// ---------------------------------------------------------------------------
/** Unified message format (internal representation, hides provider differences). */
export interface LLMMessage {
role: 'system' | 'user' | 'assistant' | 'tool';
content: string;
/** When role='tool', the ID of the tool call this result corresponds to. */
toolCallId?: string;
/** When role='assistant', any tool calls the model wants to make. */
toolCalls?: LLMToolCall[];
}
/** A single tool call from the model. */
export interface LLMToolCall {
id: string;
name: string;
/** JSON-serialized arguments */
arguments: string;
}
/** Tool definition in provider-agnostic format. Converted to native format by tool-converter. */
export interface LLMToolDefinition {
name: string;
description: string;
/** JSON Schema for the tool's parameters */
parameters: Record<string, unknown>;
}
// ---------------------------------------------------------------------------
// Request
// ---------------------------------------------------------------------------
/** Unified chat request — provider adapters translate this to native format. */
export interface LLMChatRequest {
messages: LLMMessage[];
model: string;
temperature?: number;
maxTokens?: number;
/** Abstract JSON mode: adapters handle native support or prompt injection fallback. */
responseFormat?: 'text' | 'json';
tools?: LLMToolDefinition[];
/** Provider-specific passthrough config (e.g. Anthropic thinking, Gemini safetySettings). */
providerOptions?: Record<string, unknown>;
}
// ---------------------------------------------------------------------------
// Response
// ---------------------------------------------------------------------------
/** Unified chat response — provider adapters normalize to this format. */
export interface LLMChatResponse {
content: string | null;
toolCalls: LLMToolCall[];
finishReason: LLMFinishReason;
usage: LLMUsage;
/** Raw provider response preserved for debugging. */
raw?: unknown;
}
export type LLMFinishReason = 'stop' | 'tool_calls' | 'length' | 'content_filter' | 'error';
export interface LLMUsage {
promptTokens: number;
completionTokens: number;
totalTokens: number;
}