Mirror of https://github.com/jeffusion/gitea-ai-assistant.git (synced 2026-03-27 10:05:50 +00:00)
feat(llm): add pluggable multi-provider LLM architecture
Introduce a provider-agnostic LLM gateway supporting four provider types: OpenAI Compatible, OpenAI Responses API, Anthropic Messages API, and Google Gemini API. Each provider adapter normalizes responses to a unified LLMChatResponse format with tool-call support. Also includes AES-256-GCM encrypted secret management for API keys and a tool converter for cross-provider tool format translation. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)
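
The intended call path, as an illustrative sketch (not part of this diff): business code imports the singleton gateway and calls it by role; the role-to-provider/model mapping and the decrypted API key are resolved internally. The 'planner' role assignment and the import path are assumptions.

// Illustrative usage sketch; assumes this file lives under src/ and that the
// 'planner' role has been assigned to a provider via the admin UI.
import { llmGateway } from './llm/gateway';

async function summarizeDiff(diff: string) {
  const res = await llmGateway.chatForRole('planner', {
    messages: [
      { role: 'system', content: 'You are a code review assistant.' },
      { role: 'user', content: `Summarize the risk of this diff:\n${diff}` },
    ],
    responseFormat: 'json',
  });
  return { summary: res.content, tokens: res.usage.totalTokens };
}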
src/crypto/secrets.ts (136 lines, Normal file)
@@ -0,0 +1,136 @@
/**
 * AES-256-GCM encryption module for API key storage.
 *
 * Master key lifecycle:
 * 1. On first startup, generate 32 random bytes → write to DATA_DIR/master.key (chmod 600)
 * 2. On subsequent startups, read existing master.key
 * 3. Key stays in memory for process lifetime; never logged or exposed via API
 */

import { chmodSync, existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
import { dirname, resolve } from 'node:path';

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

const KEY_LENGTH = 32; // AES-256
const IV_LENGTH = 12; // GCM recommended nonce size

// ---------------------------------------------------------------------------
// Master Key Management
// ---------------------------------------------------------------------------

let masterKey: Buffer | null = null;

/**
 * Resolve the master key file path.
 * Defaults to `data/master.key` relative to CWD, overridable via `MASTER_KEY_PATH` env.
 */
function getMasterKeyPath(): string {
  return resolve(process.env.MASTER_KEY_PATH || './data/master.key');
}

/**
 * Initialize (load or generate) the master encryption key.
 * MUST be called once at application startup before any encrypt/decrypt operations.
 */
export function initMasterKey(): void {
  const keyPath = getMasterKeyPath();

  if (existsSync(keyPath)) {
    const raw = readFileSync(keyPath);
    if (raw.length !== KEY_LENGTH) {
      throw new Error(
        `Master key at ${keyPath} is ${raw.length} bytes, expected ${KEY_LENGTH}. Delete the file and restart to generate a new key (all encrypted API keys will need to be re-entered).`
      );
    }
    masterKey = Buffer.from(raw);
    console.log(`🔑 Master key loaded from ${keyPath}`);
  } else {
    const dir = dirname(keyPath);
    mkdirSync(dir, { recursive: true });

    const key = Buffer.from(crypto.getRandomValues(new Uint8Array(KEY_LENGTH)));
    writeFileSync(keyPath, key, { mode: 0o600 });

    try {
      chmodSync(keyPath, 0o600);
    } catch {
      // chmod may fail on some filesystems (e.g. Windows); non-fatal
    }

    masterKey = key;
    console.log(`🔑 Generated new master key at ${keyPath}`);
  }
}

/**
 * Get the current master key. Throws if not initialized.
 */
function getKey(): Buffer {
  if (!masterKey) {
    throw new Error('Master key not initialized. Call initMasterKey() at startup.');
  }
  return masterKey;
}

// ---------------------------------------------------------------------------
// Encryption / Decryption
// ---------------------------------------------------------------------------

export interface EncryptedPayload {
  /** AES-256-GCM ciphertext */
  ciphertext: Buffer;
  /** 12-byte initialization vector / nonce */
  iv: Buffer;
  /** 16-byte GCM authentication tag */
  authTag: Buffer;
}

/**
 * Encrypt a plaintext string (e.g. API key) with AES-256-GCM.
 *
 * @returns Encrypted payload containing ciphertext, IV, and auth tag
 */
export function encrypt(plaintext: string): EncryptedPayload {
  const key = getKey();
  const iv = Buffer.from(crypto.getRandomValues(new Uint8Array(IV_LENGTH)));

  // Use Web Crypto–style via Node's built-in crypto
  const { createCipheriv } = require('node:crypto') as typeof import('node:crypto');
  const cipher = createCipheriv('aes-256-gcm', key, iv);

  const encrypted = Buffer.concat([cipher.update(plaintext, 'utf8'), cipher.final()]);
  const authTag = cipher.getAuthTag();

  return {
    ciphertext: encrypted,
    iv,
    authTag,
  };
}

/**
 * Decrypt an AES-256-GCM encrypted payload back to plaintext.
 *
 * @throws If auth tag verification fails (tampered data or wrong key)
 */
export function decrypt(payload: EncryptedPayload): string {
  const key = getKey();

  const { createDecipheriv } = require('node:crypto') as typeof import('node:crypto');
  const decipher = createDecipheriv('aes-256-gcm', key, payload.iv);
  decipher.setAuthTag(payload.authTag);

  const decrypted = Buffer.concat([decipher.update(payload.ciphertext), decipher.final()]);
  return decrypted.toString('utf8');
}

/**
 * Check if the master key has been initialized.
 */
export function isMasterKeyReady(): boolean {
  return masterKey !== null;
}
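
A minimal round-trip sketch of how this module is expected to be used at startup and by the secret repository. The key value and the base64 storage layout are assumptions for illustration; the actual repo schema may differ.

import { decrypt, encrypt, initMasterKey } from './crypto/secrets';

initMasterKey(); // once, at startup, before any encrypt/decrypt calls

const payload = encrypt('sk-example-api-key'); // hypothetical key value
const roundTripped = decrypt(payload);         // === 'sk-example-api-key'

// One possible persistence layout (assumption): base64-encode the three buffers.
const stored = {
  ciphertext: payload.ciphertext.toString('base64'),
  iv: payload.iv.toString('base64'),
  authTag: payload.authTag.toString('base64'),
};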
src/llm/capabilities.ts (63 lines, Normal file)
@@ -0,0 +1,63 @@
/**
 * Provider capability declarations.
 *
 * Each provider type declares what features it supports natively.
 * The Gateway and adapters use this to make runtime decisions
 * (e.g. prompt-inject JSON mode for Anthropic, skip embedding calls for non-supporting providers).
 */

import type { ProviderType } from './types';

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

export interface ProviderCapabilities {
  /** Supports tool/function calling */
  supportsTools: boolean;
  /** Supports native JSON mode (vs. prompt injection + manual parsing) */
  supportsJsonMode: boolean;
  /** Supports SSE streaming */
  supportsStreaming: boolean;
  /** Supports embedding API */
  supportsEmbeddings: boolean;
  /** Supports image/multimodal input */
  supportsMultimodal: boolean;
  /** Max input token count (for pre-validation, avoids wasted API calls) */
  maxInputTokens?: number;
}

// ---------------------------------------------------------------------------
// Default capabilities per provider type
// ---------------------------------------------------------------------------

export const DEFAULT_CAPABILITIES: Record<ProviderType, ProviderCapabilities> = {
  openai_compatible: {
    supportsTools: true,
    supportsJsonMode: true,
    supportsStreaming: true,
    supportsEmbeddings: true,
    supportsMultimodal: false, // depends on actual model behind the compatible endpoint
  },
  openai_responses: {
    supportsTools: true,
    supportsJsonMode: true,
    supportsStreaming: true,
    supportsEmbeddings: true,
    supportsMultimodal: true,
  },
  anthropic: {
    supportsTools: true,
    supportsJsonMode: false, // no native JSON mode; adapter uses prompt injection
    supportsStreaming: true,
    supportsEmbeddings: false,
    supportsMultimodal: true,
  },
  gemini: {
    supportsTools: true,
    supportsJsonMode: true, // responseMimeType: 'application/json'
    supportsStreaming: true,
    supportsEmbeddings: true,
    supportsMultimodal: true,
  },
};
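
An illustrative guard showing how caller code might consult these declarations before doing embedding work, instead of burning a failing API call. The helper function is an assumption, not part of this commit.

import { DEFAULT_CAPABILITIES } from './llm/capabilities';

// Hypothetical helper: check embedding support for a provider type.
function canEmbed(type: keyof typeof DEFAULT_CAPABILITIES): boolean {
  return DEFAULT_CAPABILITIES[type].supportsEmbeddings;
}

canEmbed('anthropic'); // false -> fall back to another role/provider or disable the feature
canEmbed('gemini');    // true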
src/llm/errors.ts (92 lines, Normal file)
@@ -0,0 +1,92 @@
/**
 * Standardized error types for the LLM layer.
 *
 * All provider adapters catch native errors and re-throw as one of these.
 * Business code only needs to handle LLMError subtypes.
 */

// ---------------------------------------------------------------------------
// Base error
// ---------------------------------------------------------------------------

export class LLMError extends Error {
  /** Provider type that caused the error */
  readonly provider: string;
  /** HTTP status code from the provider, if available */
  readonly statusCode?: number;

  constructor(message: string, provider: string, statusCode?: number) {
    super(message);
    this.name = 'LLMError';
    this.provider = provider;
    this.statusCode = statusCode;
  }
}

// ---------------------------------------------------------------------------
// Specific error types
// ---------------------------------------------------------------------------

/** Authentication failed (invalid/expired API key). */
export class LLMAuthError extends LLMError {
  constructor(provider: string, message?: string) {
    super(message || 'Authentication failed: invalid or expired API key', provider, 401);
    this.name = 'LLMAuthError';
  }
}

/** Rate limit exceeded. */
export class LLMRateLimitError extends LLMError {
  /** Seconds until the rate limit resets, if provided by the provider. */
  readonly retryAfterSeconds?: number;

  constructor(provider: string, retryAfterSeconds?: number, message?: string) {
    super(
      message ||
        `Rate limit exceeded${retryAfterSeconds ? ` (retry after ${retryAfterSeconds}s)` : ''}`,
      provider,
      429
    );
    this.name = 'LLMRateLimitError';
    this.retryAfterSeconds = retryAfterSeconds;
  }
}

/** Input too long / context window exceeded. */
export class LLMContextLengthError extends LLMError {
  constructor(provider: string, message?: string) {
    super(message || 'Input exceeds model context length', provider, 400);
    this.name = 'LLMContextLengthError';
  }
}

/** Provider returned an invalid or unparseable response. */
export class LLMResponseError extends LLMError {
  /** The raw response body that couldn't be parsed */
  readonly rawResponse?: unknown;

  constructor(provider: string, message?: string, rawResponse?: unknown) {
    super(message || 'Invalid response from provider', provider);
    this.name = 'LLMResponseError';
    this.rawResponse = rawResponse;
  }
}

/** Provider is unreachable (network error, timeout, 5xx). */
export class LLMConnectionError extends LLMError {
  constructor(provider: string, message?: string, statusCode?: number) {
    super(message || 'Failed to connect to provider', provider, statusCode);
    this.name = 'LLMConnectionError';
  }
}

/** No provider is configured for the requested role. */
export class LLMNoProviderError extends LLMError {
  readonly role: string;

  constructor(role: string) {
    super(`No provider configured for role '${role}'`, 'gateway');
    this.name = 'LLMNoProviderError';
    this.role = role;
  }
}
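
An illustrative handling pattern for gateway callers using these subtypes. The retry policy and UI reactions shown here are assumptions, not behavior added by this commit.

import { LLMAuthError, LLMError, LLMRateLimitError } from './llm/errors';
import { llmGateway } from './llm/gateway';

// Hypothetical caller: one naive retry on rate limits, graceful degradation otherwise.
async function reviewWithRetry(prompt: string): Promise<string | null> {
  try {
    const res = await llmGateway.chatForRole('judge', {
      messages: [{ role: 'user', content: prompt }],
    });
    return res.content;
  } catch (err) {
    if (err instanceof LLMRateLimitError) {
      await new Promise((r) => setTimeout(r, (err.retryAfterSeconds ?? 5) * 1000));
      return reviewWithRetry(prompt); // unbounded here; real code should cap attempts
    }
    if (err instanceof LLMAuthError) {
      return null; // surface a "re-enter API key" hint in the admin UI instead
    }
    if (err instanceof LLMError) {
      console.error(`[${err.provider}] ${err.statusCode ?? ''} ${err.message}`);
      return null;
    }
    throw err;
  }
}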
src/llm/gateway.ts (140 lines, Normal file)
@@ -0,0 +1,140 @@
/**
 * LLM Gateway — the sole entry point for all business-layer LLM calls.
 *
 * Responsibilities:
 * 1. Look up model_role_assignments → provider_id + model for a given role
 * 2. Load (or cache) LLMProvider instances with decrypted API keys
 * 3. Route chat() calls to the correct adapter
 * 4. Invalidate cache when provider config changes via UI
 */

import { type ModelRole, modelRoleRepo } from '../db/repositories/model-role-repo';
import { providerRepo } from '../db/repositories/provider-repo';
import { secretRepo } from '../db/repositories/secret-repo';
import { LLMAuthError, LLMError, LLMNoProviderError } from './errors';
import { createAnthropicProvider } from './providers/anthropic';
import type { LLMProvider } from './providers/base';
import { createGeminiProvider } from './providers/gemini';
import { createOpenAICompatibleProvider } from './providers/openai-compatible';
import { createOpenAIResponsesProvider } from './providers/openai-responses';
import type { LLMChatRequest, LLMChatResponse, ProviderType } from './types';

type ProviderFactoryFn = (config: {
  baseUrl?: string;
  apiKey: string;
  defaultModel: string;
  extraConfig: Record<string, unknown>;
}) => LLMProvider;

const PROVIDER_FACTORIES: Record<ProviderType, ProviderFactoryFn> = {
  openai_compatible: createOpenAICompatibleProvider,
  openai_responses: createOpenAIResponsesProvider,
  anthropic: createAnthropicProvider,
  gemini: createGeminiProvider,
};

export class LLMGateway {
  private cache = new Map<string, LLMProvider>();

  /**
   * Call LLM by business role. The role determines which provider + model to use.
   * The `model` field in the request is ignored — it's resolved from the DB assignment.
   */
  async chatForRole(
    role: ModelRole,
    request: Omit<LLMChatRequest, 'model'>
  ): Promise<LLMChatResponse> {
    const assignment = modelRoleRepo.getByRole(role);
    if (!assignment) throw new LLMNoProviderError(role);

    const provider = this.getOrCreateProvider(assignment.provider_id);
    return provider.chat({ ...request, model: assignment.model });
  }

  /**
   * Direct call to a specific provider (used for connection testing).
   */
  async chatDirect(providerId: string, request: LLMChatRequest): Promise<LLMChatResponse> {
    const provider = this.getOrCreateProvider(providerId);
    return provider.chat(request);
  }

  /**
   * Embedding via the provider assigned to the 'embedding' role.
   */
  async embedForRole(texts: string[]): Promise<number[][]> {
    const assignment = modelRoleRepo.getByRole('embedding');
    if (!assignment) throw new LLMNoProviderError('embedding');

    const provider = this.getOrCreateProvider(assignment.provider_id);
    if (!provider.embed) {
      throw new LLMError(`Provider '${provider.type}' does not support embeddings`, provider.type);
    }
    return provider.embed(texts);
  }

  /**
   * Invalidate cached provider instance (call when config/key changes via UI).
   */
  invalidateProvider(providerId: string): void {
    this.cache.delete(providerId);
  }

  /**
   * Clear all cached providers (call on global config change).
   */
  invalidateAll(): void {
    this.cache.clear();
  }

  /** Get or create a provider instance (for direct access like listing models). */
  getProviderInstance(providerId: string): LLMProvider {
    return this.getOrCreateProvider(providerId);
  }

  private getOrCreateProvider(providerId: string): LLMProvider {
    const cached = this.cache.get(providerId);
    if (cached) return cached;

    const providerRow = providerRepo.getById(providerId);
    if (!providerRow) {
      throw new LLMError(`Provider '${providerId}' not found in database`, 'gateway');
    }
    if (!providerRow.is_enabled) {
      throw new LLMError(`Provider '${providerRow.name}' is disabled`, 'gateway');
    }

    const apiKey = secretRepo.get(providerId);
    if (!apiKey) {
      throw new LLMAuthError(
        providerRow.type,
        `No API key configured for provider '${providerRow.name}'`
      );
    }

    const factory = PROVIDER_FACTORIES[providerRow.type as ProviderType];
    if (!factory) {
      throw new LLMError(`Unknown provider type '${providerRow.type}'`, providerRow.type);
    }

    let extraConfig: Record<string, unknown> = {};
    try {
      extraConfig = JSON.parse(providerRow.extra_config || '{}');
    } catch {
      // Invalid JSON in extra_config — use empty
    }

    const provider = factory({
      baseUrl: providerRow.base_url || undefined,
      apiKey,
      defaultModel: providerRow.default_model,
      extraConfig,
    });

    this.cache.set(providerId, provider);
    return provider;
  }
}

// Singleton instance
export const llmGateway = new LLMGateway();
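
Two illustrative admin-side helpers built on the gateway, as a sketch: a connection test that targets one specific provider via chatDirect, and the cache invalidation hook to call when a key or base URL is edited. The route wiring and the empty-model fallback usage are assumptions.

import { llmGateway } from './llm/gateway';

// Hypothetical connection test: an empty model string makes the adapter fall back
// to the provider's configured default model.
async function testProvider(providerId: string): Promise<boolean> {
  const res = await llmGateway.chatDirect(providerId, {
    model: '',
    messages: [{ role: 'user', content: 'ping' }],
    maxTokens: 8,
  });
  return res.finishReason !== 'error';
}

// Hypothetical config-change hook: drop the cached instance so the next call
// rebuilds it with the fresh key/config.
function onProviderUpdated(providerId: string): void {
  llmGateway.invalidateProvider(providerId);
}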
src/llm/providers/anthropic.ts (252 lines, Normal file)
@@ -0,0 +1,252 @@
/**
 * Anthropic Messages API adapter.
 *
 * Key differences from OpenAI:
 * - System message → `system` top-level param (not in messages array)
 * - No native JSON mode → prompt injection + JSON.parse with regex fallback
 * - Tool results → role='user' with tool_result content blocks
 * - Finish reasons: end_turn→stop, tool_use→tool_calls, max_tokens→length
 */

import Anthropic from '@anthropic-ai/sdk';
import { DEFAULT_CAPABILITIES, type ProviderCapabilities } from '../capabilities';
import {
  LLMAuthError,
  LLMConnectionError,
  LLMContextLengthError,
  LLMRateLimitError,
  LLMResponseError,
} from '../errors';
import { toAnthropicTools } from '../tool-converter';
import type {
  LLMChatRequest,
  LLMChatResponse,
  LLMFinishReason,
  LLMMessage,
  LLMToolCall,
} from '../types';
import type { LLMProvider, ProviderFactory } from './base';

const TYPE = 'anthropic';

const JSON_MODE_SUFFIX =
  '\n\nYou MUST respond with valid JSON only. No markdown, no explanation, no other text outside the JSON object.';

function mapStopReason(reason: string | null | undefined): LLMFinishReason {
  switch (reason) {
    case 'end_turn':
      return 'stop';
    case 'tool_use':
      return 'tool_calls';
    case 'max_tokens':
      return 'length';
    default:
      return 'stop';
  }
}

/**
 * Separate system messages from conversation messages.
 * Anthropic requires system as a top-level param, not inside the messages array.
 */
function convertMessages(
  messages: LLMMessage[],
  jsonMode: boolean
): { system: string | undefined; anthropicMessages: Anthropic.MessageParam[] } {
  let system: string | undefined;
  const anthropicMessages: Anthropic.MessageParam[] = [];

  for (const msg of messages) {
    if (msg.role === 'system') {
      const text = jsonMode ? msg.content + JSON_MODE_SUFFIX : msg.content;
      system = system ? `${system}\n${text}` : text;
      continue;
    }

    if (msg.role === 'tool') {
      // Tool results go as user messages with tool_result content block
      anthropicMessages.push({
        role: 'user',
        content: [
          {
            type: 'tool_result',
            tool_use_id: msg.toolCallId!,
            content: msg.content,
          },
        ],
      });
      continue;
    }

    if (msg.role === 'assistant' && msg.toolCalls?.length) {
      const content: Anthropic.ContentBlockParam[] = [];
      if (msg.content) {
        content.push({ type: 'text', text: msg.content });
      }
      for (const tc of msg.toolCalls) {
        content.push({
          type: 'tool_use',
          id: tc.id,
          name: tc.name,
          input: JSON.parse(tc.arguments),
        });
      }
      anthropicMessages.push({ role: 'assistant', content });
      continue;
    }

    anthropicMessages.push({
      role: msg.role as 'user' | 'assistant',
      content: msg.content,
    });
  }

  // If JSON mode requested but no system message existed, create one
  if (jsonMode && !system) {
    system = JSON_MODE_SUFFIX.trim();
  }

  return { system, anthropicMessages };
}

/**
 * Try to extract JSON from content that might have markdown wrapping.
 * Falls back to returning the raw content.
 */
function extractJson(content: string): string {
  // Try direct parse first
  try {
    JSON.parse(content);
    return content;
  } catch {
    // Noop — try regex extraction
  }

  // Try extracting from ```json ... ``` blocks
  const jsonBlockMatch = content.match(/```(?:json)?\s*\n?([\s\S]*?)\n?\s*```/);
  if (jsonBlockMatch?.[1]) {
    try {
      JSON.parse(jsonBlockMatch[1].trim());
      return jsonBlockMatch[1].trim();
    } catch {
      // Noop
    }
  }

  // Try finding first { ... } or [ ... ]
  const objectMatch = content.match(/\{[\s\S]*\}/);
  if (objectMatch?.[0]) {
    try {
      JSON.parse(objectMatch[0]);
      return objectMatch[0];
    } catch {
      // Noop
    }
  }

  return content;
}

class AnthropicProvider implements LLMProvider {
  readonly type = TYPE;
  readonly capabilities: ProviderCapabilities;
  private client: Anthropic;
  private defaultModel: string;

  constructor(config: {
    baseUrl?: string;
    apiKey: string;
    defaultModel: string;
    extraConfig: Record<string, unknown>;
  }) {
    this.client = new Anthropic({
      apiKey: config.apiKey,
      ...(config.baseUrl ? { baseURL: config.baseUrl } : {}),
    });
    this.defaultModel = config.defaultModel;
    this.capabilities = { ...DEFAULT_CAPABILITIES.anthropic };
  }

  async chat(request: LLMChatRequest): Promise<LLMChatResponse> {
    const model = request.model || this.defaultModel;
    const jsonMode = request.responseFormat === 'json';
    const { system, anthropicMessages } = convertMessages(request.messages, jsonMode);

    const params: Anthropic.MessageCreateParamsNonStreaming = {
      model,
      messages: anthropicMessages,
      max_tokens: request.maxTokens ?? 4096,
      ...(system ? { system } : {}),
      ...(request.temperature !== undefined ? { temperature: request.temperature } : {}),
      ...(request.tools?.length ? { tools: toAnthropicTools(request.tools) as any } : {}),
    };

    try {
      const response = await this.client.messages.create(params);

      let content: string | null = null;
      const toolCalls: LLMToolCall[] = [];

      for (const block of response.content) {
        if (block.type === 'text') {
          content = (content || '') + block.text;
        } else if (block.type === 'tool_use') {
          toolCalls.push({
            id: block.id,
            name: block.name,
            arguments: JSON.stringify(block.input),
          });
        }
      }

      // JSON mode fallback: extract JSON from content
      if (jsonMode && content) {
        content = extractJson(content);
      }

      return {
        content,
        toolCalls,
        finishReason: mapStopReason(response.stop_reason),
        usage: {
          promptTokens: response.usage.input_tokens,
          completionTokens: response.usage.output_tokens,
          totalTokens: response.usage.input_tokens + response.usage.output_tokens,
        },
        raw: response,
      };
    } catch (error) {
      throw this.wrapError(error);
    }
  }

  async listModels(): Promise<string[]> {
    const response = await this.client.models.list();
    const models: string[] = [];
    for await (const page of response.iterPages()) {
      models.push(...page.data.map((model) => model.id));
    }
    return models;
  }

  private wrapError(error: unknown): Error {
    if (error instanceof Anthropic.APIError) {
      if (error.status === 401) return new LLMAuthError(TYPE, error.message);
      if (error.status === 429) return new LLMRateLimitError(TYPE);
      if (error.status === 400 && error.message?.includes('too long'))
        return new LLMContextLengthError(TYPE, error.message);
      if (error.status && error.status >= 500)
        return new LLMConnectionError(TYPE, error.message, error.status);
      return new LLMResponseError(TYPE, error.message);
    }
    if (
      error instanceof Error &&
      (error.message.includes('ECONNREFUSED') || error.message.includes('fetch'))
    ) {
      return new LLMConnectionError(TYPE, error.message);
    }
    return error instanceof Error ? error : new Error(String(error));
  }
}

export const createAnthropicProvider: ProviderFactory = (config) => new AnthropicProvider(config);
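
A sketch of the JSON-mode fallback from the caller's point of view: with responseFormat 'json' this adapter appends JSON_MODE_SUFFIX to the system prompt and runs extractJson() on the reply, so a markdown-fenced answer still comes back as a bare JSON string. The model id and the wrapper function are assumptions.

import type { LLMProvider } from './llm/providers/base';

// Hypothetical caller of the Anthropic adapter (model id is an assumption).
async function getVerdict(provider: LLMProvider): Promise<unknown> {
  const res = await provider.chat({
    model: 'claude-sonnet-4-20250514',
    responseFormat: 'json',
    messages: [{ role: 'user', content: 'Reply with {"verdict": "approve"} as JSON.' }],
  });
  // Even if the model wrapped the object in a ```json fence, content is the extracted JSON.
  return JSON.parse(res.content ?? '{}');
}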
src/llm/providers/base.ts (50 lines, Normal file)
@@ -0,0 +1,50 @@
/**
 * LLMProvider abstract interface.
 *
 * Each adapter (openai-compatible, openai-responses, anthropic, gemini)
 * implements this interface. The Gateway only interacts with providers
 * through this contract.
 */

import type { ProviderCapabilities } from '../capabilities';
import type { LLMChatRequest, LLMChatResponse } from '../types';

// ---------------------------------------------------------------------------
// Provider Interface
// ---------------------------------------------------------------------------

export interface LLMProvider {
  /** Provider type identifier (e.g. 'openai_compatible', 'anthropic'). */
  readonly type: string;

  /** Capability declaration for this provider. */
  readonly capabilities: ProviderCapabilities;

  /**
   * Core chat method. The Gateway only calls this method.
   *
   * Each adapter is responsible for:
   * 1. Converting LLMChatRequest to provider-native format
   * 2. Making the HTTP/SDK call
   * 3. Converting the native response to LLMChatResponse
   */
  chat(request: LLMChatRequest): Promise<LLMChatResponse>;

  /** Optional: embedding interface (only for providers that support it). */
  embed?(texts: string[]): Promise<number[][]>;

  /** Optional: list available models for this provider. */
  listModels?(): Promise<string[]>;
}

// ---------------------------------------------------------------------------
// Provider Factory
// ---------------------------------------------------------------------------

/** Factory function signature: create an LLMProvider from DB config + decrypted API key. */
export type ProviderFactory = (config: {
  baseUrl?: string;
  apiKey: string;
  defaultModel: string;
  extraConfig: Record<string, unknown>;
}) => LLMProvider;
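
A minimal stub provider, as a sketch of the smallest surface an adapter must implement; something like this could serve as a test double for gateway-level unit tests. It is not part of this commit, and the 'echo' type is made up.

import { DEFAULT_CAPABILITIES } from './llm/capabilities';
import type { LLMProvider, ProviderFactory } from './llm/providers/base';
import type { LLMChatRequest, LLMChatResponse } from './llm/types';

// Hypothetical test double: echoes the last message back as the completion.
class EchoProvider implements LLMProvider {
  readonly type = 'echo';
  readonly capabilities = { ...DEFAULT_CAPABILITIES.openai_compatible, supportsTools: false };

  async chat(request: LLMChatRequest): Promise<LLMChatResponse> {
    const last = request.messages[request.messages.length - 1];
    return {
      content: last?.content ?? '',
      toolCalls: [],
      finishReason: 'stop',
      usage: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
    };
  }
}

export const createEchoProvider: ProviderFactory = () => new EchoProvider();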
src/llm/providers/gemini.ts (234 lines, Normal file)
@@ -0,0 +1,234 @@
/**
 * Google Gemini generateContent API adapter.
 *
 * Key differences from OpenAI:
 * - System message → systemInstruction param
 * - Role 'assistant' → 'model'
 * - Tool calls → functionCall parts; tool results → functionResponse parts
 * - Finish reasons: STOP, FUNCTION_CALL (unofficial—actually STOP), MAX_TOKENS, SAFETY
 * - Token usage: usageMetadata.{promptTokenCount, candidatesTokenCount}
 */

import { type Content, GoogleGenAI, type Part } from '@google/genai';
import { DEFAULT_CAPABILITIES, type ProviderCapabilities } from '../capabilities';
import {
  LLMAuthError,
  LLMConnectionError,
  LLMContextLengthError,
  LLMRateLimitError,
  LLMResponseError,
} from '../errors';
import { toGeminiTools } from '../tool-converter';
import type {
  LLMChatRequest,
  LLMChatResponse,
  LLMFinishReason,
  LLMMessage,
  LLMToolCall,
} from '../types';
import type { LLMProvider, ProviderFactory } from './base';

const TYPE = 'gemini';

function mapFinishReason(reason: string | undefined): LLMFinishReason {
  switch (reason) {
    case 'STOP':
      return 'stop';
    case 'MAX_TOKENS':
      return 'length';
    case 'SAFETY':
      return 'content_filter';
    case 'RECITATION':
      return 'content_filter';
    default:
      return 'stop';
  }
}

function convertMessages(messages: LLMMessage[]): {
  systemInstruction: string | undefined;
  contents: Content[];
} {
  let systemInstruction: string | undefined;
  const contents: Content[] = [];

  for (const msg of messages) {
    if (msg.role === 'system') {
      systemInstruction = systemInstruction ? `${systemInstruction}\n${msg.content}` : msg.content;
      continue;
    }

    if (msg.role === 'tool') {
      // Tool results → function role with functionResponse part
      contents.push({
        role: 'function',
        parts: [
          {
            functionResponse: {
              name: msg.toolCallId || 'unknown',
              response: safeParseJson(msg.content),
            },
          } as Part,
        ],
      });
      continue;
    }

    if (msg.role === 'assistant' && msg.toolCalls?.length) {
      const parts: Part[] = [];
      if (msg.content) {
        parts.push({ text: msg.content });
      }
      for (const tc of msg.toolCalls) {
        parts.push({
          functionCall: {
            name: tc.name,
            args: safeParseJson(tc.arguments),
          },
        } as Part);
      }
      contents.push({ role: 'model', parts });
      continue;
    }

    // Gemini uses 'model' instead of 'assistant'
    const role = msg.role === 'assistant' ? 'model' : 'user';
    contents.push({
      role,
      parts: [{ text: msg.content }],
    });
  }

  return { systemInstruction, contents };
}

function safeParseJson(str: string): any {
  try {
    return JSON.parse(str);
  } catch {
    return { result: str };
  }
}

class GeminiProvider implements LLMProvider {
  readonly type = TYPE;
  readonly capabilities: ProviderCapabilities;
  private genAI: GoogleGenAI;
  private defaultModel: string;

  constructor(config: {
    baseUrl?: string;
    apiKey: string;
    defaultModel: string;
    extraConfig: Record<string, unknown>;
  }) {
    this.genAI = new GoogleGenAI({ apiKey: config.apiKey });
    this.defaultModel = config.defaultModel;
    this.capabilities = { ...DEFAULT_CAPABILITIES.gemini };
  }

  async chat(request: LLMChatRequest): Promise<LLMChatResponse> {
    const modelId = request.model || this.defaultModel;
    const { systemInstruction, contents } = convertMessages(request.messages);

    try {
      const response = await this.genAI.models.generateContent({
        model: modelId,
        contents,
        config: {
          ...(systemInstruction ? { systemInstruction } : {}),
          ...(request.temperature !== undefined ? { temperature: request.temperature } : {}),
          ...(request.maxTokens !== undefined ? { maxOutputTokens: request.maxTokens } : {}),
          ...(request.responseFormat === 'json' ? { responseMimeType: 'application/json' } : {}),
          ...(request.tools?.length ? { tools: toGeminiTools(request.tools) as any } : {}),
        },
      });
      const candidate = response.candidates?.[0];

      let content: string | null = null;
      const toolCalls: LLMToolCall[] = [];

      if (candidate?.content?.parts) {
        for (const part of candidate.content.parts) {
          if ('text' in part && part.text) {
            content = (content || '') + part.text;
          }
          if ('functionCall' in part && part.functionCall) {
            const toolName = String(part.functionCall.name ?? 'unknown');
            toolCalls.push({
              id: `call_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
              name: toolName,
              arguments: JSON.stringify(part.functionCall.args || {}),
            });
          }
        }
      }

      const hasFunctionCalls = toolCalls.length > 0;
      const finishReason = hasFunctionCalls
        ? 'tool_calls'
        : mapFinishReason(candidate?.finishReason as string | undefined);

      const usage = response.usageMetadata;

      return {
        content,
        toolCalls,
        finishReason,
        usage: {
          promptTokens: usage?.promptTokenCount ?? 0,
          completionTokens: usage?.candidatesTokenCount ?? 0,
          totalTokens: usage?.totalTokenCount ?? 0,
        },
        raw: response,
      };
    } catch (error) {
      throw this.wrapError(error);
    }
  }

  async embed(texts: string[]): Promise<number[][]> {
    try {
      const results: number[][] = [];
      for (const text of texts) {
        const result = await this.genAI.models.embedContent({
          model: this.defaultModel,
          contents: text,
        });
        results.push(result.embeddings?.[0]?.values ?? []);
      }
      return results;
    } catch (error) {
      throw this.wrapError(error);
    }
  }

  async listModels(): Promise<string[]> {
    const pager = await this.genAI.models.list();
    const models: string[] = [];
    for await (const model of pager) {
      if (model.name) {
        models.push(model.name.replace('models/', ''));
      }
    }
    return models;
  }

  private wrapError(error: unknown): Error {
    if (error instanceof Error) {
      const msg = error.message;
      if (msg.includes('API_KEY_INVALID') || msg.includes('401'))
        return new LLMAuthError(TYPE, msg);
      if (msg.includes('429') || msg.includes('RESOURCE_EXHAUSTED'))
        return new LLMRateLimitError(TYPE);
      if (msg.includes('context') || msg.includes('too long'))
        return new LLMContextLengthError(TYPE, msg);
      if (msg.includes('ECONNREFUSED') || msg.includes('fetch') || msg.includes('500'))
        return new LLMConnectionError(TYPE, msg);
      return new LLMResponseError(TYPE, msg);
    }
    return new Error(String(error));
  }
}

export const createGeminiProvider: ProviderFactory = (config) => new GeminiProvider(config);
src/llm/providers/openai-compatible.ts (172 lines, Normal file)
@@ -0,0 +1,172 @@
/**
 * OpenAI Compatible adapter — for any service implementing the OpenAI chat.completions API.
 * This is the existing format used by the codebase today.
 */

import OpenAI from 'openai';
import { DEFAULT_CAPABILITIES, type ProviderCapabilities } from '../capabilities';
import {
  LLMAuthError,
  LLMConnectionError,
  LLMContextLengthError,
  LLMRateLimitError,
  LLMResponseError,
} from '../errors';
import { toOpenAITools } from '../tool-converter';
import type {
  LLMChatRequest,
  LLMChatResponse,
  LLMFinishReason,
  LLMMessage,
  LLMToolCall,
} from '../types';
import type { LLMProvider, ProviderFactory } from './base';

const TYPE = 'openai_compatible';

function mapFinishReason(reason: string | null | undefined): LLMFinishReason {
  switch (reason) {
    case 'stop':
      return 'stop';
    case 'tool_calls':
      return 'tool_calls';
    case 'length':
      return 'length';
    case 'content_filter':
      return 'content_filter';
    default:
      return 'stop';
  }
}

function toOpenAIMessages(messages: LLMMessage[]): OpenAI.ChatCompletionMessageParam[] {
  return messages.map((msg) => {
    if (msg.role === 'tool') {
      return {
        role: 'tool' as const,
        content: msg.content,
        tool_call_id: msg.toolCallId!,
      };
    }
    if (msg.role === 'assistant' && msg.toolCalls?.length) {
      return {
        role: 'assistant' as const,
        content: msg.content || null,
        tool_calls: msg.toolCalls.map((tc) => ({
          id: tc.id,
          type: 'function' as const,
          function: { name: tc.name, arguments: tc.arguments },
        })),
      };
    }
    return {
      role: msg.role as 'system' | 'user' | 'assistant',
      content: msg.content,
    };
  });
}

function extractToolCalls(choice: OpenAI.ChatCompletion.Choice): LLMToolCall[] {
  const toolCalls = choice.message.tool_calls;
  if (!toolCalls?.length) return [];
  return toolCalls.map((tc) => ({
    id: tc.id,
    name: tc.function.name,
    arguments: tc.function.arguments,
  }));
}

class OpenAICompatibleProvider implements LLMProvider {
  readonly type = TYPE;
  readonly capabilities: ProviderCapabilities;
  private client: OpenAI;
  private defaultModel: string;

  constructor(config: {
    baseUrl?: string;
    apiKey: string;
    defaultModel: string;
    extraConfig: Record<string, unknown>;
  }) {
    this.client = new OpenAI({
      baseURL: config.baseUrl || 'https://api.openai.com/v1',
      apiKey: config.apiKey,
      ...(config.extraConfig.organization
        ? { organization: config.extraConfig.organization as string }
        : {}),
    });
    this.defaultModel = config.defaultModel;
    this.capabilities = { ...DEFAULT_CAPABILITIES.openai_compatible };
  }

  async chat(request: LLMChatRequest): Promise<LLMChatResponse> {
    const model = request.model || this.defaultModel;

    const params: OpenAI.ChatCompletionCreateParamsNonStreaming = {
      model,
      messages: toOpenAIMessages(request.messages),
      ...(request.temperature !== undefined ? { temperature: request.temperature } : {}),
      ...(request.maxTokens !== undefined ? { max_tokens: request.maxTokens } : {}),
      ...(request.responseFormat === 'json' ? { response_format: { type: 'json_object' } } : {}),
      ...(request.tools?.length ? { tools: toOpenAITools(request.tools) as any } : {}),
    };

    try {
      const response = await this.client.chat.completions.create(params);
      const choice = response.choices[0];

      return {
        content: choice?.message?.content ?? null,
        toolCalls: choice ? extractToolCalls(choice) : [],
        finishReason: mapFinishReason(choice?.finish_reason),
        usage: {
          promptTokens: response.usage?.prompt_tokens ?? 0,
          completionTokens: response.usage?.completion_tokens ?? 0,
          totalTokens: response.usage?.total_tokens ?? 0,
        },
        raw: response,
      };
    } catch (error) {
      throw this.wrapError(error);
    }
  }

  async embed(texts: string[]): Promise<number[][]> {
    try {
      const response = await this.client.embeddings.create({
        model: this.defaultModel,
        input: texts,
      });
      return response.data.map((d) => d.embedding);
    } catch (error) {
      throw this.wrapError(error);
    }
  }

  async listModels(): Promise<string[]> {
    const response = await this.client.models.list();
    return response.data.map((model) => model.id);
  }

  private wrapError(error: unknown): Error {
    if (error instanceof OpenAI.APIError) {
      if (error.status === 401) return new LLMAuthError(TYPE, error.message);
      if (error.status === 429) return new LLMRateLimitError(TYPE);
      if (error.status === 400 && error.message?.includes('context_length'))
        return new LLMContextLengthError(TYPE, error.message);
      if (error.status && error.status >= 500)
        return new LLMConnectionError(TYPE, error.message, error.status);
      return new LLMResponseError(TYPE, error.message);
    }
    if (
      error instanceof Error &&
      (error.message.includes('ECONNREFUSED') || error.message.includes('fetch'))
    ) {
      return new LLMConnectionError(TYPE, error.message);
    }
    return error instanceof Error ? error : new Error(String(error));
  }
}

export const createOpenAICompatibleProvider: ProviderFactory = (config) =>
  new OpenAICompatibleProvider(config);
src/llm/providers/openai-responses.ts (211 lines, Normal file)
@@ -0,0 +1,211 @@
/**
 * OpenAI Responses API adapter — uses the newer responses.create endpoint.
 * System instructions are passed as the `instructions` parameter instead of a system message.
 */

import OpenAI from 'openai';
import { DEFAULT_CAPABILITIES, type ProviderCapabilities } from '../capabilities';
import {
  LLMAuthError,
  LLMConnectionError,
  LLMContextLengthError,
  LLMRateLimitError,
  LLMResponseError,
} from '../errors';
import { toOpenAITools } from '../tool-converter';
import type {
  LLMChatRequest,
  LLMChatResponse,
  LLMFinishReason,
  LLMMessage,
  LLMToolCall,
} from '../types';
import type { LLMProvider, ProviderFactory } from './base';

const TYPE = 'openai_responses';

function mapStatus(status: string | undefined): LLMFinishReason {
  switch (status) {
    case 'completed':
      return 'stop';
    case 'incomplete':
      return 'length';
    default:
      return 'stop';
  }
}

/**
 * Extract system instructions and build input items from unified messages.
 * System messages → `instructions` parameter.
 * Remaining messages → `input` array.
 */
function convertMessages(messages: LLMMessage[]): {
  instructions: string | undefined;
  input: any[];
} {
  let instructions: string | undefined;
  const input: any[] = [];

  for (const msg of messages) {
    if (msg.role === 'system') {
      instructions = instructions ? `${instructions}\n${msg.content}` : msg.content;
      continue;
    }

    if (msg.role === 'tool') {
      input.push({
        type: 'function_call_output',
        call_id: msg.toolCallId,
        output: msg.content,
      });
      continue;
    }

    if (msg.role === 'assistant' && msg.toolCalls?.length) {
      // Emit function_call items for each tool call
      for (const tc of msg.toolCalls) {
        input.push({
          type: 'function_call',
          id: tc.id,
          name: tc.name,
          arguments: tc.arguments,
        });
      }
      // Also emit text if present
      if (msg.content) {
        input.push({ role: 'assistant', content: msg.content });
      }
      continue;
    }

    input.push({
      role: msg.role,
      content: msg.content,
    });
  }

  return { instructions, input };
}

class OpenAIResponsesProvider implements LLMProvider {
  readonly type = TYPE;
  readonly capabilities: ProviderCapabilities;
  private client: OpenAI;
  private defaultModel: string;

  constructor(config: {
    baseUrl?: string;
    apiKey: string;
    defaultModel: string;
    extraConfig: Record<string, unknown>;
  }) {
    this.client = new OpenAI({
      baseURL: config.baseUrl || undefined,
      apiKey: config.apiKey,
    });
    this.defaultModel = config.defaultModel;
    this.capabilities = { ...DEFAULT_CAPABILITIES.openai_responses };
  }

  async chat(request: LLMChatRequest): Promise<LLMChatResponse> {
    const model = request.model || this.defaultModel;
    const { instructions, input } = convertMessages(request.messages);

    const params: any = {
      model,
      input,
      ...(instructions ? { instructions } : {}),
      ...(request.temperature !== undefined ? { temperature: request.temperature } : {}),
      ...(request.maxTokens !== undefined ? { max_output_tokens: request.maxTokens } : {}),
      ...(request.responseFormat === 'json' ? { text: { format: { type: 'json_object' } } } : {}),
      ...(request.tools?.length
        ? {
            tools: toOpenAITools(request.tools).map((t: any) => ({
              type: 'function',
              ...t.function,
            })),
          }
        : {}),
    };

    try {
      const response = await this.client.responses.create(params);

      // Extract content and tool calls from output
      let content: string | null = null;
      const toolCalls: LLMToolCall[] = [];

      for (const item of response.output || []) {
        if (item.type === 'message') {
          for (const block of (item as any).content || []) {
            if (block.type === 'output_text') {
              content = (content || '') + block.text;
            }
          }
        } else if (item.type === 'function_call') {
          toolCalls.push({
            id: (item as any).id || (item as any).call_id || '',
            name: (item as any).name,
            arguments: (item as any).arguments || '{}',
          });
        }
      }

      return {
        content,
        toolCalls,
        finishReason: toolCalls.length > 0 ? 'tool_calls' : mapStatus(response.status),
        usage: {
          promptTokens: (response.usage as any)?.input_tokens ?? 0,
          completionTokens: (response.usage as any)?.output_tokens ?? 0,
          totalTokens:
            ((response.usage as any)?.input_tokens ?? 0) +
            ((response.usage as any)?.output_tokens ?? 0),
        },
        raw: response,
      };
    } catch (error) {
      throw this.wrapError(error);
    }
  }

  async embed(texts: string[]): Promise<number[][]> {
    try {
      const response = await this.client.embeddings.create({
        model: this.defaultModel,
        input: texts,
      });
      return response.data.map((d) => d.embedding);
    } catch (error) {
      throw this.wrapError(error);
    }
  }

  async listModels(): Promise<string[]> {
    const response = await this.client.models.list();
    return response.data.map((model) => model.id);
  }

  private wrapError(error: unknown): Error {
    if (error instanceof OpenAI.APIError) {
      if (error.status === 401) return new LLMAuthError(TYPE, error.message);
      if (error.status === 429) return new LLMRateLimitError(TYPE);
      if (error.status === 400 && error.message?.includes('context_length'))
        return new LLMContextLengthError(TYPE, error.message);
      if (error.status && error.status >= 500)
        return new LLMConnectionError(TYPE, error.message, error.status);
      return new LLMResponseError(TYPE, error.message);
    }
    if (
      error instanceof Error &&
      (error.message.includes('ECONNREFUSED') || error.message.includes('fetch'))
    ) {
      return new LLMConnectionError(TYPE, error.message);
    }
    return error instanceof Error ? error : new Error(String(error));
  }
}

export const createOpenAIResponsesProvider: ProviderFactory = (config) =>
  new OpenAIResponsesProvider(config);
src/llm/tool-converter.ts (37 lines, Normal file)
@@ -0,0 +1,37 @@
/**
 * Converts internal LLMToolDefinition to provider-native tool formats.
 * Called by each adapter in their chat() method.
 */

import type { LLMToolDefinition } from './types';

export function toOpenAITools(tools: LLMToolDefinition[]): object[] {
  return tools.map((tool) => ({
    type: 'function' as const,
    function: {
      name: tool.name,
      description: tool.description,
      parameters: tool.parameters,
    },
  }));
}

export function toAnthropicTools(tools: LLMToolDefinition[]): object[] {
  return tools.map((tool) => ({
    name: tool.name,
    description: tool.description,
    input_schema: tool.parameters,
  }));
}

export function toGeminiTools(tools: LLMToolDefinition[]): object[] {
  return [
    {
      functionDeclarations: tools.map((tool) => ({
        name: tool.name,
        description: tool.description,
        parameters: tool.parameters,
      })),
    },
  ];
}
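
A sketch of the translation this module performs: one internal definition, three native shapes. The get_issue tool below is a made-up example for illustration, not something this commit registers.

import { toAnthropicTools, toGeminiTools, toOpenAITools } from './llm/tool-converter';
import type { LLMToolDefinition } from './llm/types';

// Hypothetical tool definition in the provider-agnostic format.
const getIssue: LLMToolDefinition = {
  name: 'get_issue',
  description: 'Fetch a Gitea issue by number',
  parameters: {
    type: 'object',
    properties: { number: { type: 'integer' } },
    required: ['number'],
  },
};

toOpenAITools([getIssue]);    // [{ type: 'function', function: { name, description, parameters } }]
toAnthropicTools([getIssue]); // [{ name, description, input_schema }]
toGeminiTools([getIssue]);    // [{ functionDeclarations: [{ name, description, parameters }] }]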
src/llm/types.ts (104 lines, Normal file)
@@ -0,0 +1,104 @@
/**
 * Unified LLM types — provider-agnostic request/response format.
 *
 * All business code uses these types exclusively.
 * Provider adapters translate to/from these types.
 */

// ---------------------------------------------------------------------------
// Model Role
// ---------------------------------------------------------------------------

/** Business role that maps to a specific provider + model via DB config. */
export type ModelRole = 'legacy' | 'planner' | 'specialist' | 'judge' | 'embedding';

/** All valid model roles. */
export const MODEL_ROLES: readonly ModelRole[] = [
  'legacy',
  'planner',
  'specialist',
  'judge',
  'embedding',
] as const;

// ---------------------------------------------------------------------------
// Provider Type
// ---------------------------------------------------------------------------

export type ProviderType = 'openai_compatible' | 'openai_responses' | 'anthropic' | 'gemini';

export const PROVIDER_TYPES: readonly ProviderType[] = [
  'openai_compatible',
  'openai_responses',
  'anthropic',
  'gemini',
] as const;

// ---------------------------------------------------------------------------
// Messages
// ---------------------------------------------------------------------------

/** Unified message format (internal representation, hides provider differences). */
export interface LLMMessage {
  role: 'system' | 'user' | 'assistant' | 'tool';
  content: string;
  /** When role='tool', the ID of the tool call this result corresponds to. */
  toolCallId?: string;
  /** When role='assistant', any tool calls the model wants to make. */
  toolCalls?: LLMToolCall[];
}

/** A single tool call from the model. */
export interface LLMToolCall {
  id: string;
  name: string;
  /** JSON-serialized arguments */
  arguments: string;
}

/** Tool definition in provider-agnostic format. Converted to native format by tool-converter. */
export interface LLMToolDefinition {
  name: string;
  description: string;
  /** JSON Schema for the tool's parameters */
  parameters: Record<string, unknown>;
}

// ---------------------------------------------------------------------------
// Request
// ---------------------------------------------------------------------------

/** Unified chat request — provider adapters translate this to native format. */
export interface LLMChatRequest {
  messages: LLMMessage[];
  model: string;
  temperature?: number;
  maxTokens?: number;
  /** Abstract JSON mode: adapters handle native support or prompt injection fallback. */
  responseFormat?: 'text' | 'json';
  tools?: LLMToolDefinition[];
  /** Provider-specific passthrough config (e.g. Anthropic thinking, Gemini safetySettings). */
  providerOptions?: Record<string, unknown>;
}

// ---------------------------------------------------------------------------
// Response
// ---------------------------------------------------------------------------

/** Unified chat response — provider adapters normalize to this format. */
export interface LLMChatResponse {
  content: string | null;
  toolCalls: LLMToolCall[];
  finishReason: LLMFinishReason;
  usage: LLMUsage;
  /** Raw provider response preserved for debugging. */
  raw?: unknown;
}

export type LLMFinishReason = 'stop' | 'tool_calls' | 'length' | 'content_filter' | 'error';

export interface LLMUsage {
  promptTokens: number;
  completionTokens: number;
  totalTokens: number;
}
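
A sketch of what a tool-call round trip looks like in this unified message format, independent of provider. The tool name and IDs below are invented for illustration; adapters translate this history to tool_use/tool_result (Anthropic), functionCall/functionResponse (Gemini), or tool_calls/tool messages (OpenAI).

import type { LLMMessage } from './llm/types';

// Hypothetical conversation history with one completed tool call.
const history: LLMMessage[] = [
  { role: 'user', content: 'What is the CI status of PR 42?' },
  {
    role: 'assistant',
    content: '',
    toolCalls: [{ id: 'call_1', name: 'get_pr_status', arguments: '{"number":42}' }],
  },
  { role: 'tool', toolCallId: 'call_1', content: '{"status":"success"}' },
];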