feat: 实现语音转文字并支持流式输出;

fix: 修复了语音解密失败的问题
This commit is contained in:
cc
2026-01-17 14:16:54 +08:00
parent 650de55202
commit e8babd48b6
33 changed files with 1713 additions and 570 deletions

View File

@@ -1,19 +1,23 @@
import { app } from 'electron'
import { createWriteStream, existsSync, mkdirSync, statSync, unlinkSync, writeFileSync } from 'fs'
import { join, dirname } from 'path'
import { promisify } from 'util'
import { execFile, spawnSync } from 'child_process'
import { existsSync, mkdirSync, statSync, unlinkSync, createWriteStream } from 'fs'
import { join } from 'path'
import * as https from 'https'
import * as http from 'http'
import { ConfigService } from './config'
const execFileAsync = promisify(execFile)
// Sherpa-onnx 类型定义
type OfflineRecognizer = any
type OfflineStream = any
type WhisperModelInfo = {
type ModelInfo = {
name: string
fileName: string
files: {
model: string
tokens: string
vad: string
}
sizeBytes: number
sizeLabel: string
sizeBytes?: number
}
type DownloadProgress = {
@@ -23,122 +27,169 @@ type DownloadProgress = {
percent?: number
}
const WHISPER_MODELS: Record<string, WhisperModelInfo> = {
tiny: { name: 'tiny', fileName: 'ggml-tiny.bin', sizeLabel: '75 MB', sizeBytes: 75_000_000 },
base: { name: 'base', fileName: 'ggml-base.bin', sizeLabel: '142 MB', sizeBytes: 142_000_000 },
small: { name: 'small', fileName: 'ggml-small.bin', sizeLabel: '466 MB', sizeBytes: 466_000_000 },
medium: { name: 'medium', fileName: 'ggml-medium.bin', sizeLabel: '1.5 GB', sizeBytes: 1_500_000_000 },
'large-v3': { name: 'large-v3', fileName: 'ggml-large-v3.bin', sizeLabel: '2.9 GB', sizeBytes: 2_900_000_000 }
const SENSEVOICE_MODEL: ModelInfo = {
name: 'SenseVoiceSmall',
files: {
model: 'model.int8.onnx',
tokens: 'tokens.txt',
vad: 'silero_vad.onnx'
},
sizeBytes: 245_000_000,
sizeLabel: '245 MB'
}
const WHISPER_SOURCES: Record<string, string> = {
official: 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main',
tsinghua: 'https://hf-mirror.com/ggerganov/whisper.cpp/resolve/main'
}
function getStaticFfmpegPath(): string | null {
try {
// eslint-disable-next-line @typescript-eslint/no-var-requires
const ffmpegStatic = require('ffmpeg-static')
if (typeof ffmpegStatic === 'string' && existsSync(ffmpegStatic)) {
return ffmpegStatic
}
const devPath = join(process.cwd(), 'node_modules', 'ffmpeg-static', 'ffmpeg.exe')
if (existsSync(devPath)) {
return devPath
}
if (app.isPackaged) {
const resourcesPath = process.resourcesPath
const packedPath = join(resourcesPath, 'app.asar.unpacked', 'node_modules', 'ffmpeg-static', 'ffmpeg.exe')
if (existsSync(packedPath)) {
return packedPath
}
}
return null
} catch {
return null
}
const MODEL_DOWNLOAD_URLS = {
model: 'https://modelscope.cn/models/pengzhendong/sherpa-onnx-sense-voice-zh-en-ja-ko-yue/resolve/master/model.int8.onnx',
tokens: 'https://modelscope.cn/models/pengzhendong/sherpa-onnx-sense-voice-zh-en-ja-ko-yue/resolve/master/tokens.txt',
vad: 'https://www.modelscope.cn/models/manyeyes/silero-vad-onnx/resolve/master/silero_vad.onnx'
}
export class VoiceTranscribeService {
private configService = new ConfigService()
private downloadTasks = new Map<string, Promise<{ success: boolean; path?: string; error?: string }>>()
private recognizer: OfflineRecognizer | null = null
private isInitializing = false
private resolveModelInfo(modelName: string): WhisperModelInfo | null {
return WHISPER_MODELS[modelName] || null
}
private resolveModelDir(overrideDir?: string): string {
const configured = overrideDir || this.configService.get('whisperModelDir')
private resolveModelDir(): string {
const configured = this.configService.get('whisperModelDir') as string | undefined
if (configured) return configured
return join(app.getPath('userData'), 'models', 'whisper')
return join(app.getPath('documents'), 'WeFlow', 'models', 'sensevoice')
}
private resolveModelPath(modelName: string, overrideDir?: string): string | null {
const info = this.resolveModelInfo(modelName)
if (!info) return null
return join(this.resolveModelDir(overrideDir), info.fileName)
private resolveModelPath(fileName: string): string {
return join(this.resolveModelDir(), fileName)
}
private resolveSourceUrl(overrideSource?: string): string {
const configured = overrideSource || this.configService.get('whisperDownloadSource')
if (configured && WHISPER_SOURCES[configured]) return WHISPER_SOURCES[configured]
return WHISPER_SOURCES.official
}
async getModelStatus(payload: { modelName: string; downloadDir?: string }): Promise<{
/**
* 检查模型状态
*/
async getModelStatus(): Promise<{
success: boolean
exists?: boolean
path?: string
modelPath?: string
tokensPath?: string
sizeBytes?: number
error?: string
}> {
const modelPath = this.resolveModelPath(payload.modelName, payload.downloadDir)
if (!modelPath) {
return { success: false, error: '未知模型名称' }
try {
const modelPath = this.resolveModelPath(SENSEVOICE_MODEL.files.model)
const tokensPath = this.resolveModelPath(SENSEVOICE_MODEL.files.tokens)
const vadPath = this.resolveModelPath((SENSEVOICE_MODEL.files as any).vad)
const modelExists = existsSync(modelPath)
const tokensExists = existsSync(tokensPath)
const vadExists = existsSync(vadPath)
const exists = modelExists && tokensExists && vadExists
if (!exists) {
return { success: true, exists: false, modelPath, tokensPath }
}
const modelSize = statSync(modelPath).size
const tokensSize = statSync(tokensPath).size
const vadSize = statSync(vadPath).size
const totalSize = modelSize + tokensSize + vadSize
return {
success: true,
exists: true,
modelPath,
tokensPath,
sizeBytes: totalSize
}
} catch (error) {
console.error('[VoiceTranscribe] getModelStatus error:', error)
return { success: false, error: String(error) }
}
if (!existsSync(modelPath)) {
return { success: true, exists: false, path: modelPath }
}
const sizeBytes = statSync(modelPath).size
return { success: true, exists: true, path: modelPath, sizeBytes }
}
/**
* 下载模型文件
*/
async downloadModel(
payload: { modelName: string; downloadDir?: string; source?: string },
onProgress?: (progress: DownloadProgress) => void
): Promise<{ success: boolean; path?: string; error?: string }> {
const info = this.resolveModelInfo(payload.modelName)
if (!info) {
return { success: false, error: '未知模型名称' }
}
const modelPath = this.resolveModelPath(payload.modelName, payload.downloadDir)
if (!modelPath) {
return { success: false, error: '模型路径生成失败' }
}
if (existsSync(modelPath)) {
return { success: true, path: modelPath }
}
const cacheKey = `${payload.modelName}:${modelPath}`
): Promise<{ success: boolean; modelPath?: string; tokensPath?: string; error?: string }> {
const cacheKey = 'sensevoice'
const pending = this.downloadTasks.get(cacheKey)
if (pending) return pending
const task = (async () => {
try {
const targetDir = this.resolveModelDir(payload.downloadDir)
if (!existsSync(targetDir)) {
mkdirSync(targetDir, { recursive: true })
const modelDir = this.resolveModelDir()
if (!existsSync(modelDir)) {
mkdirSync(modelDir, { recursive: true })
}
const baseUrl = this.resolveSourceUrl(payload.source)
const url = `${baseUrl}/${info.fileName}`
await this.downloadToFile(url, modelPath, payload.modelName, onProgress)
return { success: true, path: modelPath }
const modelPath = this.resolveModelPath(SENSEVOICE_MODEL.files.model)
const tokensPath = this.resolveModelPath(SENSEVOICE_MODEL.files.tokens)
const vadPath = this.resolveModelPath((SENSEVOICE_MODEL.files as any).vad)
// 下载模型文件 (40%)
console.info('[VoiceTranscribe] 开始下载模型文件...')
await this.downloadToFile(
MODEL_DOWNLOAD_URLS.model,
modelPath,
'model',
(downloaded, total) => {
const percent = total ? (downloaded / total) * 40 : undefined
onProgress?.({
modelName: SENSEVOICE_MODEL.name,
downloadedBytes: downloaded,
totalBytes: SENSEVOICE_MODEL.sizeBytes,
percent
})
}
)
// 下载 tokens 文件 (30%)
console.info('[VoiceTranscribe] 开始下载 tokens 文件...')
await this.downloadToFile(
MODEL_DOWNLOAD_URLS.tokens,
tokensPath,
'tokens',
(downloaded, total) => {
const modelSize = existsSync(modelPath) ? statSync(modelPath).size : 0
const percent = total ? 40 + (downloaded / total) * 30 : 40
onProgress?.({
modelName: SENSEVOICE_MODEL.name,
downloadedBytes: modelSize + downloaded,
totalBytes: SENSEVOICE_MODEL.sizeBytes,
percent
})
}
)
// 下载 vad 文件 (30%)
console.info('[VoiceTranscribe] 开始下载 VAD 文件...')
await this.downloadToFile(
(MODEL_DOWNLOAD_URLS as any).vad,
vadPath,
'vad',
(downloaded, total) => {
const modelSize = existsSync(modelPath) ? statSync(modelPath).size : 0
const tokensSize = existsSync(tokensPath) ? statSync(tokensPath).size : 0
const percent = total ? 70 + (downloaded / total) * 30 : 70
onProgress?.({
modelName: SENSEVOICE_MODEL.name,
downloadedBytes: modelSize + tokensSize + downloaded,
totalBytes: SENSEVOICE_MODEL.sizeBytes,
percent
})
}
)
console.info('[VoiceTranscribe] 模型下载完成')
return { success: true, modelPath, tokensPath }
} catch (error) {
try { if (existsSync(modelPath)) unlinkSync(modelPath) } catch { }
console.error('[VoiceTranscribe] 下载失败:', error)
const modelPath = this.resolveModelPath(SENSEVOICE_MODEL.files.model)
const tokensPath = this.resolveModelPath(SENSEVOICE_MODEL.files.tokens)
const vadPath = this.resolveModelPath((SENSEVOICE_MODEL.files as any).vad)
try {
if (existsSync(modelPath)) unlinkSync(modelPath)
if (existsSync(tokensPath)) unlinkSync(tokensPath)
if (existsSync(vadPath)) unlinkSync(vadPath)
} catch { }
return { success: false, error: String(error) }
} finally {
this.downloadTasks.delete(cacheKey)
@@ -149,102 +200,108 @@ export class VoiceTranscribeService {
return task
}
async transcribeWavBuffer(wavData: Buffer): Promise<{ success: boolean; transcript?: string; error?: string }> {
const modelName = this.configService.get('whisperModelName') || 'base'
const modelPath = this.resolveModelPath(modelName)
console.info('[VoiceTranscribe] check model', { modelName, modelPath, exists: modelPath ? existsSync(modelPath) : false })
if (!modelPath || !existsSync(modelPath)) {
return { success: false, error: '未下载语音模型,请在设置中下载' }
}
/**
* 转写 WAV 音频数据 (后台 Worker Threads 版本)
*/
async transcribeWavBuffer(
wavData: Buffer,
onPartial?: (text: string) => void
): Promise<{ success: boolean; transcript?: string; error?: string }> {
return new Promise((resolve) => {
try {
const modelPath = this.resolveModelPath(SENSEVOICE_MODEL.files.model)
const tokensPath = this.resolveModelPath(SENSEVOICE_MODEL.files.tokens)
// 使用内置的预编译 whisper-cli.exe
const resourcesPath = app.isPackaged
? join(process.resourcesPath, 'resources')
: join(app.getAppPath(), 'resources')
const whisperExe = join(resourcesPath, 'whisper-cli.exe')
if (!existsSync(whisperExe)) {
return { success: false, error: '找不到语音转写程序,请重新安装应用' }
}
if (!existsSync(modelPath) || !existsSync(tokensPath)) {
resolve({ success: false, error: '模型文件不存在,请先下载模型' })
return
}
const ffmpegPath = getStaticFfmpegPath() || 'ffmpeg'
console.info('[VoiceTranscribe] ffmpeg path', ffmpegPath)
const { Worker } = require('worker_threads')
// main.js 和 transcribeWorker.js 同在 dist-electron 目录下
const workerPath = join(__dirname, 'transcribeWorker.js')
const tempDir = app.getPath('temp')
const fileToken = `${Date.now()}_${Math.random().toString(16).slice(2)}`
const inputPath = join(tempDir, `weflow_voice_${fileToken}.wav`)
const outputPath = join(tempDir, `weflow_voice_${fileToken}_16k.wav`)
console.info('[VoiceTranscribe] 启动后台 Worker 转写...', { workerPath })
try {
writeFileSync(inputPath, wavData)
console.info('[VoiceTranscribe] converting to 16kHz', { inputPath, outputPath })
await execFileAsync(ffmpegPath, ['-y', '-i', inputPath, '-ar', '16000', '-ac', '1', outputPath])
console.info('[VoiceTranscribe] transcribing with whisper', { whisperExe, modelPath })
const { stdout, stderr } = await execFileAsync(whisperExe, [
'-m', modelPath,
'-f', outputPath,
'-l', 'zh',
'-otxt',
'-np' // no prints (只输出结果)
], {
maxBuffer: 10 * 1024 * 1024,
cwd: dirname(whisperExe), // 设置工作目录为 whisper-cli.exe 所在目录,确保能找到 DLL
env: { ...process.env, PATH: `${dirname(whisperExe)};${process.env.PATH}` }
})
const worker = new Worker(workerPath, {
workerData: {
modelPath,
tokensPath,
wavData,
sampleRate: 16000
}
})
console.info('[VoiceTranscribe] whisper stdout:', stdout)
if (stderr) console.warn('[VoiceTranscribe] whisper stderr:', stderr)
let finalTranscript = ''
// 解析输出文本
const outputBase = outputPath.replace(/\.[^.]+$/, '')
const txtFile = `${outputBase}.txt`
let transcript = ''
if (existsSync(txtFile)) {
const { readFileSync } = await import('fs')
transcript = readFileSync(txtFile, 'utf-8').trim()
unlinkSync(txtFile)
} else {
// 从 stdout 提取(使用 -np 参数后stdout 只有转写结果)
transcript = stdout.trim()
worker.on('message', (msg: any) => {
if (msg.type === 'partial') {
onPartial?.(msg.text)
} else if (msg.type === 'final') {
finalTranscript = msg.text
resolve({ success: true, transcript: finalTranscript })
worker.terminate()
} else if (msg.type === 'error') {
resolve({ success: false, error: msg.error })
worker.terminate()
}
})
worker.on('error', (err: Error) => {
console.error('[VoiceTranscribe] Worker error:', err)
resolve({ success: false, error: String(err) })
})
worker.on('exit', (code: number) => {
if (code !== 0) {
console.error(`[VoiceTranscribe] Worker stopped with exit code ${code}`)
resolve({ success: false, error: `Worker exited with code ${code}` })
}
})
} catch (error) {
console.error('[VoiceTranscribe] 启动 Worker 失败:', error)
resolve({ success: false, error: String(error) })
}
console.info('[VoiceTranscribe] success', { transcript })
return { success: true, transcript }
} catch (error: any) {
console.error('[VoiceTranscribe] failed', error)
console.error('[VoiceTranscribe] stderr:', error.stderr)
console.error('[VoiceTranscribe] stdout:', error.stdout)
return { success: false, error: String(error) }
} finally {
try { if (existsSync(inputPath)) unlinkSync(inputPath) } catch { }
try { if (existsSync(outputPath)) unlinkSync(outputPath) } catch { }
}
})
}
/**
* 下载文件
*/
private downloadToFile(
url: string,
targetPath: string,
modelName: string,
onProgress?: (progress: DownloadProgress) => void,
remainingRedirects = 3
fileName: string,
onProgress?: (downloaded: number, total?: number) => void,
remainingRedirects = 5
): Promise<void> {
return new Promise((resolve, reject) => {
const protocol = url.startsWith('https') ? https : http
const request = protocol.get(url, (response) => {
console.info(`[VoiceTranscribe] 下载 ${fileName}:`, url)
const options = {
headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
}
}
const request = protocol.get(url, options, (response) => {
// 处理重定向
if ([301, 302, 303, 307, 308].includes(response.statusCode || 0) && response.headers.location) {
if (remainingRedirects <= 0) {
reject(new Error('下载重定向次数过多'))
reject(new Error('重定向次数过多'))
return
}
this.downloadToFile(response.headers.location, targetPath, modelName, onProgress, remainingRedirects - 1)
console.info(`[VoiceTranscribe] 重定向到:`, response.headers.location)
this.downloadToFile(response.headers.location, targetPath, fileName, onProgress, remainingRedirects - 1)
.then(resolve)
.catch(reject)
return
}
if (response.statusCode !== 200) {
reject(new Error(`下载失败: ${response.statusCode}`))
reject(new Error(`下载失败: HTTP ${response.statusCode}`))
return
}
@@ -255,8 +312,7 @@ export class VoiceTranscribeService {
response.on('data', (chunk) => {
downloadedBytes += chunk.length
const percent = totalBytes ? (downloadedBytes / totalBytes) * 100 : undefined
onProgress?.({ modelName, downloadedBytes, totalBytes, percent })
onProgress?.(downloadedBytes, totalBytes)
})
response.on('error', (error) => {
@@ -271,15 +327,33 @@ export class VoiceTranscribeService {
writer.on('finish', () => {
writer.close()
console.info(`[VoiceTranscribe] ${fileName} 下载完成:`, targetPath)
resolve()
})
response.pipe(writer)
})
request.on('error', reject)
request.on('error', (error) => {
console.error(`[VoiceTranscribe] ${fileName} 下载错误:`, error)
reject(error)
})
})
}
/**
* 清理资源
*/
dispose() {
if (this.recognizer) {
try {
// sherpa-onnx 的 recognizer 可能需要手动释放
this.recognizer = null
} catch (error) {
console.error('[VoiceTranscribe] 释放识别器失败:', error)
}
}
}
}
export const voiceTranscribeService = new VoiceTranscribeService()