mirror of
https://github.com/hicccc77/WeFlow.git
synced 2026-03-24 23:06:51 +00:00
- 移除exportService中的冗余console日志输出 - 简化whisper API接口,移除downloadModel和getModelStatus的payload参数 - 清理图片、表情、语音导出过程中的调试日志 - 移除数据库查询和媒体处理中的详细日志记录 - 优化代码可读性,减少控制台输出噪音
350 lines
11 KiB
TypeScript
import { app } from 'electron'
|
|
import { existsSync, mkdirSync, statSync, unlinkSync, createWriteStream } from 'fs'
|
|
import { join } from 'path'
|
|
import * as https from 'https'
|
|
import * as http from 'http'
|
|
import { ConfigService } from './config'
|
|
|
|
// Sherpa-onnx 类型定义
|
|
type OfflineRecognizer = any
|
|
type OfflineStream = any
|
|
|
|
type ModelInfo = {
|
|
name: string
|
|
files: {
|
|
model: string
|
|
tokens: string
|
|
vad: string
|
|
}
|
|
sizeBytes: number
|
|
sizeLabel: string
|
|
}
|
|
|
|
type DownloadProgress = {
|
|
modelName: string
|
|
downloadedBytes: number
|
|
totalBytes?: number
|
|
percent?: number
|
|
}
|
|
|
|
const SENSEVOICE_MODEL: ModelInfo = {
|
|
name: 'SenseVoiceSmall',
|
|
files: {
|
|
model: 'model.int8.onnx',
|
|
tokens: 'tokens.txt',
|
|
vad: 'silero_vad.onnx'
|
|
},
|
|
sizeBytes: 245_000_000,
|
|
sizeLabel: '245 MB'
|
|
}
|
|
|
|
const MODEL_DOWNLOAD_URLS = {
|
|
model: 'https://modelscope.cn/models/pengzhendong/sherpa-onnx-sense-voice-zh-en-ja-ko-yue/resolve/master/model.int8.onnx',
|
|
tokens: 'https://modelscope.cn/models/pengzhendong/sherpa-onnx-sense-voice-zh-en-ja-ko-yue/resolve/master/tokens.txt',
|
|
vad: 'https://www.modelscope.cn/models/manyeyes/silero-vad-onnx/resolve/master/silero_vad.onnx'
|
|
}
|
|
|
|
export class VoiceTranscribeService {
|
|
private configService = new ConfigService()
|
|
private downloadTasks = new Map<string, Promise<{ success: boolean; path?: string; error?: string }>>()
|
|
private recognizer: OfflineRecognizer | null = null
|
|
private isInitializing = false
|
|
|
|
private resolveModelDir(): string {
|
|
const configured = this.configService.get('whisperModelDir') as string | undefined
|
|
if (configured) return configured
|
|
return join(app.getPath('documents'), 'WeFlow', 'models', 'sensevoice')
|
|
}
|
|
|
|
private resolveModelPath(fileName: string): string {
|
|
return join(this.resolveModelDir(), fileName)
|
|
}
|
|
|
|
/**
|
|
* 检查模型状态
|
|
*/
|
|
async getModelStatus(): Promise<{
|
|
success: boolean
|
|
exists?: boolean
|
|
modelPath?: string
|
|
tokensPath?: string
|
|
sizeBytes?: number
|
|
error?: string
|
|
}> {
|
|
try {
|
|
const modelPath = this.resolveModelPath(SENSEVOICE_MODEL.files.model)
|
|
const tokensPath = this.resolveModelPath(SENSEVOICE_MODEL.files.tokens)
|
|
const vadPath = this.resolveModelPath((SENSEVOICE_MODEL.files as any).vad)
|
|
|
|
const modelExists = existsSync(modelPath)
|
|
const tokensExists = existsSync(tokensPath)
|
|
const vadExists = existsSync(vadPath)
|
|
const exists = modelExists && tokensExists && vadExists
|
|
|
|
if (!exists) {
|
|
return { success: true, exists: false, modelPath, tokensPath }
|
|
}
|
|
|
|
const modelSize = statSync(modelPath).size
|
|
const tokensSize = statSync(tokensPath).size
|
|
const vadSize = statSync(vadPath).size
|
|
const totalSize = modelSize + tokensSize + vadSize
|
|
|
|
return {
|
|
success: true,
|
|
exists: true,
|
|
modelPath,
|
|
tokensPath,
|
|
sizeBytes: totalSize
|
|
}
|
|
} catch (error) {
|
|
return { success: false, error: String(error) }
|
|
}
|
|
}
|
|
|
|
/**
|
|
* 下载模型文件
|
|
*/
|
|
async downloadModel(
|
|
onProgress?: (progress: DownloadProgress) => void
|
|
): Promise<{ success: boolean; modelPath?: string; tokensPath?: string; error?: string }> {
|
|
const cacheKey = 'sensevoice'
|
|
const pending = this.downloadTasks.get(cacheKey)
|
|
if (pending) return pending
|
|
|
|
const task = (async () => {
|
|
try {
|
|
const modelDir = this.resolveModelDir()
|
|
if (!existsSync(modelDir)) {
|
|
mkdirSync(modelDir, { recursive: true })
|
|
}
|
|
|
|
const modelPath = this.resolveModelPath(SENSEVOICE_MODEL.files.model)
|
|
const tokensPath = this.resolveModelPath(SENSEVOICE_MODEL.files.tokens)
|
|
const vadPath = this.resolveModelPath((SENSEVOICE_MODEL.files as any).vad)
|
|
|
|
// 下载模型文件 (40%)
|
|
await this.downloadToFile(
|
|
MODEL_DOWNLOAD_URLS.model,
|
|
modelPath,
|
|
'model',
|
|
(downloaded, total) => {
|
|
const percent = total ? (downloaded / total) * 40 : undefined
|
|
onProgress?.({
|
|
modelName: SENSEVOICE_MODEL.name,
|
|
downloadedBytes: downloaded,
|
|
totalBytes: SENSEVOICE_MODEL.sizeBytes,
|
|
percent
|
|
})
|
|
}
|
|
)
|
|
|
|
// 下载 tokens 文件 (30%)
|
|
await this.downloadToFile(
|
|
MODEL_DOWNLOAD_URLS.tokens,
|
|
tokensPath,
|
|
'tokens',
|
|
(downloaded, total) => {
|
|
const modelSize = existsSync(modelPath) ? statSync(modelPath).size : 0
|
|
const percent = total ? 40 + (downloaded / total) * 30 : 40
|
|
onProgress?.({
|
|
modelName: SENSEVOICE_MODEL.name,
|
|
downloadedBytes: modelSize + downloaded,
|
|
totalBytes: SENSEVOICE_MODEL.sizeBytes,
|
|
percent
|
|
})
|
|
}
|
|
)
|
|
|
|
// 下载 vad 文件 (30%)
|
|
await this.downloadToFile(
|
|
(MODEL_DOWNLOAD_URLS as any).vad,
|
|
vadPath,
|
|
'vad',
|
|
(downloaded, total) => {
|
|
const modelSize = existsSync(modelPath) ? statSync(modelPath).size : 0
|
|
const tokensSize = existsSync(tokensPath) ? statSync(tokensPath).size : 0
|
|
const percent = total ? 70 + (downloaded / total) * 30 : 70
|
|
onProgress?.({
|
|
modelName: SENSEVOICE_MODEL.name,
|
|
downloadedBytes: modelSize + tokensSize + downloaded,
|
|
totalBytes: SENSEVOICE_MODEL.sizeBytes,
|
|
percent
|
|
})
|
|
}
|
|
)
|
|
|
|
return { success: true, modelPath, tokensPath }
|
|
} catch (error) {
|
|
const modelPath = this.resolveModelPath(SENSEVOICE_MODEL.files.model)
|
|
const tokensPath = this.resolveModelPath(SENSEVOICE_MODEL.files.tokens)
|
|
const vadPath = this.resolveModelPath((SENSEVOICE_MODEL.files as any).vad)
|
|
try {
|
|
if (existsSync(modelPath)) unlinkSync(modelPath)
|
|
if (existsSync(tokensPath)) unlinkSync(tokensPath)
|
|
if (existsSync(vadPath)) unlinkSync(vadPath)
|
|
} catch { }
|
|
return { success: false, error: String(error) }
|
|
} finally {
|
|
this.downloadTasks.delete(cacheKey)
|
|
}
|
|
})()
|
|
|
|
this.downloadTasks.set(cacheKey, task)
|
|
return task
|
|
}
|
|
|
|
/**
|
|
* 转写 WAV 音频数据 (后台 Worker Threads 版本)
|
|
*/
|
|
async transcribeWavBuffer(
|
|
wavData: Buffer,
|
|
onPartial?: (text: string) => void
|
|
): Promise<{ success: boolean; transcript?: string; error?: string }> {
|
|
return new Promise((resolve) => {
|
|
try {
|
|
const modelPath = this.resolveModelPath(SENSEVOICE_MODEL.files.model)
|
|
const tokensPath = this.resolveModelPath(SENSEVOICE_MODEL.files.tokens)
|
|
|
|
if (!existsSync(modelPath) || !existsSync(tokensPath)) {
|
|
resolve({ success: false, error: '模型文件不存在,请先下载模型' })
|
|
return
|
|
}
|
|
|
|
const { Worker } = require('worker_threads')
|
|
// main.js 和 transcribeWorker.js 同在 dist-electron 目录下
|
|
const workerPath = join(__dirname, 'transcribeWorker.js')
|
|
|
|
const worker = new Worker(workerPath, {
|
|
workerData: {
|
|
modelPath,
|
|
tokensPath,
|
|
wavData,
|
|
sampleRate: 16000
|
|
}
|
|
})
|
|
|
|
let finalTranscript = ''
|
|
|
|
worker.on('message', (msg: any) => {
|
|
if (msg.type === 'partial') {
|
|
onPartial?.(msg.text)
|
|
} else if (msg.type === 'final') {
|
|
finalTranscript = msg.text
|
|
resolve({ success: true, transcript: finalTranscript })
|
|
worker.terminate()
|
|
} else if (msg.type === 'error') {
|
|
resolve({ success: false, error: msg.error })
|
|
worker.terminate()
|
|
}
|
|
})
|
|
|
|
worker.on('error', (err: Error) => {
|
|
resolve({ success: false, error: String(err) })
|
|
})
|
|
|
|
worker.on('exit', (code: number) => {
|
|
if (code !== 0) {
|
|
console.error(`[VoiceTranscribe] Worker stopped with exit code ${code}`)
|
|
resolve({ success: false, error: `Worker exited with code ${code}` })
|
|
}
|
|
})
|
|
|
|
} catch (error) {
|
|
resolve({ success: false, error: String(error) })
|
|
}
|
|
})
|
|
}
|
|
|
|
/**
|
|
* 下载文件
|
|
*/
|
|
private downloadToFile(
|
|
url: string,
|
|
targetPath: string,
|
|
fileName: string,
|
|
onProgress?: (downloaded: number, total?: number) => void,
|
|
remainingRedirects = 5
|
|
): Promise<void> {
|
|
return new Promise((resolve, reject) => {
|
|
const protocol = url.startsWith('https') ? https : http
|
|
console.info(`[VoiceTranscribe] 下载 ${fileName}:`, url)
|
|
|
|
const options = {
|
|
headers: {
|
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
|
|
}
|
|
}
|
|
|
|
const request = protocol.get(url, options, (response) => {
|
|
// 处理重定向
|
|
if ([301, 302, 303, 307, 308].includes(response.statusCode || 0) && response.headers.location) {
|
|
if (remainingRedirects <= 0) {
|
|
reject(new Error('重定向次数过多'))
|
|
return
|
|
}
|
|
console.info(`[VoiceTranscribe] 重定向到:`, response.headers.location)
|
|
this.downloadToFile(response.headers.location, targetPath, fileName, onProgress, remainingRedirects - 1)
|
|
.then(resolve)
|
|
.catch(reject)
|
|
return
|
|
}
|
|
|
|
if (response.statusCode !== 200) {
|
|
reject(new Error(`下载失败: HTTP ${response.statusCode}`))
|
|
return
|
|
}
|
|
|
|
const totalBytes = Number(response.headers['content-length'] || 0) || undefined
|
|
let downloadedBytes = 0
|
|
|
|
const writer = createWriteStream(targetPath)
|
|
|
|
response.on('data', (chunk) => {
|
|
downloadedBytes += chunk.length
|
|
onProgress?.(downloadedBytes, totalBytes)
|
|
})
|
|
|
|
response.on('error', (error) => {
|
|
try { writer.close() } catch { }
|
|
reject(error)
|
|
})
|
|
|
|
writer.on('error', (error) => {
|
|
try { writer.close() } catch { }
|
|
reject(error)
|
|
})
|
|
|
|
writer.on('finish', () => {
|
|
writer.close()
|
|
console.info(`[VoiceTranscribe] ${fileName} 下载完成:`, targetPath)
|
|
resolve()
|
|
})
|
|
|
|
response.pipe(writer)
|
|
})
|
|
|
|
request.on('error', (error) => {
|
|
console.error(`[VoiceTranscribe] ${fileName} 下载错误:`, error)
|
|
reject(error)
|
|
})
|
|
})
|
|
}
|
|
|
|
/**
|
|
* 清理资源
|
|
*/
|
|
dispose() {
|
|
if (this.recognizer) {
|
|
try {
|
|
// sherpa-onnx 的 recognizer 可能需要手动释放
|
|
this.recognizer = null
|
|
} catch (error) {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
export const voiceTranscribeService = new VoiceTranscribeService()
|
|
|