Files
WeFlow/electron/transcribeWorker.ts
Forrest dc12df0fcf fix: 清理导出服务日志并简化whisper接口参数
- 移除exportService中的冗余console日志输出
- 简化whisper API接口,移除downloadModel和getModelStatus的payload参数
- 清理图片、表情、语音导出过程中的调试日志
- 移除数据库查询和媒体处理中的详细日志记录
- 优化代码可读性,减少控制台输出噪音
2026-01-17 16:24:18 +08:00

143 lines
4.7 KiB
TypeScript

import { parentPort, workerData } from 'worker_threads'
import * as fs from 'fs'
/**
 * Payload handed to this worker through `workerData`.
 */
interface WorkerParams {
  // Path to the SenseVoice ONNX model; the silero VAD model is looked up
  // alongside it (same directory, `silero_vad.onnx`).
  modelPath: string
  // Path to the recognizer's tokens file.
  tokensPath: string
  // WAV file contents. NOTE(review): the decoder skips a fixed 44-byte
  // header and reads 16-bit little-endian PCM — confirm the producer
  // always emits that canonical layout.
  wavData: Buffer
  // Sample rate of the PCM payload (VAD chunking assumes 16 kHz).
  sampleRate: number
}
/** Convert 16-bit little-endian PCM bytes to normalized floats in [-1, 1). */
function pcmToFloat32(pcmData: Buffer): Float32Array {
  // `>> 1` floors the count so a trailing odd byte cannot produce a
  // fractional Float32Array length (which would throw a RangeError).
  const sampleCount = pcmData.length >> 1
  const samples = new Float32Array(sampleCount)
  for (let i = 0; i < sampleCount; i++) {
    samples[i] = pcmData.readInt16LE(i * 2) / 32768.0
  }
  return samples
}

/** Decode one sample buffer with the offline recognizer; returns trimmed text. */
function decodeSamples(recognizer: any, sampleRate: number, samples: Float32Array): string {
  const stream = recognizer.createStream()
  stream.acceptWaveform({ sampleRate, samples })
  recognizer.decode(stream)
  const result = recognizer.getResult(stream)
  return (result.text ?? '').trim()
}

/**
 * Worker entry point: transcribes the WAV buffer from `workerData` with
 * sherpa-onnx (SenseVoiceSmall). Posts a `partial` message for each decoded
 * VAD speech segment and a single `final` message when done. Engine-load and
 * runtime failures are reported via an `error` message instead of throwing.
 */
async function run(): Promise<void> {
  if (!parentPort) {
    return;
  }
  const port = parentPort
  try {
    // Load dynamically so load failures (e.g. missing C++ runtime) can be
    // reported to the parent instead of crashing the worker at import time.
    let sherpa: any;
    try {
      sherpa = require('sherpa-onnx-node');
    } catch (requireError) {
      port.postMessage({ type: 'error', error: 'Failed to load speech engine: ' + String(requireError) });
      return;
    }
    const { modelPath, tokensPath, wavData: rawWavData, sampleRate } = workerData as WorkerParams
    // Re-wrap: data crossing the thread boundary may arrive as a plain
    // Uint8Array rather than a Buffer.
    const wavData = Buffer.from(rawWavData);

    // 1. Initialize the offline recognizer (SenseVoiceSmall).
    const recognizerConfig = {
      modelConfig: {
        senseVoice: {
          model: modelPath,
          useInverseTextNormalization: 1
        },
        tokens: tokensPath,
        numThreads: 2,
        debug: 0
      }
    }
    const recognizer = new sherpa.OfflineRecognizer(recognizerConfig)

    // PCM payload follows the WAV header. NOTE(review): assumes a canonical
    // 44-byte header with 16-bit PCM — confirm against the audio producer.
    const samples = pcmToFloat32(wavData.subarray(44))

    // 2. VAD provides pseudo-streaming partial output; if the VAD model is
    // missing, fall back to a single full-buffer decode.
    const vadPath = modelPath.replace('model.int8.onnx', 'silero_vad.onnx');
    if (!fs.existsSync(vadPath)) {
      port.postMessage({ type: 'final', text: decodeSamples(recognizer, sampleRate, samples) })
      return
    }
    const vadConfig = {
      sileroVad: {
        model: vadPath,
        threshold: 0.5,
        minSilenceDuration: 0.5,
        minSpeechDuration: 0.25,
        windowSize: 512
      },
      sampleRate: sampleRate,
      debug: 0,
      numThreads: 1
    }
    const vad = new sherpa.Vad(vadConfig, 60) // 60s max internal buffer

    let accumulatedText = ''
    // Decode every speech segment currently queued in the VAD, appending
    // non-empty results to the transcript and emitting a `partial` update.
    const drainVad = () => {
      while (!vad.isEmpty()) {
        const segment = vad.front(false)
        const text = decodeSamples(recognizer, sampleRate, segment.samples)
        if (text.length > 0) {
          accumulatedText += (accumulatedText ? ' ' : '') + text
          port.postMessage({ type: 'partial', text: accumulatedText })
        }
        vad.pop()
      }
    }

    // 3. Simulate streaming input: feed the VAD in 100 ms chunks (at 16 kHz).
    const chunkSize = 1600
    let offset = 0
    while (offset < samples.length) {
      const end = Math.min(offset + chunkSize, samples.length)
      vad.acceptWaveform(samples.subarray(offset, end))
      drainVad()
      offset = end
      // Yield to the event loop so the worker stays responsive.
      await new Promise(resolve => setImmediate(resolve))
    }

    // Flush and decode any audio still buffered inside the VAD.
    vad.flush();
    drainVad()

    port.postMessage({ type: 'final', text: accumulatedText })
  } catch (error) {
    port.postMessage({ type: 'error', error: String(error) })
  }
}

run();