Files
WeFlow/electron/transcribeWorker.ts

250 lines
7.7 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { parentPort, workerData } from 'worker_threads'
import { existsSync } from 'fs'
import { join } from 'path'
interface WorkerParams {
modelPath: string
tokensPath: string
wavData: Buffer | Uint8Array | { type: 'Buffer'; data: number[] }
sampleRate: number
languages?: string[]
}
function appendLibrarySearchPath(libDir: string): void {
if (!existsSync(libDir)) return
if (process.platform === 'darwin') {
const current = process.env.DYLD_LIBRARY_PATH || ''
const paths = current.split(':').filter(Boolean)
if (!paths.includes(libDir)) {
process.env.DYLD_LIBRARY_PATH = [libDir, ...paths].join(':')
}
return
}
if (process.platform === 'linux') {
const current = process.env.LD_LIBRARY_PATH || ''
const paths = current.split(':').filter(Boolean)
if (!paths.includes(libDir)) {
process.env.LD_LIBRARY_PATH = [libDir, ...paths].join(':')
}
}
}
function prepareSherpaRuntimeEnv(): void {
const platform = process.platform === 'win32' ? 'win' : process.platform
const platformPkg = `sherpa-onnx-${platform}-${process.arch}`
const resourcesPath = (process as any).resourcesPath as string | undefined
const candidates = [
// Dev: /project/dist-electron -> /project/node_modules/...
join(__dirname, '..', 'node_modules', platformPkg),
// Fallback for alternate layouts
join(__dirname, 'node_modules', platformPkg),
join(process.cwd(), 'node_modules', platformPkg),
// Packaged app: Resources/app.asar.unpacked/node_modules/...
resourcesPath ? join(resourcesPath, 'app.asar.unpacked', 'node_modules', platformPkg) : ''
].filter(Boolean)
for (const dir of candidates) {
appendLibrarySearchPath(dir)
}
}
// 语言标记映射
const LANGUAGE_TAGS: Record<string, string> = {
'zh': '<|zh|>',
'en': '<|en|>',
'ja': '<|ja|>',
'ko': '<|ko|>',
'yue': '<|yue|>' // 粤语
}
// 技术标签识别语言、语速、ITN等需要从最终文本中移除
const TECH_TAGS = [
'<|zh|>', '<|en|>', '<|ja|>', '<|ko|>', '<|yue|>',
'<|nospeech|>', '<|speech|>',
'<|itn|>', '<|wo_itn|>',
'<|NORMAL|>'
]
// 情感与事件标签映射,转换为直观的 Emoji
const RICH_TAG_MAP: Record<string, string> = {
'<|HAPPY|>': '😊',
'<|SAD|>': '😔',
'<|ANGRY|>': '😠',
'<|NEUTRAL|>': '', // 中性情感不特别标记
'<|FEARFUL|>': '😨',
'<|DISGUSTED|>': '🤢',
'<|SURPRISED|>': '😮',
'<|BGM|>': '🎵',
'<|Applause|>': '👏',
'<|Laughter|>': '😂',
'<|Cry|>': '😭',
'<|Cough|>': ' (咳嗽) ',
'<|Sneeze|>': ' (喷嚏) ',
}
/**
* 富文本后处理:移除技术标签,转换识别出的情感和声音事件
*/
function richTranscribePostProcess(text: string): string {
if (!text) return ''
let processed = text
// 1. 转换情感和事件标签
for (const [tag, replacement] of Object.entries(RICH_TAG_MAP)) {
// 使用正则全局替换,不区分大小写以防不同版本差异
const escapedTag = tag.replace(/[|<>]/g, '\\$&')
processed = processed.replace(new RegExp(escapedTag, 'gi'), replacement)
}
// 2. 移除所有剩余的技术标签
for (const tag of TECH_TAGS) {
const escapedTag = tag.replace(/[|<>]/g, '\\$&')
processed = processed.replace(new RegExp(escapedTag, 'gi'), '')
}
// 3. 清理多余空格并返回
return processed.replace(/\s+/g, ' ').trim()
}
// 检查识别结果是否在允许的语言列表中
function isLanguageAllowed(result: any, allowedLanguages: string[]): boolean {
if (!result || !result.lang) {
// 如果没有语言信息,默认允许(或从文本开头尝试提取)
return true
}
// 如果没有指定语言或语言列表为空,默认允许中文和粤语
if (!allowedLanguages || allowedLanguages.length === 0) {
allowedLanguages = ['zh', 'yue']
}
const langTag = result.lang
// 检查是否在允许的语言列表中
for (const lang of allowedLanguages) {
if (LANGUAGE_TAGS[lang] === langTag) {
return true
}
}
return false
}
async function run() {
const isForkProcess = !parentPort
const emit = (msg: any) => {
if (parentPort) {
parentPort.postMessage(msg)
return
}
if (typeof process.send === 'function') {
process.send(msg)
}
}
const normalizeBuffer = (data: WorkerParams['wavData']): Buffer => {
if (Buffer.isBuffer(data)) return data
if (data instanceof Uint8Array) return Buffer.from(data)
if (data && typeof data === 'object' && (data as any).type === 'Buffer' && Array.isArray((data as any).data)) {
return Buffer.from((data as any).data)
}
return Buffer.alloc(0)
}
const readParams = async (): Promise<WorkerParams | null> => {
if (parentPort) {
return workerData as WorkerParams
}
return new Promise((resolve) => {
let settled = false
const finish = (value: WorkerParams | null) => {
if (settled) return
settled = true
resolve(value)
}
process.once('message', (msg) => finish(msg as WorkerParams))
process.once('disconnect', () => finish(null))
})
}
try {
prepareSherpaRuntimeEnv()
const params = await readParams()
if (!params) return
// 动态加载以捕获可能的加载错误(如 C++ 运行库缺失等)
let sherpa: any;
try {
sherpa = require('sherpa-onnx-node');
} catch (requireError) {
emit({ type: 'error', error: 'Failed to load speech engine: ' + String(requireError) });
if (isForkProcess) process.exit(1)
return;
}
const { modelPath, tokensPath, wavData: rawWavData, sampleRate, languages } = params
const wavData = normalizeBuffer(rawWavData);
// 确保有有效的语言列表,默认只允许中文
let allowedLanguages = languages || ['zh']
if (allowedLanguages.length === 0) {
allowedLanguages = ['zh']
}
// 1. 初始化识别器 (SenseVoiceSmall)
const recognizerConfig = {
modelConfig: {
senseVoice: {
model: modelPath,
useInverseTextNormalization: 1
},
tokens: tokensPath,
numThreads: 2,
debug: 0
}
}
const recognizer = new sherpa.OfflineRecognizer(recognizerConfig)
// 2. 处理音频数据 (全量识别)
const pcmData = wavData.slice(44)
const samples = new Float32Array(pcmData.length / 2)
for (let i = 0; i < samples.length; i++) {
samples[i] = pcmData.readInt16LE(i * 2) / 32768.0
}
const stream = recognizer.createStream()
stream.acceptWaveform({ sampleRate, samples })
recognizer.decode(stream)
const result = recognizer.getResult(stream)
// 3. 检查语言是否在白名单中
if (isLanguageAllowed(result, allowedLanguages)) {
const processedText = richTranscribePostProcess(result.text)
emit({ type: 'final', text: processedText })
if (isForkProcess) process.exit(0)
} else {
emit({ type: 'final', text: '' })
if (isForkProcess) process.exit(0)
}
} catch (error) {
emit({ type: 'error', error: String(error) })
if (isForkProcess) process.exit(1)
}
}
run();