feat(voice-transcribe): 新增语音转写语言过滤配置功能(支持用户自定义允许的转写语言),优化模型下载的超时处理与进度日志,提升下载稳健性,同步更新相关 UI 样式。

This commit is contained in:
Forrest
2026-01-17 19:54:31 +08:00
parent dc12df0fcf
commit 0853e049c8
9 changed files with 338 additions and 66 deletions

View File

@@ -23,6 +23,8 @@ interface ConfigSchema {
whisperModelName: string
whisperModelDir: string
whisperDownloadSource: string
autoTranscribeVoice: boolean
transcribeLanguages: string[]
}
export class ConfigService {
@@ -48,7 +50,9 @@ export class ConfigService {
llmModelPath: '',
whisperModelName: 'base',
whisperModelDir: '',
whisperDownloadSource: 'tsinghua'
whisperDownloadSource: 'tsinghua',
autoTranscribeVoice: false,
transcribeLanguages: ['zh']
}
})
}

View File

@@ -123,7 +123,16 @@ export class VoiceTranscribeService {
const tokensPath = this.resolveModelPath(SENSEVOICE_MODEL.files.tokens)
const vadPath = this.resolveModelPath((SENSEVOICE_MODEL.files as any).vad)
// 初始进度
onProgress?.({
modelName: SENSEVOICE_MODEL.name,
downloadedBytes: 0,
totalBytes: SENSEVOICE_MODEL.sizeBytes,
percent: 0
})
// 下载模型文件 (40%)
console.info('[VoiceTranscribe] 开始下载模型文件...')
await this.downloadToFile(
MODEL_DOWNLOAD_URLS.model,
modelPath,
@@ -140,6 +149,7 @@ export class VoiceTranscribeService {
)
// 下载 tokens 文件 (30%)
console.info('[VoiceTranscribe] 开始下载 tokens 文件...')
await this.downloadToFile(
MODEL_DOWNLOAD_URLS.tokens,
tokensPath,
@@ -157,6 +167,7 @@ export class VoiceTranscribeService {
)
// 下载 vad 文件 (30%)
console.info('[VoiceTranscribe] 开始下载 VAD 文件...')
await this.downloadToFile(
(MODEL_DOWNLOAD_URLS as any).vad,
vadPath,
@@ -174,6 +185,7 @@ export class VoiceTranscribeService {
}
)
console.info('[VoiceTranscribe] 所有文件下载完成')
return { success: true, modelPath, tokensPath }
} catch (error) {
const modelPath = this.resolveModelPath(SENSEVOICE_MODEL.files.model)
@@ -199,7 +211,8 @@ export class VoiceTranscribeService {
*/
async transcribeWavBuffer(
wavData: Buffer,
onPartial?: (text: string) => void
onPartial?: (text: string) => void,
languages?: string[]
): Promise<{ success: boolean; transcript?: string; error?: string }> {
return new Promise((resolve) => {
try {
@@ -211,6 +224,16 @@ export class VoiceTranscribeService {
return
}
// 获取配置的语言列表,如果没有传入则从配置读取
let supportedLanguages = languages
if (!supportedLanguages || supportedLanguages.length === 0) {
supportedLanguages = this.configService.get('transcribeLanguages')
// 如果配置中也没有或为空,使用默认值
if (!supportedLanguages || supportedLanguages.length === 0) {
supportedLanguages = ['zh']
}
}
const { Worker } = require('worker_threads')
// main.js 和 transcribeWorker.js 同在 dist-electron 目录下
const workerPath = join(__dirname, 'transcribeWorker.js')
@@ -220,7 +243,8 @@ export class VoiceTranscribeService {
modelPath,
tokensPath,
wavData,
sampleRate: 16000
sampleRate: 16000,
languages: supportedLanguages
}
})
@@ -273,10 +297,13 @@ export class VoiceTranscribeService {
const options = {
headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
}
},
timeout: 30000 // 30秒连接超时
}
const request = protocol.get(url, options, (response) => {
console.info(`[VoiceTranscribe] ${fileName} 响应状态:`, response.statusCode)
// 处理重定向
if ([301, 302, 303, 307, 308].includes(response.statusCode || 0) && response.headers.location) {
if (remainingRedirects <= 0) {
@@ -297,25 +324,44 @@ export class VoiceTranscribeService {
const totalBytes = Number(response.headers['content-length'] || 0) || undefined
let downloadedBytes = 0
console.info(`[VoiceTranscribe] ${fileName} 文件大小:`, totalBytes ? `${(totalBytes / 1024 / 1024).toFixed(2)} MB` : '未知')
const writer = createWriteStream(targetPath)
// 设置数据接收超时60秒没有数据则超时
let lastDataTime = Date.now()
const dataTimeout = setInterval(() => {
if (Date.now() - lastDataTime > 60000) {
clearInterval(dataTimeout)
response.destroy()
writer.close()
reject(new Error('下载超时60秒内未收到数据'))
}
}, 5000)
response.on('data', (chunk) => {
lastDataTime = Date.now()
downloadedBytes += chunk.length
onProgress?.(downloadedBytes, totalBytes)
})
response.on('error', (error) => {
clearInterval(dataTimeout)
try { writer.close() } catch { }
console.error(`[VoiceTranscribe] ${fileName} 响应错误:`, error)
reject(error)
})
writer.on('error', (error) => {
clearInterval(dataTimeout)
try { writer.close() } catch { }
console.error(`[VoiceTranscribe] ${fileName} 写入错误:`, error)
reject(error)
})
writer.on('finish', () => {
clearInterval(dataTimeout)
writer.close()
console.info(`[VoiceTranscribe] ${fileName} 下载完成:`, targetPath)
resolve()
@@ -324,8 +370,14 @@ export class VoiceTranscribeService {
response.pipe(writer)
})
request.on('timeout', () => {
request.destroy()
console.error(`[VoiceTranscribe] ${fileName} 连接超时`)
reject(new Error('连接超时'))
})
request.on('error', (error) => {
console.error(`[VoiceTranscribe] ${fileName} 下载错误:`, error)
console.error(`[VoiceTranscribe] ${fileName} 请求错误:`, error)
reject(error)
})
})

View File

@@ -6,6 +6,43 @@ interface WorkerParams {
tokensPath: string
wavData: Buffer
sampleRate: number
languages?: string[]
}
// Mapping from language codes (as stored in config `transcribeLanguages`)
// to the SenseVoice language tags emitted in recognition results.
const LANGUAGE_TAGS: Record<string, string> = {
  'zh': '<|zh|>',
  'en': '<|en|>',
  'ja': '<|ja|>',
  'ko': '<|ko|>',
  'yue': '<|yue|>' // Cantonese
}

/**
 * Check whether a recognition result's detected language is on the whitelist.
 *
 * @param result - Recognizer result object; only its `lang` tag (e.g. `'<|zh|>'`)
 *                 is inspected. Shape comes from sherpa-onnx — TODO confirm.
 * @param allowedLanguages - Language codes the user permits (keys of LANGUAGE_TAGS).
 *                           An empty/missing list falls back to Chinese only.
 * @returns true when the result should be kept, false when it must be filtered out.
 */
function isLanguageAllowed(result: any, allowedLanguages: string[]): boolean {
  // No language information on the result — allow by default rather than drop audio.
  if (!result || !result.lang) {
    return true
  }
  // Fall back to Chinese-only without mutating the caller's parameter.
  const whitelist =
    allowedLanguages && allowedLanguages.length > 0 ? allowedLanguages : ['zh']
  const langTag = result.lang
  console.log('[TranscribeWorker] 检测到语言标记:', langTag)
  // Idiomatic membership scan: find the whitelist entry whose tag matches.
  const matched = whitelist.find((lang) => LANGUAGE_TAGS[lang] === langTag)
  if (matched !== undefined) {
    console.log('[TranscribeWorker] 语言匹配,允许:', matched)
    return true
  }
  console.log('[TranscribeWorker] 语言不在白名单中,过滤掉')
  return false
}
async function run() {
@@ -23,8 +60,16 @@ async function run() {
return;
}
const { modelPath, tokensPath, wavData: rawWavData, sampleRate } = workerData as WorkerParams
const { modelPath, tokensPath, wavData: rawWavData, sampleRate, languages } = workerData as WorkerParams
const wavData = Buffer.from(rawWavData);
// 确保有有效的语言列表,默认只允许中文
let allowedLanguages = languages || ['zh']
if (allowedLanguages.length === 0) {
allowedLanguages = ['zh']
}
console.log('[TranscribeWorker] 使用的语言白名单:', allowedLanguages)
// 1. 初始化识别器 (SenseVoiceSmall)
const recognizerConfig = {
modelConfig: {
@@ -66,7 +111,16 @@ async function run() {
recognizer.decode(stream)
const result = recognizer.getResult(stream)
parentPort.postMessage({ type: 'final', text: result.text })
console.log('[TranscribeWorker] 非VAD模式 - 识别结果对象:', JSON.stringify(result, null, 2))
// 检查语言是否在白名单中
if (isLanguageAllowed(result, allowedLanguages)) {
console.log('[TranscribeWorker] 非VAD模式 - 保留文本:', result.text)
parentPort.postMessage({ type: 'final', text: result.text })
} else {
console.log('[TranscribeWorker] 非VAD模式 - 语言不匹配,返回空文本')
parentPort.postMessage({ type: 'final', text: '' })
}
return
}
@@ -100,13 +154,18 @@ async function run() {
recognizer.decode(stream)
const result = recognizer.getResult(stream)
if (result.text) {
const text = result.text.trim();
console.log('[TranscribeWorker] 识别结果 - lang:', result.lang, 'text:', result.text)
// 检查语言是否在白名单中
if (result.text && isLanguageAllowed(result, allowedLanguages)) {
const text = result.text.trim()
if (text.length > 0) {
accumulatedText += (accumulatedText ? ' ' : '') + text
segmentCount++;
parentPort.postMessage({ type: 'partial', text: accumulatedText })
}
} else if (result.text) {
console.log('[TranscribeWorker] 跳过不匹配的语言段落')
}
vad.pop()
}
@@ -124,9 +183,16 @@ async function run() {
stream.acceptWaveform({ sampleRate, samples: segment.samples })
recognizer.decode(stream)
const result = recognizer.getResult(stream)
if (result.text) {
accumulatedText += (accumulatedText ? ' ' : '') + result.text.trim()
parentPort.postMessage({ type: 'partial', text: accumulatedText })
console.log('[TranscribeWorker] flush阶段 - lang:', result.lang, 'text:', result.text)
// 检查语言是否在白名单中
if (result.text && isLanguageAllowed(result, allowedLanguages)) {
const text = result.text.trim()
if (text) {
accumulatedText += (accumulatedText ? ' ' : '') + text
parentPort.postMessage({ type: 'partial', text: accumulatedText })
}
}
vad.pop();
}