feat: 尝试增加一下聊天里面的语音转文字功能

2026-03-24 23:06:51 +00:00 · 2026-01-17 05:14:14 +08:00
parent 095c8f0db6
commit 72e2d82158
18 changed files with 999 additions and 66 deletions
--- a/electron/services/chatService.ts
+++ b/electron/services/chatService.ts
@@ -16,6 +16,7 @@ import { ConfigService } from './config'
 import { wcdbService } from './wcdbService'
 import { MessageCacheService } from './messageCacheService'
 import { ContactCacheService, ContactCacheEntry } from './contactCacheService'
+import { voiceTranscribeService } from './voiceTranscribeService'

 type HardlinkState = {
  db: Database.Database
@@ -83,6 +84,10 @@ class ChatService {
  private hardlinkCache = new Map<string, HardlinkState>()
  private readonly contactCacheService: ContactCacheService
  private readonly messageCacheService: MessageCacheService
+  private voiceWavCache = new Map<string, Buffer>()
+  private voiceTranscriptCache = new Map<string, string>()
+  private voiceTranscriptPending = new Map<string, Promise<{ success: boolean; transcript?: string; error?: string }>>()
+  private readonly voiceCacheMaxEntries = 50

  constructor() {
    this.configService = new ConfigService()
@@ -1738,6 +1743,9 @@ class ChatService {

    if (includeMessages) {
      this.messageCacheService.clear()
+      this.voiceWavCache.clear()
+      this.voiceTranscriptCache.clear()
+      this.voiceTranscriptPending.clear()
    }

    for (const state of this.hardlinkCache.values()) {
@@ -2263,6 +2271,8 @@ class ChatService {
        const pcmData = readFileSync(pcmFile)
        const wavHeader = this.createWavHeader(pcmData.length, 24000, 1) // 微信语音通常 24kHz
        const wavData = Buffer.concat([wavHeader, pcmData])
+        const cacheKey = this.getVoiceCacheKey(sessionId, msgId)
+        this.cacheVoiceWav(cacheKey, wavData)

        return { success: true, data: wavData.toString('base64') }
      } finally {
@@ -2276,6 +2286,45 @@ class ChatService {
    }
  }

+  /**
+   * Transcribe a voice message to text.
+   *
+   * Resolution order:
+   *  1. a transcript already stored in `voiceTranscriptCache`;
+   *  2. a transcription for the same message that is still in flight;
+   *  3. a fresh transcription, using the WAV buffer from `voiceWavCache` when
+   *     present and otherwise decoding the message through `getVoiceData`.
+   *
+   * Only successful, non-empty transcripts are cached, so failures are retried
+   * on the next call.
+   *
+   * @param sessionId Session the voice message belongs to.
+   * @param msgId Identifier of the voice message.
+   * @returns `{ success: true, transcript }` on success, otherwise
+   *          `{ success: false, error }`. Errors thrown while decoding or
+   *          transcribing are caught and reported through `error`.
+   */
+  async getVoiceTranscript(sessionId: string, msgId: string): Promise<{ success: boolean; transcript?: string; error?: string }> {
+    const cacheKey = this.getVoiceCacheKey(sessionId, msgId)
+    const cached = this.voiceTranscriptCache.get(cacheKey)
+    if (cached) {
+      return { success: true, transcript: cached }
+    }
+
+    // Share one transcription between concurrent callers of the same message.
+    const pending = this.voiceTranscriptPending.get(cacheKey)
+    if (pending) {
+      return pending
+    }
+
+    const task = (async () => {
+      try {
+        let wavData = this.voiceWavCache.get(cacheKey)
+        if (!wavData) {
+          const voiceResult = await this.getVoiceData(sessionId, msgId)
+          if (!voiceResult.success || !voiceResult.data) {
+            return { success: false, error: voiceResult.error || '语音解码失败' }
+          }
+          wavData = Buffer.from(voiceResult.data, 'base64')
+        }
+
+        const result = await voiceTranscribeService.transcribeWavBuffer(wavData)
+        if (result.success && result.transcript) {
+          this.cacheVoiceTranscript(cacheKey, result.transcript)
+        }
+        return result
+      } catch (error) {
+        return { success: false, error: String(error) }
+      }
+    })()
+
+    this.voiceTranscriptPending.set(cacheKey, task)
+
+    // Release the in-flight slot only after the task has settled. Promise
+    // callbacks always run after the `set` above, so a task that finishes
+    // before its first `await` cannot leave a settled promise registered
+    // forever. The identity check keeps an old task from removing a newer
+    // task's entry if the pending map was cleared and refilled meanwhile.
+    const release = (): void => {
+      if (this.voiceTranscriptPending.get(cacheKey) === task) {
+        this.voiceTranscriptPending.delete(cacheKey)
+      }
+    }
+    void task.then(release, release)
+
+    return task
+  }
+
  private createWavHeader(pcmLength: number, sampleRate: number = 24000, channels: number = 1): Buffer {
    const header = Buffer.alloc(44)
    header.write('RIFF', 0)
@@ -2294,6 +2343,26 @@ class ChatService {
    return header
  }

+  /**
+   * Build the key used by the voice caches: the session id and the message
+   * id joined by a colon.
+   */
+  private getVoiceCacheKey(sessionId: string, msgId: string): string {
+    return sessionId + ':' + msgId
+  }
+
+  /**
+   * Store a decoded WAV buffer under `cacheKey`, keeping the cache bounded.
+   *
+   * When the map grows past `voiceCacheMaxEntries`, the first key in iteration
+   * order (the earliest inserted one) is dropped. Overwriting an existing key
+   * does not change its position.
+   */
+  private cacheVoiceWav(cacheKey: string, wavData: Buffer): void {
+    const cache = this.voiceWavCache
+    cache.set(cacheKey, wavData)
+    if (cache.size <= this.voiceCacheMaxEntries) return
+
+    const [evictKey] = cache.keys()
+    if (evictKey) cache.delete(evictKey)
+  }
+
+  /**
+   * Store a transcript under `cacheKey`, keeping the cache bounded.
+   *
+   * When the map grows past `voiceCacheMaxEntries`, the first key in iteration
+   * order (the earliest inserted one) is dropped. Overwriting an existing key
+   * does not change its position.
+   */
+  private cacheVoiceTranscript(cacheKey: string, transcript: string): void {
+    const cache = this.voiceTranscriptCache
+    cache.set(cacheKey, transcript)
+    if (cache.size <= this.voiceCacheMaxEntries) return
+
+    const [evictKey] = cache.keys()
+    if (evictKey) cache.delete(evictKey)
+  }
+
  async getMessageById(sessionId: string, localId: number): Promise<{ success: boolean; message?: Message; error?: string }> {
    try {
      console.info('[ChatService] getMessageById (SQL)', { sessionId, localId })