feat: 实现语音转文字并支持流式输出;

fix: 修复了语音解密失败的问题
This commit is contained in:
cc
2026-01-17 14:16:54 +08:00
parent 650de55202
commit e8babd48b6
33 changed files with 1713 additions and 570 deletions

View File

@@ -439,12 +439,14 @@ function registerIpcHandlers() {
return chatService.getImageData(sessionId, msgId)
})
ipcMain.handle('chat:getVoiceData', async (_, sessionId: string, msgId: string) => {
return chatService.getVoiceData(sessionId, msgId)
ipcMain.handle('chat:getVoiceData', async (_, sessionId: string, msgId: string, createTime?: number, serverId?: string | number) => {
return chatService.getVoiceData(sessionId, msgId, createTime, serverId)
})
ipcMain.handle('chat:getVoiceTranscript', async (_, sessionId: string, msgId: string) => {
return chatService.getVoiceTranscript(sessionId, msgId)
ipcMain.handle('chat:getVoiceTranscript', async (event, sessionId: string, msgId: string) => {
return chatService.getVoiceTranscript(sessionId, msgId, (text) => {
event.sender.send('chat:voiceTranscriptPartial', { msgId, text })
})
})
ipcMain.handle('chat:getMessageById', async (_, sessionId: string, localId: number) => {
@@ -521,14 +523,14 @@ function registerIpcHandlers() {
return { success: true }
})
ipcMain.handle('whisper:downloadModel', async (event, payload: { modelName: string; downloadDir?: string; source?: string }) => {
return voiceTranscribeService.downloadModel(payload, (progress) => {
ipcMain.handle('whisper:downloadModel', async (event) => {
return voiceTranscribeService.downloadModel((progress) => {
event.sender.send('whisper:downloadProgress', progress)
})
})
ipcMain.handle('whisper:getModelStatus', async (_, payload: { modelName: string; downloadDir?: string }) => {
return voiceTranscribeService.getModelStatus(payload)
ipcMain.handle('whisper:getModelStatus', async () => {
return voiceTranscribeService.getModelStatus()
})
// 群聊分析相关

View File

@@ -106,8 +106,14 @@ contextBridge.exposeInMainWorld('electronAPI', {
close: () => ipcRenderer.invoke('chat:close'),
getSessionDetail: (sessionId: string) => ipcRenderer.invoke('chat:getSessionDetail', sessionId),
getImageData: (sessionId: string, msgId: string) => ipcRenderer.invoke('chat:getImageData', sessionId, msgId),
getVoiceData: (sessionId: string, msgId: string) => ipcRenderer.invoke('chat:getVoiceData', sessionId, msgId),
getVoiceTranscript: (sessionId: string, msgId: string) => ipcRenderer.invoke('chat:getVoiceTranscript', sessionId, msgId)
getVoiceData: (sessionId: string, msgId: string, createTime?: number, serverId?: string | number) =>
ipcRenderer.invoke('chat:getVoiceData', sessionId, msgId, createTime, serverId),
getVoiceTranscript: (sessionId: string, msgId: string) => ipcRenderer.invoke('chat:getVoiceTranscript', sessionId, msgId),
// Subscribe to streaming voice-transcript chunks pushed from the main process
// on 'chat:voiceTranscriptPartial'. Returns an unsubscribe function so the
// renderer can detach the listener and avoid leaks on unmount.
// NOTE: the event parameter is intentionally ignored; typed `unknown` (not
// `any`) so it cannot be used unchecked.
onVoiceTranscriptPartial: (callback: (payload: { msgId: string; text: string }) => void) => {
const listener = (_event: unknown, payload: { msgId: string; text: string }) => callback(payload)
ipcRenderer.on('chat:voiceTranscriptPartial', listener)
return () => ipcRenderer.removeListener('chat:voiceTranscriptPartial', listener)
}
},

View File

@@ -324,7 +324,7 @@ class AnalyticsService {
}
private getCacheFilePath(): string {
return join(app.getPath('userData'), 'analytics_cache.json')
return join(app.getPath('documents'), 'WeFlow', 'analytics_cache.json')
}
private async loadCacheFromFile(): Promise<{ key: string; data: any; updatedAt: number } | null> {

View File

@@ -7,11 +7,7 @@ import * as http from 'http'
import * as fzstd from 'fzstd'
import * as crypto from 'crypto'
import Database from 'better-sqlite3'
import { execFile } from 'child_process'
import { promisify } from 'util'
import { app } from 'electron'
const execFileAsync = promisify(execFile)
import { ConfigService } from './config'
import { wcdbService } from './wcdbService'
import { MessageCacheService } from './messageCacheService'
@@ -2149,7 +2145,107 @@ class ChatService {
}
}
async getVoiceData(sessionId: string, msgId: string): Promise<{ success: boolean; data?: string; error?: string }> {
/**
 * Fetch the decrypted audio of a voice message as a base64-encoded WAV.
 *
 * Lookup order:
 *   1. On-disk WAV cache (see getVoiceCacheFilePath).
 *   2. Native WCDB (C++) voice query keyed by createTime / server id,
 *      followed by silk-wasm decoding (Silk -> PCM -> WAV).
 * Successful decodes are persisted to both the file cache and the in-memory
 * WAV cache before returning.
 *
 * @param sessionId  chat session id the message belongs to
 * @param msgId      local message id as a string (parsed to an integer)
 * @param createTime optional message timestamp; looked up from the DB when absent
 * @param serverId   optional server-side message id; looked up when absent
 */
async getVoiceData(sessionId: string, msgId: string, createTime?: number, serverId?: string | number): Promise<{ success: boolean; data?: string; error?: string }> {
try {
const localId = parseInt(msgId, 10)
if (isNaN(localId)) {
return { success: false, error: '无效的消息ID' }
}
// Fast path: a previously decoded WAV persisted on disk.
const cacheKey = this.getVoiceCacheKey(sessionId, msgId)
const cachedFile = this.getVoiceCacheFilePath(cacheKey)
if (existsSync(cachedFile)) {
try {
const wavData = readFileSync(cachedFile)
console.info('[ChatService][Voice] 使用缓存文件:', cachedFile)
return { success: true, data: wavData.toString('base64') }
} catch (e) {
console.error('[ChatService][Voice] 读取缓存失败:', e)
// Fall through and re-decrypt from the database.
}
}
// 1. Resolve createTime and server id, querying the message row only when
//    the caller did not supply them.
let msgCreateTime = createTime
let msgSvrId: string | number = serverId || 0
if (!msgCreateTime || msgCreateTime === 0) {
const msgResult = await this.getMessageByLocalId(sessionId, localId)
if (msgResult.success && msgResult.message) {
const msg = msgResult.message as any
msgCreateTime = msg.createTime || msg.create_time
// Probe the common server-id column aliases, but only when no serverId
// was passed in by the caller.
if (!msgSvrId || msgSvrId === 0) {
msgSvrId = msg.serverId || msg.svr_id || msg.msg_svr_id || msg.message_id || 0
}
}
}
if (!msgCreateTime) {
return { success: false, error: '未找到消息时间戳' }
}
// 2. Candidate owner ids to try: the session id first, then my own wxid.
const candidates: string[] = []
if (sessionId) candidates.push(sessionId)
const myWxid = this.configService.get('myWxid') as string
if (myWxid && !candidates.includes(myWxid)) {
candidates.push(myWxid)
}
// 3. Ask the native (C++) layer for the voice payload as a hex string.
const voiceRes = await wcdbService.getVoiceData(sessionId, msgCreateTime, candidates, msgSvrId)
if (!voiceRes.success || !voiceRes.hex) {
return { success: false, error: voiceRes.error || '未找到语音数据' }
}
// 4. Hex string -> Buffer holding the raw Silk stream.
const silkData = Buffer.from(voiceRes.hex, 'hex')
// 5. Decode Silk -> PCM with silk-wasm, then wrap in a WAV container.
try {
const pcmData = await this.decodeSilkToPcm(silkData, 24000)
if (!pcmData) {
return { success: false, error: 'Silk 解码失败' }
}
// PCM -> WAV
const wavData = this.createWavBuffer(pcmData, 24000)
// Persist to the file cache; failures here are logged but non-fatal.
try {
this.saveVoiceCache(cacheKey, wavData)
console.info('[ChatService][Voice] 已保存缓存:', cachedFile)
} catch (e) {
console.error('[ChatService][Voice] 保存缓存失败:', e)
// Does not affect the returned result.
}
// Also keep the decoded WAV in the in-memory cache.
this.cacheVoiceWav(cacheKey, wavData)
return { success: true, data: wavData.toString('base64') }
} catch (e) {
console.error('[ChatService][Voice] decoding error:', e)
return { success: false, error: '语音解码失败: ' + String(e) }
}
} catch (e) {
console.error('ChatService: getVoiceData 失败:', e)
return { success: false, error: String(e) }
}
}
async getVoiceData_Legacy(sessionId: string, msgId: string): Promise<{ success: boolean; data?: string; error?: string }> {
try {
const localId = parseInt(msgId, 10)
const msgResult = await this.getMessageByLocalId(sessionId, localId)
@@ -2187,12 +2283,10 @@ class ChatService {
for (const dbPath of (mediaDbs.data || [])) {
const voiceTable = await this.resolveVoiceInfoTableName(dbPath)
if (!voiceTable) {
console.warn('[ChatService][Voice] voice table not found', dbPath)
continue
}
const columns = await this.resolveVoiceInfoColumns(dbPath, voiceTable)
if (!columns) {
console.warn('[ChatService][Voice] voice columns not found', { dbPath, voiceTable })
continue
}
for (const candidate of candidates) {
@@ -2233,52 +2327,44 @@ class ChatService {
}
}
if (silkData) break
// 策略 3: 只使用 CreateTime (兜底)
if (!silkData && columns.createTimeColumn) {
const whereClause = `${columns.createTimeColumn} = ${msg.createTime}`
const sql = `SELECT ${columns.dataColumn} AS data FROM ${voiceTable} WHERE ${whereClause} LIMIT 1`
const result = await wcdbService.execQuery('media', dbPath, sql)
if (result.success && result.rows && result.rows.length > 0) {
const raw = result.rows[0]?.data
const decoded = this.decodeVoiceBlob(raw)
if (decoded && decoded.length > 0) {
console.info('[ChatService][Voice] hit by createTime only', { dbPath, voiceTable, whereClause, bytes: decoded.length })
silkData = decoded
}
}
}
if (silkData) break
}
if (!silkData) return { success: false, error: '未找到语音数据' }
// 4. 解码 Silk -> PCM -> WAV
const resourcesPath = app.isPackaged
? join(process.resourcesPath, 'resources')
: join(app.getAppPath(), 'resources')
const decoderPath = join(resourcesPath, 'silk_v3_decoder.exe')
if (!existsSync(decoderPath)) {
return { success: false, error: '找不到语音解码器 (silk_v3_decoder.exe)' }
}
console.info('[ChatService][Voice] decoder path', decoderPath)
const tempDir = app.getPath('temp')
const silkFile = join(tempDir, `voice_${msgId}.silk`)
const pcmFile = join(tempDir, `voice_${msgId}.pcm`)
// 4. 使用 silk-wasm 解码
try {
writeFileSync(silkFile, silkData)
// 执行解码: silk_v3_decoder.exe <silk> <pcm> -Fs_API 24000
console.info('[ChatService][Voice] executing decoder:', decoderPath, [silkFile, pcmFile])
const { stdout, stderr } = await execFileAsync(
decoderPath,
[silkFile, pcmFile, '-Fs_API', '24000'],
{ cwd: dirname(decoderPath) }
)
if (stdout && stdout.trim()) console.info('[ChatService][Voice] decoder stdout:', stdout)
if (stderr && stderr.trim()) console.warn('[ChatService][Voice] decoder stderr:', stderr)
if (!existsSync(pcmFile)) {
return { success: false, error: '语音解码失败' }
const pcmData = await this.decodeSilkToPcm(silkData, 24000)
if (!pcmData) {
return { success: false, error: 'Silk 解码失败' }
}
const pcmData = readFileSync(pcmFile)
const wavHeader = this.createWavHeader(pcmData.length, 24000, 1) // 微信语音通常 24kHz
const wavData = Buffer.concat([wavHeader, pcmData])
// PCM -> WAV
const wavData = this.createWavBuffer(pcmData, 24000)
// 缓存 WAV 数据 (内存缓存)
const cacheKey = this.getVoiceCacheKey(sessionId, msgId)
this.cacheVoiceWav(cacheKey, wavData)
return { success: true, data: wavData.toString('base64') }
} finally {
// 清理临时文件
try { if (existsSync(silkFile)) unlinkSync(silkFile) } catch { }
try { if (existsSync(pcmFile)) unlinkSync(pcmFile) } catch { }
} catch (e) {
console.error('[ChatService][Voice] decoding error:', e)
return { success: false, error: '语音解码失败: ' + String(e) }
}
} catch (e) {
console.error('ChatService: getVoiceData 失败:', e)
@@ -2286,7 +2372,69 @@ class ChatService {
}
}
async getVoiceTranscript(sessionId: string, msgId: string): Promise<{ success: boolean; transcript?: string; error?: string }> {
/**
* 解码 Silk 数据为 PCM (silk-wasm)
*/
private async decodeSilkToPcm(silkData: Buffer, sampleRate: number): Promise<Buffer | null> {
try {
let wasmPath: string
if (app.isPackaged) {
wasmPath = join(process.resourcesPath, 'app.asar.unpacked', 'node_modules', 'silk-wasm', 'lib', 'silk.wasm')
if (!existsSync(wasmPath)) {
wasmPath = join(process.resourcesPath, 'node_modules', 'silk-wasm', 'lib', 'silk.wasm')
}
} else {
wasmPath = join(app.getAppPath(), 'node_modules', 'silk-wasm', 'lib', 'silk.wasm')
}
if (!existsSync(wasmPath)) {
console.error('[ChatService][Voice] silk.wasm not found at:', wasmPath)
return null
}
const silkWasm = require('silk-wasm')
if (!silkWasm || !silkWasm.decode) {
console.error('[ChatService][Voice] silk-wasm module invalid')
return null
}
const result = await silkWasm.decode(silkData, sampleRate)
return Buffer.from(result.data)
} catch (e) {
console.error('[ChatService][Voice] internal decode error:', e)
return null
}
}
/**
 * Wrap raw 16-bit PCM samples in a canonical 44-byte RIFF/WAVE header and
 * return the complete WAV file contents.
 */
private createWavBuffer(pcmData: Buffer, sampleRate: number = 24000, channels: number = 1): Buffer {
const bytesPerSample = 2 // 16-bit samples
const blockAlign = channels * bytesPerSample
const byteRate = sampleRate * blockAlign
const dataSize = pcmData.length
const header = Buffer.alloc(44)
header.write('RIFF', 0)
header.writeUInt32LE(36 + dataSize, 4) // RIFF chunk size = file size - 8
header.write('WAVE', 8)
header.write('fmt ', 12)
header.writeUInt32LE(16, 16) // fmt sub-chunk size
header.writeUInt16LE(1, 20) // format tag 1 = PCM
header.writeUInt16LE(channels, 22)
header.writeUInt32LE(sampleRate, 24)
header.writeUInt32LE(byteRate, 28)
header.writeUInt16LE(blockAlign, 32)
header.writeUInt16LE(16, 34) // bits per sample
header.write('data', 36)
header.writeUInt32LE(dataSize, 40)
return Buffer.concat([header, pcmData])
}
async getVoiceTranscript(
sessionId: string,
msgId: string,
onPartial?: (text: string) => void
): Promise<{ success: boolean; transcript?: string; error?: string }> {
const cacheKey = this.getVoiceCacheKey(sessionId, msgId)
const cached = this.voiceTranscriptCache.get(cacheKey)
if (cached) {
@@ -2302,14 +2450,25 @@ class ChatService {
try {
let wavData = this.voiceWavCache.get(cacheKey)
if (!wavData) {
const voiceResult = await this.getVoiceData(sessionId, msgId)
// 获取消息详情以拿到 createTime 和 serverId
let cTime: number | undefined
let sId: string | number | undefined
const msgResult = await this.getMessageById(sessionId, parseInt(msgId, 10))
if (msgResult.success && msgResult.message) {
cTime = msgResult.message.createTime
sId = msgResult.message.serverId
}
const voiceResult = await this.getVoiceData(sessionId, msgId, cTime, sId)
if (!voiceResult.success || !voiceResult.data) {
return { success: false, error: voiceResult.error || '语音解码失败' }
}
wavData = Buffer.from(voiceResult.data, 'base64')
}
const result = await voiceTranscribeService.transcribeWavBuffer(wavData)
const result = await voiceTranscribeService.transcribeWavBuffer(wavData, (text) => {
onPartial?.(text)
})
if (result.success && result.transcript) {
this.cacheVoiceTranscript(cacheKey, result.transcript)
}
@@ -2325,26 +2484,10 @@ class ChatService {
return task
}
/**
 * Build a standalone 44-byte RIFF/WAVE header describing `pcmLength` bytes
 * of 16-bit PCM; the caller appends the sample data itself.
 */
private createWavHeader(pcmLength: number, sampleRate: number = 24000, channels: number = 1): Buffer {
const bytesPerSample = 2 // 16-bit samples
const frameSize = channels * bytesPerSample
const buf = Buffer.alloc(44)
buf.write('RIFF', 0)
buf.writeUInt32LE(36 + pcmLength, 4) // RIFF chunk size
buf.write('WAVE', 8)
buf.write('fmt ', 12)
buf.writeUInt32LE(16, 16) // fmt sub-chunk size
buf.writeUInt16LE(1, 20) // format tag 1 = PCM
buf.writeUInt16LE(channels, 22)
buf.writeUInt32LE(sampleRate, 24)
buf.writeUInt32LE(sampleRate * frameSize, 28) // byte rate
buf.writeUInt16LE(frameSize, 32) // block align
buf.writeUInt16LE(16, 34) // bits per sample
buf.write('data', 36)
buf.writeUInt32LE(pcmLength, 40)
return buf
}
private getVoiceCacheKey(sessionId: string, msgId: string): string {
return `${sessionId}:${msgId}`
return `${sessionId}_${msgId}`
}
private cacheVoiceWav(cacheKey: string, wavData: Buffer): void {
@@ -2355,6 +2498,32 @@ class ChatService {
}
}
/**
 * Resolve the on-disk location for a cached voice WAV, creating the
 * directory if needed. Prefers the configured 'cachePath' when set and
 * non-blank; otherwise falls back to Documents/WeFlow/Voices.
 */
private getVoiceCacheFilePath(cacheKey: string): string {
const configured = this.configService.get('cachePath') as string | undefined
const baseDir = configured && configured.trim()
? join(configured, 'Voices')
: join(app.getPath('documents'), 'WeFlow', 'Voices')
if (!existsSync(baseDir)) {
mkdirSync(baseDir, { recursive: true })
}
return join(baseDir, `${cacheKey}.wav`)
}
/**
 * Persist a decoded WAV into the file cache, overwriting any existing entry
 * for the same cache key.
 */
private saveVoiceCache(cacheKey: string, wavData: Buffer): void {
writeFileSync(this.getVoiceCacheFilePath(cacheKey), wavData)
}
private cacheVoiceTranscript(cacheKey: string, transcript: string): void {
this.voiceTranscriptCache.set(cacheKey, transcript)
if (this.voiceTranscriptCache.size > this.voiceCacheMaxEntries) {

View File

@@ -15,7 +15,7 @@ export class ContactCacheService {
constructor(cacheBasePath?: string) {
const basePath = cacheBasePath && cacheBasePath.trim().length > 0
? cacheBasePath
: join(app.getPath('userData'), 'WeFlowCache')
: join(app.getPath('documents'), 'WeFlow')
this.cacheFilePath = join(basePath, 'contacts.json')
this.ensureCacheDir()
this.loadCache()

View File

@@ -70,6 +70,7 @@ export interface ExportOptions {
exportImages?: boolean
exportVoices?: boolean
exportEmojis?: boolean
exportVoiceAsText?: boolean
}
interface MediaExportItem {
@@ -227,6 +228,7 @@ class ExportService {
/**
* 解析消息内容为可读文本
* 注意:语音消息在这里返回占位符,实际转文字在导出时异步处理
*/
private parseMessageContent(content: string, localType: number): string | null {
if (!content) return null
@@ -235,7 +237,7 @@ class ExportService {
case 1:
return this.stripSenderPrefix(content)
case 3: return '[图片]'
case 34: return '[语音消息]'
case 34: return '[语音消息]' // 占位符,导出时会替换为转文字结果
case 42: return '[名片]'
case 43: return '[视频]'
case 47: return '[动画表情]'
@@ -246,6 +248,7 @@ class ExportService {
}
case 50: return this.parseVoipMessage(content)
case 10000: return this.cleanSystemMessage(content)
case 266287972401: return this.cleanSystemMessage(content) // 拍一拍
default:
if (content.includes('<type>57</type>')) {
const title = this.extractXmlValue(content, 'title')
@@ -406,7 +409,7 @@ class ExportService {
msg: any,
sessionId: string,
mediaDir: string,
options: { exportImages?: boolean; exportVoices?: boolean; exportEmojis?: boolean }
options: { exportImages?: boolean; exportVoices?: boolean; exportEmojis?: boolean; exportVoiceAsText?: boolean }
): Promise<MediaExportItem | null> {
const localType = msg.localType
@@ -420,9 +423,16 @@ class ExportService {
}
// 语音消息
if (localType === 34 && options.exportVoices) {
if (localType === 34) {
// 如果开启了语音转文字,优先转文字(不导出语音文件)
if (options.exportVoiceAsText) {
return null // 转文字逻辑在消息内容处理中完成
}
// 否则导出语音文件
if (options.exportVoices) {
return this.exportVoice(msg, sessionId, mediaDir)
}
}
// 动画表情
if (localType === 47 && options.exportEmojis) {
@@ -566,6 +576,22 @@ class ExportService {
}
}
/**
 * Transcribe a voice message to display text via ChatService.
 * Always resolves to a printable string: a "[语音转文字] ..." prefix on
 * success, or a fixed failure placeholder (errors are logged, not thrown).
 */
private async transcribeVoice(sessionId: string, msgId: string): Promise<string> {
try {
const result = await chatService.getVoiceTranscript(sessionId, msgId)
return result.success && result.transcript
? `[语音转文字] ${result.transcript}`
: '[语音消息 - 转文字失败]'
} catch (e) {
console.error('[ExportService] 语音转文字失败:', e)
return '[语音消息 - 转文字失败]'
}
}
/**
* 导出表情文件
*/
@@ -1057,6 +1083,31 @@ class ExportService {
}
}
/**
 * Build the shared export metadata (ChatLab-compatible header + meta).
 * groupId is included only for group chats, groupAvatar only when an avatar
 * was resolved.
 */
private getExportMeta(
sessionId: string,
sessionInfo: { displayName: string },
isGroup: boolean,
sessionAvatar?: string
): { chatlab: ChatLabHeader; meta: ChatLabMeta } {
const chatlab: ChatLabHeader = {
version: '0.0.2',
exportedAt: Math.floor(Date.now() / 1000),
generator: 'WeFlow'
}
const meta: ChatLabMeta = {
name: sessionInfo.displayName,
platform: 'wechat',
type: isGroup ? 'group' : 'private',
...(isGroup && { groupId: sessionId }),
...(sessionAvatar && { groupAvatar: sessionAvatar })
}
return { chatlab, meta }
}
/**
* 导出单个会话为 ChatLab 格式
*/
@@ -1097,21 +1148,29 @@ class ExportService {
phase: 'exporting'
})
const chatLabMessages: ChatLabMessage[] = allMessages.map((msg) => {
const chatLabMessages: ChatLabMessage[] = []
for (const msg of allMessages) {
const memberInfo = collected.memberSet.get(msg.senderUsername)?.member || {
platformId: msg.senderUsername,
accountName: msg.senderUsername,
groupNickname: undefined
}
return {
let content = this.parseMessageContent(msg.content, msg.localType)
// 如果是语音消息且开启了转文字
if (msg.localType === 34 && options.exportVoiceAsText) {
content = await this.transcribeVoice(sessionId, String(msg.localId))
}
chatLabMessages.push({
sender: msg.senderUsername,
accountName: memberInfo.accountName,
groupNickname: memberInfo.groupNickname,
timestamp: msg.createTime,
type: this.convertMessageType(msg.localType, msg.content),
content: this.parseMessageContent(msg.content, msg.localType)
}
content: content
})
}
const avatarMap = options.exportAvatars
? await this.exportAvatars(
@@ -1131,19 +1190,11 @@ class ExportService {
return avatar ? { ...info.member, avatar } : info.member
})
const { chatlab, meta } = this.getExportMeta(sessionId, sessionInfo, isGroup, sessionAvatar)
const chatLabExport: ChatLabExport = {
chatlab: {
version: '0.0.1',
exportedAt: Math.floor(Date.now() / 1000),
generator: 'WeFlow'
},
meta: {
name: sessionInfo.displayName,
platform: 'wechat',
type: isGroup ? 'group' : 'private',
...(isGroup && { groupId: sessionId }),
...(sessionAvatar && { groupAvatar: sessionAvatar })
},
chatlab,
meta,
members,
messages: chatLabMessages
}
@@ -1245,7 +1296,11 @@ class ExportService {
phase: 'writing'
})
const detailedExport = {
const { chatlab, meta } = this.getExportMeta(sessionId, sessionInfo, isGroup)
const detailedExport: any = {
chatlab,
meta,
session: {
wxid: sessionId,
nickname: sessionInfo.displayName,
@@ -1378,6 +1433,31 @@ class ExportService {
worksheet.getRow(currentRow).height = 20
currentRow++
// 第三行:导出元数据
const { chatlab, meta: exportMeta } = this.getExportMeta(sessionId, sessionInfo, isGroup)
worksheet.getCell(currentRow, 1).value = '导出工具'
worksheet.getCell(currentRow, 1).font = { name: 'Calibri', bold: true, size: 11 }
worksheet.getCell(currentRow, 2).value = chatlab.generator
worksheet.getCell(currentRow, 2).font = { name: 'Calibri', size: 10 }
worksheet.getCell(currentRow, 3).value = '导出版本'
worksheet.getCell(currentRow, 3).font = { name: 'Calibri', bold: true, size: 11 }
worksheet.getCell(currentRow, 4).value = chatlab.version
worksheet.getCell(currentRow, 4).font = { name: 'Calibri', size: 10 }
worksheet.getCell(currentRow, 5).value = '平台'
worksheet.getCell(currentRow, 5).font = { name: 'Calibri', bold: true, size: 11 }
worksheet.getCell(currentRow, 6).value = exportMeta.platform
worksheet.getCell(currentRow, 6).font = { name: 'Calibri', size: 10 }
worksheet.getCell(currentRow, 7).value = '导出时间'
worksheet.getCell(currentRow, 7).font = { name: 'Calibri', bold: true, size: 11 }
worksheet.getCell(currentRow, 8).value = this.formatTimestamp(chatlab.exportedAt)
worksheet.getCell(currentRow, 8).font = { name: 'Calibri', size: 10 }
worksheet.getRow(currentRow).height = 20
currentRow++
// 表头行
const headers = ['序号', '时间', '发送者昵称', '发送者微信ID', '发送者备注', '发送者身份', '消息类型', '内容']
const headerRow = worksheet.getRow(currentRow)
@@ -1429,7 +1509,8 @@ class ExportService {
mediaItem = await this.exportMediaForMessage(msg, sessionId, sessionDir, {
exportImages: options.exportImages,
exportVoices: options.exportVoices,
exportEmojis: options.exportEmojis
exportEmojis: options.exportEmojis,
exportVoiceAsText: options.exportVoiceAsText
})
mediaCache.set(mediaKey, mediaItem)
}

View File

@@ -115,7 +115,6 @@ export class ImageDecryptService {
for (const key of cacheKeys) {
const cached = this.resolvedCache.get(key)
if (cached && existsSync(cached) && this.isImageFile(cached)) {
this.logInfo('缓存命中(从Map)', { key, path: cached, isThumb: this.isThumbnailPath(cached) })
const dataUrl = this.fileToDataUrl(cached)
const isThumb = this.isThumbnailPath(cached)
const hasUpdate = isThumb ? (this.updateFlags.get(key) ?? false) : false
@@ -135,7 +134,6 @@ export class ImageDecryptService {
for (const key of cacheKeys) {
const existing = this.findCachedOutput(key, false, payload.sessionId)
if (existing) {
this.logInfo('缓存命中(文件系统)', { key, path: existing, isThumb: this.isThumbnailPath(existing) })
this.cacheResolvedPaths(key, payload.imageMd5, payload.imageDatName, existing)
const dataUrl = this.fileToDataUrl(existing)
const isThumb = this.isThumbnailPath(existing)

View File

@@ -15,7 +15,7 @@ export class MessageCacheService {
constructor(cacheBasePath?: string) {
const basePath = cacheBasePath && cacheBasePath.trim().length > 0
? cacheBasePath
: join(app.getPath('userData'), 'WeFlowCache')
: join(app.getPath('documents'), 'WeFlow')
this.cacheFilePath = join(basePath, 'session-messages.json')
this.ensureCacheDir()
this.loadCache()

View File

@@ -1,19 +1,23 @@
import { app } from 'electron'
import { createWriteStream, existsSync, mkdirSync, statSync, unlinkSync, writeFileSync } from 'fs'
import { join, dirname } from 'path'
import { promisify } from 'util'
import { execFile, spawnSync } from 'child_process'
import { existsSync, mkdirSync, statSync, unlinkSync, createWriteStream } from 'fs'
import { join } from 'path'
import * as https from 'https'
import * as http from 'http'
import { ConfigService } from './config'
const execFileAsync = promisify(execFile)
// Sherpa-onnx 类型定义
type OfflineRecognizer = any
type OfflineStream = any
type WhisperModelInfo = {
type ModelInfo = {
name: string
fileName: string
files: {
model: string
tokens: string
vad: string
}
sizeBytes: number
sizeLabel: string
sizeBytes?: number
}
type DownloadProgress = {
@@ -23,122 +27,169 @@ type DownloadProgress = {
percent?: number
}
const WHISPER_MODELS: Record<string, WhisperModelInfo> = {
tiny: { name: 'tiny', fileName: 'ggml-tiny.bin', sizeLabel: '75 MB', sizeBytes: 75_000_000 },
base: { name: 'base', fileName: 'ggml-base.bin', sizeLabel: '142 MB', sizeBytes: 142_000_000 },
small: { name: 'small', fileName: 'ggml-small.bin', sizeLabel: '466 MB', sizeBytes: 466_000_000 },
medium: { name: 'medium', fileName: 'ggml-medium.bin', sizeLabel: '1.5 GB', sizeBytes: 1_500_000_000 },
'large-v3': { name: 'large-v3', fileName: 'ggml-large-v3.bin', sizeLabel: '2.9 GB', sizeBytes: 2_900_000_000 }
// SenseVoice (small, int8-quantized) model bundle used for offline speech
// recognition, plus the Silero VAD model file for voice-activity detection.
// sizeBytes is the approximate combined download size of the three files.
// NOTE(review): sizeLabel is not part of the ModelInfo shape visible above —
// confirm the type declares it (or drop the field).
const SENSEVOICE_MODEL: ModelInfo = {
name: 'SenseVoiceSmall',
files: {
model: 'model.int8.onnx',
tokens: 'tokens.txt',
vad: 'silero_vad.onnx'
},
sizeBytes: 245_000_000,
sizeLabel: '245 MB'
}
const WHISPER_SOURCES: Record<string, string> = {
official: 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main',
tsinghua: 'https://hf-mirror.com/ggerganov/whisper.cpp/resolve/main'
}
function getStaticFfmpegPath(): string | null {
try {
// eslint-disable-next-line @typescript-eslint/no-var-requires
const ffmpegStatic = require('ffmpeg-static')
if (typeof ffmpegStatic === 'string' && existsSync(ffmpegStatic)) {
return ffmpegStatic
}
const devPath = join(process.cwd(), 'node_modules', 'ffmpeg-static', 'ffmpeg.exe')
if (existsSync(devPath)) {
return devPath
}
if (app.isPackaged) {
const resourcesPath = process.resourcesPath
const packedPath = join(resourcesPath, 'app.asar.unpacked', 'node_modules', 'ffmpeg-static', 'ffmpeg.exe')
if (existsSync(packedPath)) {
return packedPath
}
}
return null
} catch {
return null
}
// Download sources for the SenseVoice model files, served from ModelScope.
// Keys mirror SENSEVOICE_MODEL.files (model / tokens / vad).
const MODEL_DOWNLOAD_URLS = {
model: 'https://modelscope.cn/models/pengzhendong/sherpa-onnx-sense-voice-zh-en-ja-ko-yue/resolve/master/model.int8.onnx',
tokens: 'https://modelscope.cn/models/pengzhendong/sherpa-onnx-sense-voice-zh-en-ja-ko-yue/resolve/master/tokens.txt',
vad: 'https://www.modelscope.cn/models/manyeyes/silero-vad-onnx/resolve/master/silero_vad.onnx'
}
export class VoiceTranscribeService {
private configService = new ConfigService()
private downloadTasks = new Map<string, Promise<{ success: boolean; path?: string; error?: string }>>()
private recognizer: OfflineRecognizer | null = null
private isInitializing = false
private resolveModelInfo(modelName: string): WhisperModelInfo | null {
return WHISPER_MODELS[modelName] || null
}
private resolveModelDir(overrideDir?: string): string {
const configured = overrideDir || this.configService.get('whisperModelDir')
private resolveModelDir(): string {
const configured = this.configService.get('whisperModelDir') as string | undefined
if (configured) return configured
return join(app.getPath('userData'), 'models', 'whisper')
return join(app.getPath('documents'), 'WeFlow', 'models', 'sensevoice')
}
private resolveModelPath(modelName: string, overrideDir?: string): string | null {
const info = this.resolveModelInfo(modelName)
if (!info) return null
return join(this.resolveModelDir(overrideDir), info.fileName)
private resolveModelPath(fileName: string): string {
return join(this.resolveModelDir(), fileName)
}
private resolveSourceUrl(overrideSource?: string): string {
const configured = overrideSource || this.configService.get('whisperDownloadSource')
if (configured && WHISPER_SOURCES[configured]) return WHISPER_SOURCES[configured]
return WHISPER_SOURCES.official
}
async getModelStatus(payload: { modelName: string; downloadDir?: string }): Promise<{
/**
* 检查模型状态
*/
async getModelStatus(): Promise<{
success: boolean
exists?: boolean
path?: string
modelPath?: string
tokensPath?: string
sizeBytes?: number
error?: string
}> {
const modelPath = this.resolveModelPath(payload.modelName, payload.downloadDir)
if (!modelPath) {
return { success: false, error: '未知模型名称' }
}
if (!existsSync(modelPath)) {
return { success: true, exists: false, path: modelPath }
}
const sizeBytes = statSync(modelPath).size
return { success: true, exists: true, path: modelPath, sizeBytes }
try {
const modelPath = this.resolveModelPath(SENSEVOICE_MODEL.files.model)
const tokensPath = this.resolveModelPath(SENSEVOICE_MODEL.files.tokens)
const vadPath = this.resolveModelPath((SENSEVOICE_MODEL.files as any).vad)
const modelExists = existsSync(modelPath)
const tokensExists = existsSync(tokensPath)
const vadExists = existsSync(vadPath)
const exists = modelExists && tokensExists && vadExists
if (!exists) {
return { success: true, exists: false, modelPath, tokensPath }
}
const modelSize = statSync(modelPath).size
const tokensSize = statSync(tokensPath).size
const vadSize = statSync(vadPath).size
const totalSize = modelSize + tokensSize + vadSize
return {
success: true,
exists: true,
modelPath,
tokensPath,
sizeBytes: totalSize
}
} catch (error) {
console.error('[VoiceTranscribe] getModelStatus error:', error)
return { success: false, error: String(error) }
}
}
/**
* 下载模型文件
*/
async downloadModel(
payload: { modelName: string; downloadDir?: string; source?: string },
onProgress?: (progress: DownloadProgress) => void
): Promise<{ success: boolean; path?: string; error?: string }> {
const info = this.resolveModelInfo(payload.modelName)
if (!info) {
return { success: false, error: '未知模型名称' }
}
const modelPath = this.resolveModelPath(payload.modelName, payload.downloadDir)
if (!modelPath) {
return { success: false, error: '模型路径生成失败' }
}
if (existsSync(modelPath)) {
return { success: true, path: modelPath }
}
const cacheKey = `${payload.modelName}:${modelPath}`
): Promise<{ success: boolean; modelPath?: string; tokensPath?: string; error?: string }> {
const cacheKey = 'sensevoice'
const pending = this.downloadTasks.get(cacheKey)
if (pending) return pending
const task = (async () => {
try {
const targetDir = this.resolveModelDir(payload.downloadDir)
if (!existsSync(targetDir)) {
mkdirSync(targetDir, { recursive: true })
const modelDir = this.resolveModelDir()
if (!existsSync(modelDir)) {
mkdirSync(modelDir, { recursive: true })
}
const baseUrl = this.resolveSourceUrl(payload.source)
const url = `${baseUrl}/${info.fileName}`
await this.downloadToFile(url, modelPath, payload.modelName, onProgress)
return { success: true, path: modelPath }
const modelPath = this.resolveModelPath(SENSEVOICE_MODEL.files.model)
const tokensPath = this.resolveModelPath(SENSEVOICE_MODEL.files.tokens)
const vadPath = this.resolveModelPath((SENSEVOICE_MODEL.files as any).vad)
// 下载模型文件 (40%)
console.info('[VoiceTranscribe] 开始下载模型文件...')
await this.downloadToFile(
MODEL_DOWNLOAD_URLS.model,
modelPath,
'model',
(downloaded, total) => {
const percent = total ? (downloaded / total) * 40 : undefined
onProgress?.({
modelName: SENSEVOICE_MODEL.name,
downloadedBytes: downloaded,
totalBytes: SENSEVOICE_MODEL.sizeBytes,
percent
})
}
)
// 下载 tokens 文件 (30%)
console.info('[VoiceTranscribe] 开始下载 tokens 文件...')
await this.downloadToFile(
MODEL_DOWNLOAD_URLS.tokens,
tokensPath,
'tokens',
(downloaded, total) => {
const modelSize = existsSync(modelPath) ? statSync(modelPath).size : 0
const percent = total ? 40 + (downloaded / total) * 30 : 40
onProgress?.({
modelName: SENSEVOICE_MODEL.name,
downloadedBytes: modelSize + downloaded,
totalBytes: SENSEVOICE_MODEL.sizeBytes,
percent
})
}
)
// 下载 vad 文件 (30%)
console.info('[VoiceTranscribe] 开始下载 VAD 文件...')
await this.downloadToFile(
(MODEL_DOWNLOAD_URLS as any).vad,
vadPath,
'vad',
(downloaded, total) => {
const modelSize = existsSync(modelPath) ? statSync(modelPath).size : 0
const tokensSize = existsSync(tokensPath) ? statSync(tokensPath).size : 0
const percent = total ? 70 + (downloaded / total) * 30 : 70
onProgress?.({
modelName: SENSEVOICE_MODEL.name,
downloadedBytes: modelSize + tokensSize + downloaded,
totalBytes: SENSEVOICE_MODEL.sizeBytes,
percent
})
}
)
console.info('[VoiceTranscribe] 模型下载完成')
return { success: true, modelPath, tokensPath }
} catch (error) {
try { if (existsSync(modelPath)) unlinkSync(modelPath) } catch { }
console.error('[VoiceTranscribe] 下载失败:', error)
const modelPath = this.resolveModelPath(SENSEVOICE_MODEL.files.model)
const tokensPath = this.resolveModelPath(SENSEVOICE_MODEL.files.tokens)
const vadPath = this.resolveModelPath((SENSEVOICE_MODEL.files as any).vad)
try {
if (existsSync(modelPath)) unlinkSync(modelPath)
if (existsSync(tokensPath)) unlinkSync(tokensPath)
if (existsSync(vadPath)) unlinkSync(vadPath)
} catch { }
return { success: false, error: String(error) }
} finally {
this.downloadTasks.delete(cacheKey)
@@ -149,102 +200,108 @@ export class VoiceTranscribeService {
return task
}
async transcribeWavBuffer(wavData: Buffer): Promise<{ success: boolean; transcript?: string; error?: string }> {
const modelName = this.configService.get('whisperModelName') || 'base'
const modelPath = this.resolveModelPath(modelName)
console.info('[VoiceTranscribe] check model', { modelName, modelPath, exists: modelPath ? existsSync(modelPath) : false })
if (!modelPath || !existsSync(modelPath)) {
return { success: false, error: '未下载语音模型,请在设置中下载' }
}
// 使用内置的预编译 whisper-cli.exe
const resourcesPath = app.isPackaged
? join(process.resourcesPath, 'resources')
: join(app.getAppPath(), 'resources')
const whisperExe = join(resourcesPath, 'whisper-cli.exe')
if (!existsSync(whisperExe)) {
return { success: false, error: '找不到语音转写程序,请重新安装应用' }
}
const ffmpegPath = getStaticFfmpegPath() || 'ffmpeg'
console.info('[VoiceTranscribe] ffmpeg path', ffmpegPath)
const tempDir = app.getPath('temp')
const fileToken = `${Date.now()}_${Math.random().toString(16).slice(2)}`
const inputPath = join(tempDir, `weflow_voice_${fileToken}.wav`)
const outputPath = join(tempDir, `weflow_voice_${fileToken}_16k.wav`)
/**
* 转写 WAV 音频数据 (后台 Worker Threads 版本)
*/
async transcribeWavBuffer(
wavData: Buffer,
onPartial?: (text: string) => void
): Promise<{ success: boolean; transcript?: string; error?: string }> {
return new Promise((resolve) => {
try {
writeFileSync(inputPath, wavData)
console.info('[VoiceTranscribe] converting to 16kHz', { inputPath, outputPath })
await execFileAsync(ffmpegPath, ['-y', '-i', inputPath, '-ar', '16000', '-ac', '1', outputPath])
const modelPath = this.resolveModelPath(SENSEVOICE_MODEL.files.model)
const tokensPath = this.resolveModelPath(SENSEVOICE_MODEL.files.tokens)
console.info('[VoiceTranscribe] transcribing with whisper', { whisperExe, modelPath })
const { stdout, stderr } = await execFileAsync(whisperExe, [
'-m', modelPath,
'-f', outputPath,
'-l', 'zh',
'-otxt',
'-np' // no prints (只输出结果)
], {
maxBuffer: 10 * 1024 * 1024,
cwd: dirname(whisperExe), // 设置工作目录为 whisper-cli.exe 所在目录,确保能找到 DLL
env: { ...process.env, PATH: `${dirname(whisperExe)};${process.env.PATH}` }
if (!existsSync(modelPath) || !existsSync(tokensPath)) {
resolve({ success: false, error: '模型文件不存在,请先下载模型' })
return
}
const { Worker } = require('worker_threads')
// main.js 和 transcribeWorker.js 同在 dist-electron 目录下
const workerPath = join(__dirname, 'transcribeWorker.js')
console.info('[VoiceTranscribe] 启动后台 Worker 转写...', { workerPath })
const worker = new Worker(workerPath, {
workerData: {
modelPath,
tokensPath,
wavData,
sampleRate: 16000
}
})
console.info('[VoiceTranscribe] whisper stdout:', stdout)
if (stderr) console.warn('[VoiceTranscribe] whisper stderr:', stderr)
let finalTranscript = ''
// 解析输出文本
const outputBase = outputPath.replace(/\.[^.]+$/, '')
const txtFile = `${outputBase}.txt`
let transcript = ''
if (existsSync(txtFile)) {
const { readFileSync } = await import('fs')
transcript = readFileSync(txtFile, 'utf-8').trim()
unlinkSync(txtFile)
} else {
// 从 stdout 提取(使用 -np 参数后stdout 只有转写结果)
transcript = stdout.trim()
}
console.info('[VoiceTranscribe] success', { transcript })
return { success: true, transcript }
} catch (error: any) {
console.error('[VoiceTranscribe] failed', error)
console.error('[VoiceTranscribe] stderr:', error.stderr)
console.error('[VoiceTranscribe] stdout:', error.stdout)
return { success: false, error: String(error) }
} finally {
try { if (existsSync(inputPath)) unlinkSync(inputPath) } catch { }
try { if (existsSync(outputPath)) unlinkSync(outputPath) } catch { }
}
worker.on('message', (msg: any) => {
if (msg.type === 'partial') {
onPartial?.(msg.text)
} else if (msg.type === 'final') {
finalTranscript = msg.text
resolve({ success: true, transcript: finalTranscript })
worker.terminate()
} else if (msg.type === 'error') {
resolve({ success: false, error: msg.error })
worker.terminate()
}
})
worker.on('error', (err: Error) => {
console.error('[VoiceTranscribe] Worker error:', err)
resolve({ success: false, error: String(err) })
})
worker.on('exit', (code: number) => {
if (code !== 0) {
console.error(`[VoiceTranscribe] Worker stopped with exit code ${code}`)
resolve({ success: false, error: `Worker exited with code ${code}` })
}
})
} catch (error) {
console.error('[VoiceTranscribe] 启动 Worker 失败:', error)
resolve({ success: false, error: String(error) })
}
})
}
/**
* 下载文件
*/
private downloadToFile(
url: string,
targetPath: string,
modelName: string,
onProgress?: (progress: DownloadProgress) => void,
remainingRedirects = 3
fileName: string,
onProgress?: (downloaded: number, total?: number) => void,
remainingRedirects = 5
): Promise<void> {
return new Promise((resolve, reject) => {
const protocol = url.startsWith('https') ? https : http
const request = protocol.get(url, (response) => {
console.info(`[VoiceTranscribe] 下载 ${fileName}:`, url)
const options = {
headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
}
}
const request = protocol.get(url, options, (response) => {
// 处理重定向
if ([301, 302, 303, 307, 308].includes(response.statusCode || 0) && response.headers.location) {
if (remainingRedirects <= 0) {
reject(new Error('下载重定向次数过多'))
reject(new Error('重定向次数过多'))
return
}
this.downloadToFile(response.headers.location, targetPath, modelName, onProgress, remainingRedirects - 1)
console.info(`[VoiceTranscribe] 重定向到:`, response.headers.location)
this.downloadToFile(response.headers.location, targetPath, fileName, onProgress, remainingRedirects - 1)
.then(resolve)
.catch(reject)
return
}
if (response.statusCode !== 200) {
reject(new Error(`下载失败: ${response.statusCode}`))
reject(new Error(`下载失败: HTTP ${response.statusCode}`))
return
}
@@ -255,8 +312,7 @@ export class VoiceTranscribeService {
response.on('data', (chunk) => {
downloadedBytes += chunk.length
const percent = totalBytes ? (downloadedBytes / totalBytes) * 100 : undefined
onProgress?.({ modelName, downloadedBytes, totalBytes, percent })
onProgress?.(downloadedBytes, totalBytes)
})
response.on('error', (error) => {
@@ -271,14 +327,32 @@ export class VoiceTranscribeService {
writer.on('finish', () => {
writer.close()
console.info(`[VoiceTranscribe] ${fileName} 下载完成:`, targetPath)
resolve()
})
response.pipe(writer)
})
request.on('error', reject)
request.on('error', (error) => {
console.error(`[VoiceTranscribe] ${fileName} 下载错误:`, error)
reject(error)
})
})
}
/**
* 清理资源
*/
dispose() {
if (this.recognizer) {
try {
// sherpa-onnx 的 recognizer 可能需要手动释放
this.recognizer = null
} catch (error) {
console.error('[VoiceTranscribe] 释放识别器失败:', error)
}
}
}
}

View File

@@ -48,6 +48,7 @@ export class WcdbCore {
private wcdbGetMessageById: any = null
private wcdbGetEmoticonCdnUrl: any = null
private wcdbGetDbStatus: any = null
private wcdbGetVoiceData: any = null
private avatarUrlCache: Map<string, { url?: string; updatedAt: number }> = new Map()
private readonly avatarCacheTtlMs = 10 * 60 * 1000
private logTimer: NodeJS.Timeout | null = null
@@ -108,12 +109,13 @@ export class WcdbCore {
private writeLog(message: string, force = false): void {
if (!force && !this.isLogEnabled()) return
const line = `[${new Date().toISOString()}] ${message}`
console.log(`[WCDB] ${line}`)
try {
const base = this.userDataPath || process.env.WCDB_LOG_DIR || process.cwd()
const dir = join(base, 'logs')
if (!existsSync(dir)) mkdirSync(dir, { recursive: true })
const line = `[${new Date().toISOString()}] ${message}\n`
appendFileSync(join(dir, 'wcdb.log'), line, { encoding: 'utf8' })
appendFileSync(join(dir, 'wcdb.log'), line + '\n', { encoding: 'utf8' })
} catch { }
}
@@ -345,6 +347,13 @@ export class WcdbCore {
this.wcdbGetDbStatus = null
}
// wcdb_status wcdb_get_voice_data(wcdb_handle handle, const char* session_id, int32_t create_time, const char* candidates_json, char** out_hex)
try {
this.wcdbGetVoiceData = this.lib.func('int32 wcdb_get_voice_data(int64 handle, const char* sessionId, int32 createTime, int64 svrId, const char* candidatesJson, _Out_ void** outHex)')
} catch {
this.wcdbGetVoiceData = null
}
// 初始化
const initResult = this.wcdbInit()
if (initResult !== 0) {
@@ -1295,9 +1304,7 @@ export class WcdbCore {
} catch (e) {
return { success: false, error: String(e) }
}
}
async getMessageById(sessionId: string, localId: number): Promise<{ success: boolean; message?: any; error?: string }> {
} async getMessageById(sessionId: string, localId: number): Promise<{ success: boolean; message?: any; error?: string }> {
if (!this.ensureReady()) return { success: false, error: 'WCDB 未连接' }
try {
const outPtr = [null as any]
@@ -1313,5 +1320,21 @@ export class WcdbCore {
return { success: false, error: String(e) }
}
}
}
async getVoiceData(sessionId: string, createTime: number, candidates: string[], svrId: string | number = 0): Promise<{ success: boolean; hex?: string; error?: string }> {
if (!this.ensureReady()) return { success: false, error: 'WCDB 未连接' }
if (!this.wcdbGetVoiceData) return { success: false, error: '当前 DLL 版本不支持获取语音数据' }
try {
const outPtr = [null as any]
const result = this.wcdbGetVoiceData(this.handle, sessionId, createTime, BigInt(svrId || 0), JSON.stringify(candidates), outPtr)
if (result !== 0 || !outPtr[0]) {
return { success: false, error: `获取语音数据失败: ${result}` }
}
const hex = this.decodeJsonPtr(outPtr[0])
if (hex === null) return { success: false, error: '解析语音数据失败' }
return { success: true, hex: hex || undefined }
} catch (e) {
return { success: false, error: String(e) }
}
}
}

View File

@@ -341,6 +341,13 @@ export class WcdbService {
return this.callWorker('getMessageById', { sessionId, localId })
}
/**
* 获取语音数据
*/
async getVoiceData(sessionId: string, createTime: number, candidates: string[], svrId: string | number = 0): Promise<{ success: boolean; hex?: string; error?: string }> {
return this.callWorker('getVoiceData', { sessionId, createTime, candidates, svrId })
}
}
export const wcdbService = new WcdbService()

View File

@@ -0,0 +1,174 @@
import { parentPort, workerData } from 'worker_threads'
import * as fs from 'fs'

// Payload handed to this worker via workerData by VoiceTranscribeService.
interface WorkerParams {
  modelPath: string   // path to the SenseVoice ONNX model file
  tokensPath: string  // path to the recognizer's tokens file
  wavData: Buffer     // raw WAV file bytes (header + 16-bit PCM payload)
  sampleRate: number  // PCM sample rate (caller passes 16000)
}

/**
 * Worker entry point: transcribes a WAV buffer with sherpa-onnx
 * (SenseVoiceSmall). When the Silero VAD model is present next to the main
 * model, audio is fed in ~100 ms chunks and each completed speech segment is
 * decoded separately, posting the growing transcript as 'partial' messages
 * and the full text as a single 'final' message. Without the VAD model it
 * falls back to one full-buffer decode. Any failure is reported to the
 * parent as an 'error' message rather than thrown.
 */
async function run() {
  console.info('[TranscribeWorker] Worker process starting...');
  if (!parentPort) {
    console.error('[TranscribeWorker] Critical Error: parentPort is null');
    return;
  }
  try {
    console.info('[TranscribeWorker] Loading sherpa-onnx-node...');
    // Load the native addon dynamically so load-time failures (e.g. a
    // missing C++ runtime) can be caught and reported instead of crashing.
    let sherpa: any;
    try {
      sherpa = require('sherpa-onnx-node');
      console.info('[TranscribeWorker] sherpa-onnx-node loaded successfully.');
    } catch (requireError) {
      console.error('[TranscribeWorker] Failed to load sherpa-onnx-node:', requireError);
      parentPort.postMessage({ type: 'error', error: 'Failed to load speech engine: ' + String(requireError) });
      return;
    }
    const { modelPath, tokensPath, wavData: rawWavData, sampleRate } = workerData as WorkerParams
    // Re-wrap the transferred data as a Buffer before reading from it.
    const wavData = Buffer.from(rawWavData);
    console.info('[TranscribeWorker] Params received:', {
      modelPath,
      tokensPath,
      sampleRate,
      wavDataLength: wavData?.length
    });
    // 1. Initialize the offline recognizer (SenseVoiceSmall).
    console.info('[TranscribeWorker] Initializing OfflineRecognizer...');
    const recognizerConfig = {
      modelConfig: {
        senseVoice: {
          model: modelPath,
          useInverseTextNormalization: 1
        },
        tokens: tokensPath,
        numThreads: 2,
        debug: 0
      }
    }
    const recognizer = new sherpa.OfflineRecognizer(recognizerConfig)
    console.info('[TranscribeWorker] OfflineRecognizer initialized.');
    // 2. Initialize the VAD (drives the streaming partial-output effect).
    // The VAD model is expected to sit next to the main model file.
    const vadPath = modelPath.replace('model.int8.onnx', 'silero_vad.onnx');
    console.info('[TranscribeWorker] VAD Path:', vadPath);
    const vadConfig = {
      sileroVad: {
        model: vadPath,
        threshold: 0.5,
        minSilenceDuration: 0.5,
        minSpeechDuration: 0.25,
        windowSize: 512
      },
      sampleRate: sampleRate,
      debug: 0,
      numThreads: 1
    }
    // If the VAD model is missing, fall back to a single full transcription.
    if (!fs.existsSync(vadPath)) {
      console.warn('[TranscribeWorker] VAD model not found, falling back to full transcription.');
      // Skip the WAV header — assumes a canonical 44-byte header with no
      // extra chunks before 'data'; TODO confirm the producer guarantees this.
      const pcmData = wavData.slice(44)
      // Convert 16-bit little-endian PCM to normalized float samples.
      const samples = new Float32Array(pcmData.length / 2)
      for (let i = 0; i < samples.length; i++) {
        samples[i] = pcmData.readInt16LE(i * 2) / 32768.0
      }
      const stream = recognizer.createStream()
      stream.acceptWaveform({ sampleRate, samples })
      recognizer.decode(stream)
      const result = recognizer.getResult(stream)
      console.info('[TranscribeWorker] Full transcription result:', result.text);
      parentPort.postMessage({ type: 'final', text: result.text })
      return
    }
    console.info('[TranscribeWorker] Initializing Vad...');
    const vad = new sherpa.Vad(vadConfig, 60) // 60s max buffered audio
    console.info('[TranscribeWorker] VAD initialized.');
    // 3. Decode the audio payload (same 44-byte header assumption as above).
    const pcmData = wavData.slice(44)
    const samples = new Float32Array(pcmData.length / 2)
    for (let i = 0; i < samples.length; i++) {
      samples[i] = pcmData.readInt16LE(i * 2) / 32768.0
    }
    // Simulate streaming input: feed the VAD in small chunks.
    const chunkSize = 1600 // 100ms for 16kHz
    let offset = 0
    let accumulatedText = ''
    console.info('[TranscribeWorker] Starting processing loop...');
    let segmentCount = 0;
    while (offset < samples.length) {
      const end = Math.min(offset + chunkSize, samples.length)
      const chunk = samples.subarray(offset, end)
      vad.acceptWaveform(chunk)
      // Drain every speech segment the VAD has completed and decode it.
      while (!vad.isEmpty()) {
        const segment = vad.front(false)
        console.info(`[TranscribeWorker] VAD Segment detected. Duration: ${segment.samples.length / sampleRate}s`);
        const stream = recognizer.createStream()
        stream.acceptWaveform({ sampleRate, samples: segment.samples })
        recognizer.decode(stream)
        const result = recognizer.getResult(stream)
        if (result.text) {
          const text = result.text.trim();
          if (text.length > 0) {
            // Append the segment and push the whole transcript so far.
            accumulatedText += (accumulatedText ? ' ' : '') + text
            segmentCount++;
            console.info(`[TranscribeWorker] Partial update #${segmentCount}: "${text}" -> Total: "${accumulatedText.substring(0, 50)}..."`);
            parentPort.postMessage({ type: 'partial', text: accumulatedText })
          }
        }
        vad.pop()
      }
      offset = end
      // Yield to the event loop so the worker stays responsive.
      await new Promise(resolve => setImmediate(resolve))
    }
    // Flush so any trailing speech still buffered in the VAD is processed.
    vad.flush();
    while (!vad.isEmpty()) {
      const segment = vad.front(false);
      console.info(`[TranscribeWorker] Final VAD Segment detected. Duration: ${segment.samples.length / sampleRate}s`);
      const stream = recognizer.createStream()
      stream.acceptWaveform({ sampleRate, samples: segment.samples })
      recognizer.decode(stream)
      const result = recognizer.getResult(stream)
      if (result.text) {
        accumulatedText += (accumulatedText ? ' ' : '') + result.text.trim()
        console.info(`[TranscribeWorker] Final partial update: "${result.text.trim()}"`);
        parentPort.postMessage({ type: 'partial', text: accumulatedText })
      }
      vad.pop();
    }
    console.info('[TranscribeWorker] Loop finished. Final text length:', accumulatedText.length);
    parentPort.postMessage({ type: 'final', text: accumulatedText })
  } catch (error) {
    console.error('[TranscribeWorker] Fatal error:', error);
    parentPort.postMessage({ type: 'error', error: String(error) })
  }
}

run();

4
electron/types/sherpa-onnx-node.d.ts vendored Normal file
View File

@@ -0,0 +1,4 @@
// Ambient module declaration for the native 'sherpa-onnx-node' addon, which
// ships without TypeScript typings. Deliberately typed as `any`; call sites
// (e.g. transcribeWorker) treat the API as untyped and narrow manually.
declare module 'sherpa-onnx-node' {
  const content: any;
  export = content;
}

View File

@@ -110,6 +110,12 @@ if (parentPort) {
case 'getMessageById':
result = await core.getMessageById(payload.sessionId, payload.localId)
break
case 'getVoiceData':
result = await core.getVoiceData(payload.sessionId, payload.createTime, payload.candidates, payload.svrId)
if (!result.success) {
console.error('[wcdbWorker] getVoiceData failed:', result.error)
}
break
default:
result = { success: false, error: `Unknown method: ${type}` }
}

190
package-lock.json generated
View File

@@ -25,8 +25,9 @@
"react": "^19.2.3",
"react-dom": "^19.2.3",
"react-router-dom": "^7.1.1",
"sherpa-onnx-node": "^1.10.38",
"silk-wasm": "^3.7.1",
"wechat-emojis": "^1.0.2",
"whisper-node": "^1.1.1",
"zustand": "^5.0.2"
},
"devDependencies": {
@@ -6005,6 +6006,7 @@
"version": "1.1.2",
"resolved": "https://registry.npmmirror.com/function-bind/-/function-bind-1.1.2.tgz",
"integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==",
"dev": true,
"license": "MIT",
"funding": {
"url": "https://github.com/sponsors/ljharb"
@@ -6297,6 +6299,7 @@
"version": "2.0.2",
"resolved": "https://registry.npmmirror.com/hasown/-/hasown-2.0.2.tgz",
"integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"function-bind": "^1.1.2"
@@ -6539,15 +6542,6 @@
"integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==",
"license": "ISC"
},
"node_modules/interpret": {
"version": "1.4.0",
"resolved": "https://registry.npmmirror.com/interpret/-/interpret-1.4.0.tgz",
"integrity": "sha512-agE4QfB2Lkp9uICn7BAqoscw4SZP9kTE2hxiFI3jBPmXJfdqiahTbUuKGsMoN2GtqL9AxhYioAcVvgsb1HvRbA==",
"license": "MIT",
"engines": {
"node": ">= 0.10"
}
},
"node_modules/ip-address": {
"version": "10.1.0",
"resolved": "https://registry.npmmirror.com/ip-address/-/ip-address-10.1.0.tgz",
@@ -6571,21 +6565,6 @@
"is-ci": "bin.js"
}
},
"node_modules/is-core-module": {
"version": "2.16.1",
"resolved": "https://registry.npmmirror.com/is-core-module/-/is-core-module-2.16.1.tgz",
"integrity": "sha512-UfoeMA6fIJ8wTYFEUjelnaGI67v6+N7qXJEvQuIGa99l4xsCruSYOVSQ0uPANn4dAzm8lkYPaKLrrijLq7x23w==",
"license": "MIT",
"dependencies": {
"hasown": "^2.0.2"
},
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/is-extglob": {
"version": "2.1.1",
"resolved": "https://registry.npmmirror.com/is-extglob/-/is-extglob-2.1.1.tgz",
@@ -7753,12 +7732,6 @@
"node": ">=8"
}
},
"node_modules/path-parse": {
"version": "1.0.7",
"resolved": "https://registry.npmmirror.com/path-parse/-/path-parse-1.0.7.tgz",
"integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==",
"license": "MIT"
},
"node_modules/path-scurry": {
"version": "1.11.1",
"resolved": "https://registry.npmmirror.com/path-scurry/-/path-scurry-1.11.1.tgz",
@@ -8142,26 +8115,6 @@
"url": "https://paulmillr.com/funding/"
}
},
"node_modules/readline-sync": {
"version": "1.4.10",
"resolved": "https://registry.npmmirror.com/readline-sync/-/readline-sync-1.4.10.tgz",
"integrity": "sha512-gNva8/6UAe8QYepIQH/jQ2qn91Qj0B9sYjMBBs3QOB8F2CXcKgLxQaJRP76sWVRQt+QU+8fAkCbCvjjMFu7Ycw==",
"license": "MIT",
"engines": {
"node": ">= 0.8.0"
}
},
"node_modules/rechoir": {
"version": "0.6.2",
"resolved": "https://registry.npmmirror.com/rechoir/-/rechoir-0.6.2.tgz",
"integrity": "sha512-HFM8rkZ+i3zrV+4LQjwQ0W+ez98pApMGM3HUrN04j3CqzPOzl9nmP15Y8YXNm8QHGv/eacOVEjqhmWpkRV0NAw==",
"dependencies": {
"resolve": "^1.1.6"
},
"engines": {
"node": ">= 0.10"
}
},
"node_modules/require-directory": {
"version": "2.1.1",
"resolved": "https://registry.npmmirror.com/require-directory/-/require-directory-2.1.1.tgz",
@@ -8199,26 +8152,6 @@
"url": "https://github.com/sponsors/jet2jet"
}
},
"node_modules/resolve": {
"version": "1.22.11",
"resolved": "https://registry.npmmirror.com/resolve/-/resolve-1.22.11.tgz",
"integrity": "sha512-RfqAvLnMl313r7c9oclB1HhUEAezcpLjz95wFH4LVuhk9JF/r22qmVP9AMmOU4vMX7Q8pN8jwNg/CSpdFnMjTQ==",
"license": "MIT",
"dependencies": {
"is-core-module": "^2.16.1",
"path-parse": "^1.0.7",
"supports-preserve-symlinks-flag": "^1.0.0"
},
"bin": {
"resolve": "bin/resolve"
},
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/resolve-alpn": {
"version": "1.2.1",
"resolved": "https://registry.npmmirror.com/resolve-alpn/-/resolve-alpn-1.2.1.tgz",
@@ -8564,23 +8497,78 @@
"node": ">=8"
}
},
"node_modules/shelljs": {
"version": "0.8.5",
"resolved": "https://registry.npmmirror.com/shelljs/-/shelljs-0.8.5.tgz",
"integrity": "sha512-TiwcRcrkhHvbrZbnRcFYMLl30Dfov3HKqzp5tO5b4pt6G/SezKcYhmDg15zXVBswHmctSAQKznqNW2LO5tTDow==",
"license": "BSD-3-Clause",
"dependencies": {
"glob": "^7.0.0",
"interpret": "^1.0.0",
"rechoir": "^0.6.2"
"node_modules/sherpa-onnx-darwin-arm64": {
"version": "1.12.23",
"resolved": "https://registry.npmmirror.com/sherpa-onnx-darwin-arm64/-/sherpa-onnx-darwin-arm64-1.12.23.tgz",
"integrity": "sha512-zbjNUUH/IXhjRyRJ9mpcWVOGIVr31a/qXBPsfOYc7U8cgwcq33Vmj2OzoLYWQF6T+puqCAE4nMxFAxJvdZekhg==",
"cpu": [
"arm64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"darwin"
]
},
"bin": {
"shjs": "bin/shjs"
"node_modules/sherpa-onnx-linux-x64": {
"version": "1.12.23",
"resolved": "https://registry.npmmirror.com/sherpa-onnx-linux-x64/-/sherpa-onnx-linux-x64-1.12.23.tgz",
"integrity": "sha512-pUZIdDvPtyRXQDGo9R9MIBf2AFUzfgcGmutoulsEdH3hpK6JteR7Z/5pfrZIIqe/O99djAjEHK4AlwLHC2jiZw==",
"cpu": [
"x64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"linux"
]
},
"engines": {
"node": ">=4"
"node_modules/sherpa-onnx-node": {
"version": "1.12.23",
"resolved": "https://registry.npmmirror.com/sherpa-onnx-node/-/sherpa-onnx-node-1.12.23.tgz",
"integrity": "sha512-09SRixVSjsajxeCV8Hy9R5J4IHPtw7vNgaIcEokdh/LpU7sY+e12z9uHHIMMMgNiInyGEH74wIwjLXms+W7qRA==",
"license": "Apache-2.0",
"optionalDependencies": {
"sherpa-onnx-darwin-arm64": "^1.12.23",
"sherpa-onnx-darwin-x64": "^1.12.23",
"sherpa-onnx-linux-arm64": "^1.12.23",
"sherpa-onnx-linux-x64": "^1.12.23",
"sherpa-onnx-win-ia32": "^1.12.23",
"sherpa-onnx-win-x64": "^1.12.23"
}
},
"node_modules/sherpa-onnx-node/node_modules/sherpa-onnx-darwin-x64": {
"optional": true
},
"node_modules/sherpa-onnx-node/node_modules/sherpa-onnx-linux-arm64": {
"optional": true
},
"node_modules/sherpa-onnx-win-ia32": {
"version": "1.12.23",
"resolved": "https://registry.npmmirror.com/sherpa-onnx-win-ia32/-/sherpa-onnx-win-ia32-1.12.23.tgz",
"integrity": "sha512-MyLsK7r6dd7paglyTgb8UHTXTEFqOzA91u6VDV64Lq8rDGuOFVYioxX7vlwmGe1A9o7VhuOPNaKcRjEPtVDhBQ==",
"cpu": [
"ia32"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"win32"
]
},
"node_modules/sherpa-onnx-win-x64": {
"version": "1.12.23",
"resolved": "https://registry.npmmirror.com/sherpa-onnx-win-x64/-/sherpa-onnx-win-x64-1.12.23.tgz",
"integrity": "sha512-pdHEYMJiYy8+xzH2WkBVS4/hnRwqjY8FaWnjs0NBgQZnPmc/k4M+TAiauTOuFDNK4GPwFQnjwrCGx6jI9AOkOg==",
"cpu": [
"x64"
],
"license": "Apache-2.0",
"optional": true,
"os": [
"win32"
]
},
"node_modules/signal-exit": {
"version": "3.0.7",
"resolved": "https://registry.npmmirror.com/signal-exit/-/signal-exit-3.0.7.tgz",
@@ -8588,6 +8576,15 @@
"dev": true,
"license": "ISC"
},
"node_modules/silk-wasm": {
"version": "3.7.1",
"resolved": "https://registry.npmmirror.com/silk-wasm/-/silk-wasm-3.7.1.tgz",
"integrity": "sha512-mXPwLRtZxrYV3TZx41jMAeKc80wvmyrcXIcs8HctFxK15Ahz2OJQENYhNgEPeCEOdI6Mbx1NxQsqxzwc3DKerw==",
"license": "MIT",
"engines": {
"node": ">=16.11.0"
}
},
"node_modules/simple-concat": {
"version": "1.0.1",
"resolved": "https://registry.npmmirror.com/simple-concat/-/simple-concat-1.0.1.tgz",
@@ -8888,18 +8885,6 @@
"node": ">=8"
}
},
"node_modules/supports-preserve-symlinks-flag": {
"version": "1.0.0",
"resolved": "https://registry.npmmirror.com/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz",
"integrity": "sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/tar": {
"version": "6.2.1",
"resolved": "https://registry.npmmirror.com/tar/-/tar-6.2.1.tgz",
@@ -9602,19 +9587,6 @@
"node": ">= 8"
}
},
"node_modules/whisper-node": {
"version": "1.1.1",
"resolved": "https://registry.npmmirror.com/whisper-node/-/whisper-node-1.1.1.tgz",
"integrity": "sha512-s1czx7pL0g63QOz0X9oAu7vOf4GzmFfQIy6J7msOAH5Yyiy+4a3w6+Uv0hiHvHkfBWk/+hG8nY3VEFdIapF83g==",
"license": "MIT",
"dependencies": {
"readline-sync": "^1.4.10",
"shelljs": "^0.8.5"
},
"bin": {
"download": "dist/download.js"
}
},
"node_modules/wide-align": {
"version": "1.1.5",
"resolved": "https://registry.npmmirror.com/wide-align/-/wide-align-1.1.5.tgz",

View File

@@ -1,6 +1,6 @@
{
"name": "weflow",
"version": "1.1.2",
"version": "1.2.0",
"description": "WeFlow",
"main": "dist-electron/main.js",
"author": "cc",
@@ -30,8 +30,9 @@
"react": "^19.2.3",
"react-dom": "^19.2.3",
"react-router-dom": "^7.1.1",
"sherpa-onnx-node": "^1.10.38",
"silk-wasm": "^3.7.1",
"wechat-emojis": "^1.0.2",
"whisper-node": "^1.1.1",
"zustand": "^5.0.2"
},
"devDependencies": {
@@ -102,7 +103,8 @@
"dist-electron/**/*"
],
"asarUnpack": [
"node_modules/ffmpeg-static/**/*"
"node_modules/silk-wasm/**/*",
"node_modules/sherpa-onnx-node/**/*"
]
}
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,63 @@
import React, { memo, useEffect, useState, useRef } from 'react'

interface AnimatedStreamingTextProps {
  text: string        // full transcript so far (grows while streaming)
  className?: string
  loading?: boolean   // show a placeholder while no text has arrived yet
}

// The inline <style> below is injected into the *global* document scope, so
// every class/keyframe name is prefixed with `ast`. The previous unprefixed
// `fadeIn` keyframes collided with the differently-defined global
// `@keyframes fadeIn` in VoiceTranscribeDialog.scss (in CSS the last loaded
// keyframes definition wins document-wide). Hoisted to a module constant so
// the string is not rebuilt on every render.
const STREAMING_TEXT_STYLE = `
  .ast-fade-in-text {
    animation: astFadeIn 0.5s ease-out forwards;
    opacity: 0;
  }
  @keyframes astFadeIn {
    from { opacity: 0; transform: translateY(2px); }
    to { opacity: 1; transform: translateY(0); }
  }
  .ast-dot-flashing {
    animation: astBlink 1s infinite;
  }
  @keyframes astBlink { 50% { opacity: 0; } }
`

/**
 * Displays a streaming transcript: each newly appended chunk of `text` is
 * rendered as its own <span> so only the new chunk fades in while earlier
 * chunks stay put. If `text` is replaced wholesale (not a prefix-extension,
 * e.g. a re-recognition), the display resets to the new text. While
 * `loading` is set and no text has arrived, a blinking "..." placeholder is
 * shown instead.
 */
export const AnimatedStreamingText = memo(({ text, className, loading }: AnimatedStreamingTextProps) => {
  // Chunks in arrival order; each renders with its own fade-in animation.
  const [displayedSegments, setDisplayedSegments] = useState<string[]>([])
  const prevTextRef = useRef('')
  useEffect(() => {
    const currentText = (text || '').trim()
    const prevText = prevTextRef.current
    if (currentText === prevText) return
    if (!currentText.startsWith(prevText) && prevText !== '') {
      // Not an append but brand-new text (e.g. re-recognition): reset.
      setDisplayedSegments([currentText])
      prevTextRef.current = currentText
      return
    }
    const newPart = currentText.slice(prevText.length)
    if (newPart) {
      // Add the new part as a separate segment to trigger its animation.
      setDisplayedSegments(prev => [...prev, newPart])
    }
    prevTextRef.current = currentText
  }, [text])
  // Loading placeholder while nothing has been transcribed yet.
  if (loading && !text) {
    return <span className={className}><span className="ast-dot-flashing">...</span></span>
  }
  return (
    <span className={className}>
      {displayedSegments.map((segment, index) => (
        <span key={index} className="ast-fade-in-text">
          {segment}
        </span>
      ))}
      <style>{STREAMING_TEXT_STYLE}</style>
    </span>
  )
})

AnimatedStreamingText.displayName = 'AnimatedStreamingText'

View File

@@ -0,0 +1,255 @@
// Full-screen dimmed backdrop for the voice-transcribe model-download dialog.
.voice-transcribe-dialog-overlay {
  position: fixed;
  top: 0;
  left: 0;
  right: 0;
  bottom: 0;
  background: rgba(0, 0, 0, 0.6);
  backdrop-filter: blur(4px);
  display: flex;
  align-items: center;
  justify-content: center;
  z-index: 10000;
  animation: fadeIn 0.2s ease-out;
}

// The dialog card itself.
.voice-transcribe-dialog {
  background: var(--color-bg-elevated);
  border-radius: 16px;
  box-shadow: 0 8px 32px rgba(0, 0, 0, 0.3);
  width: 90%;
  max-width: 480px;
  animation: slideUp 0.3s ease-out;
  overflow: hidden;
}

// Header row: title on the left, close button on the right.
.dialog-header {
  display: flex;
  align-items: center;
  justify-content: space-between;
  padding: 20px 24px;
  border-bottom: 1px solid var(--color-border);

  h3 {
    margin: 0;
    font-size: 18px;
    font-weight: 600;
    color: var(--color-text-primary);
  }

  .close-button {
    background: none;
    border: none;
    cursor: pointer;
    padding: 4px;
    color: var(--color-text-secondary);
    border-radius: 6px;
    transition: all 0.15s ease;

    &:hover {
      background: var(--color-bg-hover);
      color: var(--color-text-primary);
    }
  }
}

.dialog-content {
  padding: 24px;
}

// Idle state: model description shown before the download starts.
.info-section {
  display: flex;
  flex-direction: column;
  align-items: center;
  text-align: center;
  gap: 16px;

  .info-icon {
    color: var(--color-primary);
    opacity: 0.8;
  }

  .info-text {
    font-size: 15px;
    color: var(--color-text-primary);
    margin: 0;
  }

  // Key/value card listing model name, size, etc.
  .model-info {
    width: 100%;
    background: var(--color-bg);
    border-radius: 12px;
    padding: 16px;
    display: flex;
    flex-direction: column;
    gap: 12px;

    .model-item {
      display: flex;
      justify-content: space-between;
      align-items: center;
      font-size: 14px;

      .label {
        color: var(--color-text-secondary);
      }

      .value {
        color: var(--color-text-primary);
        font-weight: 500;
      }
    }
  }
}

// Downloading state: bouncing icon plus progress bar.
.download-section {
  display: flex;
  flex-direction: column;
  align-items: center;
  gap: 16px;
  padding: 20px 0;

  .download-icon {
    .downloading-icon {
      color: var(--color-primary);
      animation: bounce 1s ease-in-out infinite;
    }
  }

  .download-text {
    font-size: 15px;
    color: var(--color-text-primary);
    margin: 0;
  }

  .progress-bar {
    width: 100%;
    height: 6px;
    background: var(--color-bg);
    border-radius: 3px;
    overflow: hidden;

    .progress-fill {
      height: 100%;
      background: linear-gradient(90deg, var(--color-primary), var(--color-accent));
      border-radius: 3px;
      transition: width 0.3s ease;
    }
  }

  .progress-text {
    font-size: 14px;
    color: var(--color-text-secondary);
    margin: 0;
    font-variant-numeric: tabular-nums; // keeps the percentage from jittering
  }
}

// Success state after the model finished downloading.
.complete-section {
  display: flex;
  flex-direction: column;
  align-items: center;
  gap: 16px;
  padding: 20px 0;

  .complete-icon {
    color: var(--color-success);
  }

  .complete-text {
    font-size: 15px;
    color: var(--color-text-primary);
    margin: 0;
  }
}

// Inline error banner shown under the info section on failure.
.error-message {
  display: flex;
  align-items: center;
  gap: 8px;
  padding: 12px 16px;
  background: rgba(239, 68, 68, 0.1);
  border: 1px solid rgba(239, 68, 68, 0.3);
  border-radius: 8px;
  color: #ef4444;
  font-size: 14px;
  margin-top: 16px;
}

// Bottom action row (secondary = cancel, primary = download).
.dialog-actions {
  display: flex;
  gap: 12px;
  margin-top: 24px;

  button {
    flex: 1;
    padding: 12px 20px;
    border-radius: 8px;
    font-size: 14px;
    font-weight: 500;
    cursor: pointer;
    transition: all 0.15s ease;
    border: none;
    display: flex;
    align-items: center;
    justify-content: center;
    gap: 6px;

    &.btn-secondary {
      background: var(--color-bg);
      color: var(--color-text-primary);

      &:hover {
        background: var(--color-bg-hover);
      }
    }

    &.btn-primary {
      background: var(--color-primary);
      color: white;

      &:hover {
        opacity: 0.9;
        transform: translateY(-1px);
      }

      &:active {
        transform: translateY(0);
      }
    }
  }
}

// NOTE(review): @keyframes names are document-global in CSS; keep these
// unique across the app (other components also inject keyframes globally).
@keyframes fadeIn {
  from {
    opacity: 0;
  }
  to {
    opacity: 1;
  }
}

@keyframes slideUp {
  from {
    opacity: 0;
    transform: translateY(20px);
  }
  to {
    opacity: 1;
    transform: translateY(0);
  }
}

@keyframes bounce {
  0%,
  100% {
    transform: translateY(0);
  }
  50% {
    transform: translateY(-10px);
  }
}

View File

@@ -0,0 +1,145 @@
import React, { useState, useEffect } from 'react'
import { Download, X, CheckCircle, AlertCircle } from 'lucide-react'
import './VoiceTranscribeDialog.scss'

// Props for the model-download dialog shown before the first transcription.
interface VoiceTranscribeDialogProps {
  onClose: () => void             // dismiss without downloading
  onDownloadComplete: () => void  // invoked ~1s after a successful download
}

// NOTE(review): several JSX text labels below are empty or truncated
// (e.g. <h3></h3>, the button captions, "使 AI"); this looks like CJK text
// lost in extraction — confirm the labels against the repository before
// editing this file further.
export const VoiceTranscribeDialog: React.FC<VoiceTranscribeDialogProps> = ({
  onClose,
  onDownloadComplete
}) => {
  // UI state machine: idle -> downloading -> complete | error.
  const [isDownloading, setIsDownloading] = useState(false)
  const [downloadProgress, setDownloadProgress] = useState(0) // 0..100 percent
  const [downloadError, setDownloadError] = useState<string | null>(null)
  const [isComplete, setIsComplete] = useState(false)
  useEffect(() => {
    // Subscribe to download progress events pushed from the main process.
    const removeListener = window.electronAPI.whisper?.onDownloadProgress?.((payload) => {
      if (payload.percent !== undefined) {
        setDownloadProgress(payload.percent)
      }
    })
    return () => {
      removeListener?.()
    }
  }, [])
  // Starts the model download via IPC and tracks its outcome in state.
  const handleDownload = async () => {
    setIsDownloading(true)
    setDownloadError(null)
    setDownloadProgress(0)
    try {
      const result = await window.electronAPI.whisper?.downloadModel()
      if (result?.success) {
        setIsComplete(true)
        setDownloadProgress(100)
        // Close the dialog shortly after success and kick off transcription.
        setTimeout(() => {
          onDownloadComplete()
        }, 1000)
      } else {
        setDownloadError(result?.error || '下载失败')
        setIsDownloading(false)
      }
    } catch (error) {
      setDownloadError(String(error))
      setIsDownloading(false)
    }
  }
  // Overlay click closes the dialog, but never while a download is running.
  const handleCancel = () => {
    if (!isDownloading) {
      onClose()
    }
  }
  return (
    <div className="voice-transcribe-dialog-overlay" onClick={handleCancel}>
      <div className="voice-transcribe-dialog" onClick={(e) => e.stopPropagation()}>
        <div className="dialog-header">
          <h3></h3>
          {!isDownloading && (
            <button className="close-button" onClick={onClose}>
              <X size={20} />
            </button>
          )}
        </div>
        <div className="dialog-content">
          {!isDownloading && !isComplete && (
            <>
              <div className="info-section">
                <AlertCircle size={48} className="info-icon" />
                <p className="info-text">
                  使 AI
                </p>
                <div className="model-info">
                  <div className="model-item">
                    <span className="label"></span>
                    <span className="value">SenseVoiceSmall</span>
                  </div>
                  <div className="model-item">
                    <span className="label"></span>
                    <span className="value"> 240 MB</span>
                  </div>
                  <div className="model-item">
                    <span className="label"></span>
                    <span className="value"></span>
                  </div>
                </div>
              </div>
              {downloadError && (
                <div className="error-message">
                  <AlertCircle size={16} />
                  <span>{downloadError}</span>
                </div>
              )}
              <div className="dialog-actions">
                <button className="btn-secondary" onClick={onClose}>
                </button>
                <button className="btn-primary" onClick={handleDownload}>
                  <Download size={16} />
                  <span></span>
                </button>
              </div>
            </>
          )}
          {isDownloading && !isComplete && (
            <div className="download-section">
              <div className="download-icon">
                <Download size={48} className="downloading-icon" />
              </div>
              <p className="download-text">...</p>
              <div className="progress-bar">
                <div
                  className="progress-fill"
                  style={{ width: `${downloadProgress}%` }}
                />
              </div>
              <p className="progress-text">{downloadProgress.toFixed(1)}%</p>
            </div>
          )}
          {isComplete && (
            <div className="complete-section">
              <CheckCircle size={48} className="complete-icon" />
              <p className="complete-text">...</p>
            </div>
          )}
        </div>
      </div>
    </div>
  )
}

View File

@@ -1882,3 +1882,31 @@
transform: translateX(0);
}
}
/* Voice-to-text ("transcribe") button: small circular icon button. */
.voice-transcribe-btn {
  width: 28px;
  height: 28px;
  padding: 0;
  margin-left: 8px;
  border: none;
  background: var(--primary-light);
  border-radius: 50%;
  color: var(--primary);
  cursor: pointer;
  display: flex;
  align-items: center;
  justify-content: center;
  transition: all 0.2s;
  /* Keep the circle from being squashed inside a flex row. */
  flex-shrink: 0;
  /* Invert colors and grow slightly on hover for affordance. */
  &:hover {
    background: var(--primary);
    color: #fff;
    transform: scale(1.05);
  }
  /* Constrain the embedded icon regardless of its intrinsic size. */
  svg {
    width: 14px;
    height: 14px;
  }
}

View File

@@ -5,8 +5,21 @@ import { useChatStore } from '../stores/chatStore'
import type { ChatSession, Message } from '../types/models'
import { getEmojiPath } from 'wechat-emojis'
import { ImagePreview } from '../components/ImagePreview'
import { VoiceTranscribeDialog } from '../components/VoiceTranscribeDialog'
import { AnimatedStreamingText } from '../components/AnimatedStreamingText'
import './ChatPage.scss'
// Message localType values that are rendered as centered system notices.
const SYSTEM_MESSAGE_TYPES = [
  10000,        // system notification
  266287972401, // "pat" (tickle) notification
]

// Returns true when the given localType belongs to a system-style message.
function isSystemMessage(localType: number): boolean {
  return SYSTEM_MESSAGE_TYPES.some((type) => type === localType)
}
interface ChatPageProps {
// 保留接口以备将来扩展
}
@@ -138,6 +151,8 @@ function ChatPage(_props: ChatPageProps) {
const [highlightedMessageKeys, setHighlightedMessageKeys] = useState<string[]>([])
const [isRefreshingSessions, setIsRefreshingSessions] = useState(false)
const [hasInitialMessages, setHasInitialMessages] = useState(false)
const [showVoiceTranscribeDialog, setShowVoiceTranscribeDialog] = useState(false)
const [pendingVoiceTranscriptRequest, setPendingVoiceTranscriptRequest] = useState<{ sessionId: string; messageId: string } | null>(null)
// 联系人信息加载控制
const isEnrichingRef = useRef(false)
@@ -1128,10 +1143,10 @@ function ChatPage(_props: ChatPageProps) {
const prevMsg = index > 0 ? messages[index - 1] : undefined
const showDateDivider = shouldShowDateDivider(msg, prevMsg)
// 显示时间第一条消息或者与上一条消息间隔超过5分钟
// 显示时间:第一条消息,或者与上一条消息间隔超过5分钟
const showTime = !prevMsg || (msg.createTime - prevMsg.createTime > 300)
const isSent = msg.isSend === 1
const isSystem = msg.localType === 10000
const isSystem = isSystemMessage(msg.localType)
// 系统消息居中显示
const wrapperClass = isSystem ? 'system' : (isSent ? 'sent' : 'received')
@@ -1272,6 +1287,35 @@ function ChatPage(_props: ChatPageProps) {
</div>
)}
</div>
{/* 语音转文字模型下载弹窗 */}
{showVoiceTranscribeDialog && (
<VoiceTranscribeDialog
onClose={() => {
setShowVoiceTranscribeDialog(false)
setPendingVoiceTranscriptRequest(null)
}}
onDownloadComplete={async () => {
setShowVoiceTranscribeDialog(false)
// 下载完成后,继续转写
if (pendingVoiceTranscriptRequest) {
try {
const result = await window.electronAPI.chat.getVoiceTranscript(
pendingVoiceTranscriptRequest.sessionId,
pendingVoiceTranscriptRequest.messageId
)
if (result.success) {
const cacheKey = `voice-transcript:${pendingVoiceTranscriptRequest.messageId}`
voiceTranscriptCache.set(cacheKey, (result.transcript || '').trim())
}
} catch (error) {
console.error('[ChatPage] 语音转文字失败:', error)
}
}
setPendingVoiceTranscriptRequest(null)
}}
/>
)}
</div>
)
}
@@ -1292,7 +1336,7 @@ function MessageBubble({ message, session, showTime, myAvatarUrl, isGroupChat }:
myAvatarUrl?: string;
isGroupChat?: boolean;
}) {
const isSystem = message.localType === 10000
const isSystem = isSystemMessage(message.localType)
const isEmoji = message.localType === 47
const isImage = message.localType === 3
const isVoice = message.localType === 34
@@ -1612,8 +1656,32 @@ function MessageBubble({ message, session, showTime, myAvatarUrl, isGroupChat }:
}
}, [isVoice])
// 监听流式转写结果
useEffect(() => {
if (!isVoice) return
const removeListener = window.electronAPI.chat.onVoiceTranscriptPartial?.((payload: { msgId: string; text: string }) => {
if (payload.msgId === String(message.localId)) {
setVoiceTranscript(payload.text)
voiceTranscriptCache.set(voiceTranscriptCacheKey, payload.text)
}
})
return () => removeListener?.()
}, [isVoice, message.localId, voiceTranscriptCacheKey])
const requestVoiceTranscript = useCallback(async () => {
if (voiceTranscriptLoading || voiceTranscriptRequestedRef.current) return
// 检查模型状态
const modelStatus = await window.electronAPI.whisper?.getModelStatus()
if (!modelStatus?.exists) {
// 模型未下载,抛出错误让外层处理
const error: any = new Error('MODEL_NOT_DOWNLOADED')
error.requiresDownload = true
error.sessionId = session.username
error.messageId = String(message.localId)
throw error
}
voiceTranscriptRequestedRef.current = true
setVoiceTranscriptLoading(true)
setVoiceTranscriptError(false)
@@ -1627,7 +1695,13 @@ function MessageBubble({ message, session, showTime, myAvatarUrl, isGroupChat }:
setVoiceTranscriptError(true)
voiceTranscriptRequestedRef.current = false
}
} catch {
} catch (error: any) {
// 检查是否是模型未下载错误
if (error?.requiresDownload) {
// 不显示错误状态,等待用户手动点击转文字按钮时会触发下载弹窗
voiceTranscriptRequestedRef.current = false
return
}
setVoiceTranscriptError(true)
voiceTranscriptRequestedRef.current = false
} finally {
@@ -1635,13 +1709,23 @@ function MessageBubble({ message, session, showTime, myAvatarUrl, isGroupChat }:
}
}, [message.localId, session.username, voiceTranscriptCacheKey, voiceTranscriptLoading])
// 根据设置决定是否自动转写
const [autoTranscribeEnabled, setAutoTranscribeEnabled] = useState(false)
useEffect(() => {
window.electronAPI.config.get('autoTranscribeVoice').then((value) => {
setAutoTranscribeEnabled(value === true)
})
}, [])
useEffect(() => {
if (!autoTranscribeEnabled) return
if (!isVoice) return
if (!voiceDataUrl) return
if (voiceTranscriptError) return
if (voiceTranscriptLoading || voiceTranscript !== undefined || voiceTranscriptRequestedRef.current) return
void requestVoiceTranscript()
}, [isVoice, voiceDataUrl, voiceTranscript, voiceTranscriptError, voiceTranscriptLoading, requestVoiceTranscript])
}, [autoTranscribeEnabled, isVoice, voiceDataUrl, voiceTranscript, voiceTranscriptError, voiceTranscriptLoading, requestVoiceTranscript])
if (isSystem) {
return (
@@ -1771,7 +1855,12 @@ function MessageBubble({ message, session, showTime, myAvatarUrl, isGroupChat }:
setVoiceLoading(true)
setVoiceError(false)
try {
const result = await window.electronAPI.chat.getVoiceData(session.username, String(message.localId))
const result = await window.electronAPI.chat.getVoiceData(
session.username,
String(message.localId),
message.createTime,
message.serverId
)
if (result.success && result.data) {
const url = `data:audio/wav;base64,${result.data}`
voiceDataUrlCache.set(voiceCacheKey, url)
@@ -1842,6 +1931,22 @@ function MessageBubble({ message, session, showTime, myAvatarUrl, isGroupChat }:
{showDecryptHint && <span className="voice-hint"></span>}
{voiceError && <span className="voice-error"></span>}
</div>
{/* 转文字按钮 */}
{voiceDataUrl && !voiceTranscript && !voiceTranscriptLoading && (
<button
className="voice-transcribe-btn"
onClick={(e) => {
e.stopPropagation()
void requestVoiceTranscript()
}}
title="转文字"
type="button"
>
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
<path d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z" />
</svg>
</button>
)}
</div>
{showTranscript && (
<div
@@ -1849,7 +1954,16 @@ function MessageBubble({ message, session, showTime, myAvatarUrl, isGroupChat }:
onClick={handleTranscriptRetry}
title={voiceTranscriptError ? '点击重试语音转写' : undefined}
>
{transcriptDisplay}
{voiceTranscriptError ? (
'转写失败,点击重试'
) : !voiceTranscript ? (
voiceTranscriptLoading ? '转写中...' : '未识别到文字'
) : (
<AnimatedStreamingText
text={transcriptText}
loading={voiceTranscriptLoading}
/>
)}
</div>
)}
</div>

View File

@@ -20,6 +20,7 @@ interface ExportOptions {
exportImages: boolean
exportVoices: boolean
exportEmojis: boolean
exportVoiceAsText: boolean
}
interface ExportResult {
@@ -54,7 +55,8 @@ function ExportPage() {
exportMedia: false,
exportImages: true,
exportVoices: true,
exportEmojis: true
exportEmojis: true,
exportVoiceAsText: false
})
const loadSessions = useCallback(async () => {
@@ -158,6 +160,7 @@ function ExportPage() {
exportImages: options.exportMedia && options.exportImages,
exportVoices: options.exportMedia && options.exportVoices,
exportEmojis: options.exportMedia && options.exportEmojis,
exportVoiceAsText: options.exportMedia && options.exportVoiceAsText,
dateRange: options.useAllTime ? null : options.dateRange ? {
start: Math.floor(options.dateRange.start.getTime() / 1000),
// 将结束日期设置为当天的 23:59:59,以包含当天的所有消息
@@ -405,6 +408,21 @@ function ExportPage() {
<div className="media-option-divider"></div>
<label className={`media-checkbox-row ${!options.exportMedia ? 'disabled' : ''}`}>
<div className="media-checkbox-info">
<span className="media-checkbox-title"></span>
<span className="media-checkbox-desc"></span>
</div>
<input
type="checkbox"
checked={options.exportVoiceAsText}
disabled={!options.exportMedia}
onChange={e => setOptions({ ...options, exportVoiceAsText: e.target.checked })}
/>
</label>
<div className="media-option-divider"></div>
<label className={`media-checkbox-row ${!options.exportMedia ? 'disabled' : ''}`}>
<div className="media-checkbox-info">
<span className="media-checkbox-title"></span>

View File

@@ -13,19 +13,6 @@ import './SettingsPage.scss'
type SettingsTab = 'appearance' | 'database' | 'whisper' | 'cache' | 'about'
const whisperModels = [
{ value: 'tiny', label: 'tiny (75 MB)' },
{ value: 'base', label: 'base (142 MB)' },
{ value: 'small', label: 'small (466 MB)' },
{ value: 'medium', label: 'medium (1.5 GB)' },
{ value: 'large-v3', label: 'large-v3 (2.9 GB)' }
]
const whisperSources = [
{ value: 'official', label: 'HuggingFace 官方' },
{ value: 'tsinghua', label: '清华镜像 (hf-mirror)' }
]
const tabs: { id: SettingsTab; label: string; icon: React.ElementType }[] = [
{ id: 'appearance', label: '外观', icon: Palette },
{ id: 'database', label: '数据库连接', icon: Database },
@@ -57,10 +44,10 @@ function SettingsPage() {
const [logEnabled, setLogEnabled] = useState(false)
const [whisperModelName, setWhisperModelName] = useState('base')
const [whisperModelDir, setWhisperModelDir] = useState('')
const [whisperDownloadSource, setWhisperDownloadSource] = useState('tsinghua')
const [isWhisperDownloading, setIsWhisperDownloading] = useState(false)
const [whisperDownloadProgress, setWhisperDownloadProgress] = useState(0)
const [whisperModelStatus, setWhisperModelStatus] = useState<{ exists: boolean; path?: string } | null>(null)
const [whisperModelStatus, setWhisperModelStatus] = useState<{ exists: boolean; modelPath?: string; tokensPath?: string } | null>(null)
const [autoTranscribeVoice, setAutoTranscribeVoice] = useState(false)
const [isLoading, setIsLoadingState] = useState(false)
const [isTesting, setIsTesting] = useState(false)
@@ -124,7 +111,7 @@ function SettingsPage() {
const savedImageAesKey = await configService.getImageAesKey()
const savedWhisperModelName = await configService.getWhisperModelName()
const savedWhisperModelDir = await configService.getWhisperModelDir()
const savedWhisperSource = await configService.getWhisperDownloadSource()
const savedAutoTranscribe = await configService.getAutoTranscribeVoice()
if (savedKey) setDecryptKey(savedKey)
if (savedPath) setDbPath(savedPath)
@@ -135,9 +122,8 @@ function SettingsPage() {
}
if (savedImageAesKey) setImageAesKey(savedImageAesKey)
setLogEnabled(savedLogEnabled)
if (savedWhisperModelName) setWhisperModelName(savedWhisperModelName)
setAutoTranscribeVoice(savedAutoTranscribe)
if (savedWhisperModelDir) setWhisperModelDir(savedWhisperModelDir)
if (savedWhisperSource) setWhisperDownloadSource(savedWhisperSource)
} catch (e) {
console.error('加载配置失败:', e)
}
@@ -145,14 +131,15 @@ function SettingsPage() {
const refreshWhisperStatus = async (modelNameValue = whisperModelName, modelDirValue = whisperModelDir) => {
const refreshWhisperStatus = async (modelDirValue = whisperModelDir) => {
try {
const result = await window.electronAPI.whisper?.getModelStatus({
modelName: modelNameValue,
downloadDir: modelDirValue || undefined
})
const result = await window.electronAPI.whisper?.getModelStatus()
if (result?.success) {
setWhisperModelStatus({ exists: Boolean(result.exists), path: result.path })
setWhisperModelStatus({
exists: Boolean(result.exists),
modelPath: result.modelPath,
tokensPath: result.tokensPath
})
}
} catch {
setWhisperModelStatus(null)
@@ -178,17 +165,16 @@ function SettingsPage() {
useEffect(() => {
const removeListener = window.electronAPI.whisper?.onDownloadProgress?.((payload) => {
if (payload.modelName !== whisperModelName) return
if (typeof payload.percent === 'number') {
setWhisperDownloadProgress(payload.percent)
}
})
return () => removeListener?.()
}, [whisperModelName])
}, [])
useEffect(() => {
void refreshWhisperStatus(whisperModelName, whisperModelDir)
}, [whisperModelName, whisperModelDir])
void refreshWhisperStatus(whisperModelDir)
}, [whisperModelDir])
const handleCheckUpdate = async () => {
setIsCheckingUpdate(true)
@@ -331,30 +317,21 @@ function SettingsPage() {
await configService.setWhisperModelName(value)
}
const handleWhisperSourceChange = async (value: string) => {
setWhisperDownloadSource(value)
await configService.setWhisperDownloadSource(value)
}
const handleDownloadWhisperModel = async () => {
if (isWhisperDownloading) return
setIsWhisperDownloading(true)
setWhisperDownloadProgress(0)
try {
const result = await window.electronAPI.whisper.downloadModel({
modelName: whisperModelName,
downloadDir: whisperModelDir || undefined,
source: whisperDownloadSource
})
const result = await window.electronAPI.whisper.downloadModel()
if (result.success) {
setWhisperDownloadProgress(100)
showMessage('Whisper 模型下载完成', true)
await refreshWhisperStatus(whisperModelName, whisperModelDir)
showMessage('SenseVoiceSmall 模型下载完成', true)
await refreshWhisperStatus(whisperModelDir)
} else {
showMessage(result.error || 'Whisper 模型下载失败', false)
showMessage(result.error || '模型下载失败', false)
}
} catch (e) {
showMessage(`Whisper 模型下载失败: ${e}`, false)
showMessage(`模型下载失败: ${e}`, false)
} finally {
setIsWhisperDownloading(false)
}
@@ -475,9 +452,8 @@ function SettingsPage() {
} else {
await configService.setImageAesKey('')
}
await configService.setWhisperModelName(whisperModelName)
await configService.setWhisperModelDir(whisperModelDir)
await configService.setWhisperDownloadSource(whisperDownloadSource)
await configService.setAutoTranscribeVoice(autoTranscribeVoice)
await configService.setOnboardingDone(true)
showMessage('配置保存成功,正在测试连接...', true)
@@ -513,9 +489,8 @@ function SettingsPage() {
setWxid('')
setCachePath('')
setLogEnabled(false)
setWhisperModelName('base')
setAutoTranscribeVoice(false)
setWhisperModelDir('')
setWhisperDownloadSource('tsinghua')
setWhisperModelStatus(null)
setWhisperDownloadProgress(0)
setIsWhisperDownloading(false)
@@ -759,34 +734,31 @@ function SettingsPage() {
)
const renderWhisperTab = () => (
<div className="tab-content">
<p className="section-desc"></p>
<div className="form-group">
<label></label>
<span className="form-hint"></span>
<div className="log-toggle-line">
<span className="log-status">{autoTranscribeVoice ? '已开启' : '已关闭'}</span>
<label className="switch" htmlFor="auto-transcribe-toggle">
<input
id="auto-transcribe-toggle"
className="switch-input"
type="checkbox"
checked={autoTranscribeVoice}
onChange={async (e) => {
const enabled = e.target.checked
setAutoTranscribeVoice(enabled)
await configService.setAutoTranscribeVoice(enabled)
showMessage(enabled ? '已开启自动转文字' : '已关闭自动转文字', true)
}}
/>
<span className="switch-slider" />
</label>
</div>
</div>
<div className="form-group whisper-section">
<label> (Whisper)</label>
<span className="form-hint"></span>
<div className="whisper-grid">
<div className="whisper-field">
<span className="field-label"></span>
<select
value={whisperModelName}
onChange={(e) => handleWhisperModelChange(e.target.value)}
>
{whisperModels.map((model) => (
<option key={model.value} value={model.value}>{model.label}</option>
))}
</select>
</div>
<div className="whisper-field">
<span className="field-label"></span>
<select
value={whisperDownloadSource}
onChange={(e) => handleWhisperSourceChange(e.target.value)}
>
{whisperSources.map((source) => (
<option key={source.value} value={source.value}>{source.label}</option>
))}
</select>
</div>
</div>
<label> (SenseVoiceSmall)</label>
<span className="form-hint"> Sherpa-onnx</span>
<span className="form-hint"></span>
<input
type="text"
@@ -801,9 +773,9 @@ function SettingsPage() {
</div>
<div className="whisper-status-line">
<span className={`status ${whisperModelStatus?.exists ? 'ok' : 'warn'}`}>
{whisperModelStatus?.exists ? '已下载' : '未下载'}
{whisperModelStatus?.exists ? '已下载 (240 MB)' : '未下载 (240 MB)'}
</span>
{whisperModelStatus?.path && <span className="path">{whisperModelStatus.path}</span>}
{whisperModelStatus?.modelPath && <span className="path">{whisperModelStatus.modelPath}</span>}
</div>
{isWhisperDownloading ? (
<div className="whisper-progress">

View File

@@ -20,7 +20,8 @@ export const CONFIG_KEYS = {
IMAGE_AES_KEY: 'imageAesKey',
WHISPER_MODEL_NAME: 'whisperModelName',
WHISPER_MODEL_DIR: 'whisperModelDir',
WHISPER_DOWNLOAD_SOURCE: 'whisperDownloadSource'
WHISPER_DOWNLOAD_SOURCE: 'whisperDownloadSource',
AUTO_TRANSCRIBE_VOICE: 'autoTranscribeVoice'
} as const
// 获取解密密钥
@@ -218,3 +219,14 @@ export async function getOnboardingDone(): Promise<boolean> {
export async function setOnboardingDone(done: boolean): Promise<void> {
await config.set(CONFIG_KEYS.ONBOARDING_DONE, done)
}
/**
 * Reads the auto voice-to-text toggle from persisted config.
 * Anything other than a stored literal `true` is treated as disabled.
 */
export async function getAutoTranscribeVoice(): Promise<boolean> {
  const stored = await config.get(CONFIG_KEYS.AUTO_TRANSCRIBE_VOICE)
  return stored === true
}
/** Persists the auto voice-to-text toggle to config storage. */
export async function setAutoTranscribeVoice(enabled: boolean): Promise<void> {
  await config.set(CONFIG_KEYS.AUTO_TRANSCRIBE_VOICE, enabled)
}

View File

@@ -94,8 +94,9 @@ export interface ElectronAPI {
error?: string
}>
getImageData: (sessionId: string, msgId: string) => Promise<{ success: boolean; data?: string; error?: string }>
getVoiceData: (sessionId: string, msgId: string) => Promise<{ success: boolean; data?: string; error?: string }>
getVoiceData: (sessionId: string, msgId: string, createTime?: number, serverId?: string | number) => Promise<{ success: boolean; data?: string; error?: string }>
getVoiceTranscript: (sessionId: string, msgId: string) => Promise<{ success: boolean; transcript?: string; error?: string }>
onVoiceTranscriptPartial: (callback: (payload: { msgId: string; text: string }) => void) => () => void
}
image: {
@@ -297,8 +298,8 @@ export interface ElectronAPI {
}>
}
whisper: {
downloadModel: (payload: { modelName: string; downloadDir?: string; source?: string }) => Promise<{ success: boolean; path?: string; error?: string }>
getModelStatus: (payload: { modelName: string; downloadDir?: string }) => Promise<{ success: boolean; exists?: boolean; path?: string; sizeBytes?: number; error?: string }>
downloadModel: () => Promise<{ success: boolean; modelPath?: string; tokensPath?: string; error?: string }>
getModelStatus: () => Promise<{ success: boolean; exists?: boolean; modelPath?: string; tokensPath?: string; sizeBytes?: number; error?: string }>
onDownloadProgress: (callback: (payload: { modelName: string; downloadedBytes: number; totalBytes?: number; percent?: number }) => void) => () => void
}
}

View File

@@ -89,6 +89,23 @@ export default defineConfig({
}
}
},
{
entry: 'electron/transcribeWorker.ts',
vite: {
build: {
outDir: 'dist-electron',
rollupOptions: {
external: [
'sherpa-onnx-node'
],
output: {
entryFileNames: 'transcribeWorker.js',
inlineDynamicImports: true
}
}
}
}
},
{
entry: 'electron/preload.ts',
onstart(options) {