import * as fs from 'fs'
import * as path from 'path'
import * as http from 'http'
import * as https from 'https'
import { fileURLToPath } from 'url'
import ExcelJS from 'exceljs'
import { getEmojiPath } from 'wechat-emojis'
import { ConfigService } from './config'
import { wcdbService } from './wcdbService'
import { imageDecryptService } from './imageDecryptService'
import { chatService } from './chatService'
import { videoService } from './videoService'
import { EXPORT_HTML_STYLES } from './exportHtmlStyles'

// ChatLab format type definitions

/** Header section of a ChatLab export. */
interface ChatLabHeader {
  version: string
  exportedAt: number
  generator: string
  description?: string
}

/** Conversation-level metadata of a ChatLab export. */
interface ChatLabMeta {
  name: string
  platform: string
  type: 'group' | 'private'
  groupId?: string
  groupAvatar?: string
}

/** One conversation participant in a ChatLab export. */
interface ChatLabMember {
  platformId: string
  accountName: string
  groupNickname?: string
  avatar?: string
}

/** One message in a ChatLab export. */
interface ChatLabMessage {
  sender: string
  accountName: string
  groupNickname?: string
  timestamp: number
  type: number
  content: string | null
}

/** Top-level shape of a ChatLab export. */
interface ChatLabExport {
  chatlab: ChatLabHeader
  meta: ChatLabMeta
  members: ChatLabMember[]
  messages: ChatLabMessage[]
}

// Message type mapping: WeChat localType -> ChatLab type
const MESSAGE_TYPE_MAP: Record<number, number> = {
  1: 0, // text -> TEXT
  3: 1, // image -> IMAGE
  34: 2, // voice -> VOICE
  43: 3, // video -> VIDEO
  49: 7, // link/file -> LINK (needs further inspection of the sub type)
  47: 5, // sticker -> EMOJI
  48: 8, // location -> LOCATION
  42: 27, // contact card -> CONTACT
  50: 23, // call -> CALL
  10000: 80, // system message -> SYSTEM
}

/** Options accepted by the export entry points. */
export interface ExportOptions {
  format: 'chatlab' | 'chatlab-jsonl' | 'json' | 'html' | 'txt' | 'excel' | 'sql'
  dateRange?: { start: number; end: number } | null
  exportMedia?: boolean
  exportAvatars?: boolean
  exportImages?: boolean
  exportVoices?: boolean
  exportEmojis?: boolean
  exportVoiceAsText?: boolean
  excelCompactColumns?: boolean
  txtColumns?: string[]
  sessionLayout?: 'shared' | 'per-session'
}

/** Column ids and their display labels for the TXT export. */
const TXT_COLUMN_DEFINITIONS: Array<{ id: string; label: string }> = [
  { id: 'index', label: '序号' },
  { id: 'time', label: '时间' },
  { id: 'senderRole', label: '发送者身份' },
  { id: 'messageType', label: '消息类型' },
  { id: 'content', label: '内容' },
  { id: 'senderNickname', label: '发送者昵称' },
  { id: 'senderWxid', label: '发送者微信ID' },
  { id: 'senderRemark', label: '发送者备注' }
]

/** Describes one exported media file. */
interface MediaExportItem {
  relativePath: string
  kind: 'image' | 'voice' | 'emoji' | 'video'
  posterDataUrl?: string
}

/** Progress payload reported while an export runs. */
export interface ExportProgress {
  current: number
  total: number
  currentSession: string
  phase: 'preparing' | 'exporting' | 'exporting-media' | 'exporting-voice' | 'writing' | 'complete'
}

/**
 * Concurrency control: runs `fn` over `items` with at most `limit` calls in flight.
 *
 * Results are stored at the index of their input item, so the returned array
 * is in input order regardless of completion order.
 *
 * @param items Inputs to process.
 * @param limit Maximum number of concurrent calls. Values below 1 and NaN are
 *   treated as 1, and fractional values are floored, so the function neither
 *   throws on a bad limit nor silently skips all work.
 * @param fn Async worker invoked with each item and its index.
 * @returns The results, in the same order as `items`.
 */
async function parallelLimit<T, R>(
  items: T[],
  limit: number,
  fn: (item: T, index: number) => Promise<R>
): Promise<R[]> {
  const results: R[] = new Array(items.length)
  let nextIndex = 0

  // Each worker keeps claiming the next unclaimed index until none are left.
  const runNext = async (): Promise<void> => {
    while (nextIndex < items.length) {
      const index = nextIndex++
      results[index] = await fn(items[index], index)
    }
  }

  // Array(n) throws for negative or fractional n, and n = 0 would start no worker at all.
  const requested = Number.isNaN(limit) ? 1 : Math.floor(limit)
  const workerCount = Math.min(Math.max(requested, 1), items.length)

  await Promise.all(Array.from({ length: workerCount }, () => runNext()))
  return results
}

class ExportService {
  private configService: ConfigService
  private contactCache: Map<string, { displayName: string; avatarUrl?: string }> = new Map()
  // NOTE(review): the value type of this cache is not visible in this part of the file; string is assumed — confirm.
  private inlineEmojiCache: Map<string, string> = new Map()
  private htmlStyleCache: string | null = null

  constructor() {
    this.configService = new ConfigService()
  }

  /**
   * Normalizes an account directory name.
   *
   * - Names starting with `wxid_` (case-insensitive) are cut down to `wxid_`
   *   plus the characters up to the next underscore.
   * - Other names lose a trailing `_XXXX` suffix of exactly four alphanumeric characters.
   * - Anything else is returned trimmed.
   */
  private cleanAccountDirName(dirName: string): string {
    const trimmed = dirName.trim()
    if (!trimmed) return trimmed

    if (trimmed.toLowerCase().startsWith('wxid_')) {
      const match = trimmed.match(/^(wxid_[^_]+)/i)
      if (match) return match[1]
      return trimmed
    }

    const suffixMatch = trimmed.match(/^(.+)_([a-zA-Z0-9]{4})$/)
    if (suffixMatch) return suffixMatch[1]

    return trimmed
  }

  /**
   * Reads `myWxid`, `dbPath` and `decryptKey` from the config service and opens the database
   * through `wcdbService.open`.
   *
   * @returns `{ success: true, cleanedWxid }` on success, otherwise `{ success: false, error }`
   *   naming the missing setting or the failed open.
   */
  private async ensureConnected(): Promise<{ success: boolean; cleanedWxid?: string; error?: string }> {
    const wxid = this.configService.get('myWxid')
    const dbPath = this.configService.get('dbPath')
    const decryptKey = this.configService.get('decryptKey')
    if (!wxid) return { success: false, error: '请先在设置页面配置微信ID' }
    if (!dbPath) return { success: false, error: '请先在设置页面配置数据库路径' }
    if (!decryptKey) return { success: false, error: '请先在设置页面配置解密密钥' }

    const cleanedWxid = this.cleanAccountDirName(wxid)
    const ok = await wcdbService.open(dbPath, decryptKey, cleanedWxid)
    if (!ok) return { success: false, error: 'WCDB 打开失败' }
    return { success: true, cleanedWxid }
  }

  /**
   * Looks up the display name and avatar URL for `username`, caching the result in `contactCache`.
   * Falls back to `username` itself when no display name is returned.
   */
  private async getContactInfo(username: string): Promise<{ displayName: string; avatarUrl?: string }> {
    const cached = this.contactCache.get(username)
    if (cached) return cached

    const [nameResult, avatarResult] = await Promise.all([
      wcdbService.getDisplayNames([username]),
      wcdbService.getAvatarUrls([username])
    ])

    const displayName = (nameResult.success && nameResult.map ? nameResult.map[username] : null) || username
    const avatarUrl = avatarResult.success && avatarResult.map ? avatarResult.map[username] : undefined

    const info = { displayName, avatarUrl }
    this.contactCache.set(username, info)
    return info
  }

  /**
   * Parses the ext_buffer binary data and extracts the group nicknames of group members.
   * ext_buffer holds protobuf-like encoded data, for example:
   * wxid_xxx[group nickname]wxid_yyy[group nickname]...
*/
  private parseGroupNicknamesFromExtBuffer(buffer: Buffer): Map<string, string> {
    const nicknameMap = new Map<string, string>()

    try {
      // Decode as a string; partially garbled output is acceptable here.
      const raw = buffer.toString('utf8')

      // Every wxid-shaped token: "wxid_" followed by letters, digits or underscores.
      const wxidPattern = /wxid_[a-z0-9_]+/gi

      // Use each match's own index. Searching for the text again with indexOf always
      // returns the first occurrence and can land inside a longer wxid.
      for (const match of raw.matchAll(wxidPattern)) {
        const wxid = match[0]
        const wxidLower = wxid.toLowerCase()
        // Keep the first valid nickname found for a wxid.
        if (nicknameMap.has(wxidLower)) continue

        // Only the 100 characters right after the wxid are inspected.
        const start = (match.index ?? 0) + wxid.length
        const afterWxid = raw.slice(start, start + 100)

        // Skip leading non-printable characters, then collect the first printable run.
        let nickname = ''
        let foundStart = false
        for (let i = 0; i < afterWxid.length; i++) {
          const char = afterWxid[i]
          const code = char.charCodeAt(0)

          const isPrintable =
            (code >= 0x4E00 && code <= 0x9FFF) || // CJK unified ideographs
            (code >= 0x3000 && code <= 0x303F) || // CJK symbols and punctuation
            (code >= 0xFF00 && code <= 0xFFEF) || // full-width forms
            (code >= 0x20 && code <= 0x7E) // printable ASCII

          if (isPrintable) {
            foundStart = true
            nickname += char
          } else if (foundStart) {
            // A non-printable character ends the nickname.
            break
          }
        }

        // Clean-up: trim surrounding whitespace and drop control characters.
        nickname = nickname.trim().replace(/[\x00-\x1F\x7F]/g, '')

        // Only keep plausible nicknames (length > 0 and < 50).
        if (nickname.length > 0 && nickname.length < 50) {
          nicknameMap.set(wxidLower, nickname)
        }
      }
    } catch (e) {
      // On failure, whatever was collected so far (possibly nothing) is returned.
      console.error('Failed to parse ext_buffer:', e)
    }

    return nicknameMap
  }

  /**
   * Reads the group nicknames of a chat room from the chat_room table of contact.db.
   *
   * @param chatroomId Chat room id (for example "xxxxx@chatroom").
   * @returns Map from lower-cased wxid to group nickname; empty when the query
   *   fails, returns no row, or the ext_buffer value cannot be turned into a Buffer.
   */
  async getGroupNicknamesForRoom(chatroomId: string): Promise<Map<string, string>> {
    console.log('========== getGroupNicknamesForRoom START ==========', chatroomId)
    try {
      // Query the chat_room table of contact.db.
      // The path argument is null because contact.db was already opened together with the handle.
      // Single quotes in the id are doubled before it is embedded in the SQL text.
      const sql = `SELECT ext_buffer FROM chat_room WHERE username = '${chatroomId.replace(/'/g, "''")}'`
      console.log('执行SQL查询:', sql)

      const result = await wcdbService.execQuery('contact', null, sql)
      console.log('execQuery结果:', {
        success: result.success,
        rowCount: result.rows?.length,
        error: result.error
      })

      if (!result.success || !result.rows || result.rows.length === 0) {
        console.log('❌ 群昵称查询失败或无数据:', chatroomId, result.error)
        return new Map()
      }

      let extBuffer = result.rows[0].ext_buffer
      console.log('ext_buffer原始类型:', typeof extBuffer, 'isBuffer:', Buffer.isBuffer(extBuffer))

      // Binary columns returned by execQuery arrive encoded as a string (hex or base64)
      // and have to be converted back into a Buffer.
      if (typeof extBuffer === 'string') {
        console.log('🔄 ext_buffer是字符串,尝试转换为Buffer...')
        if (this.looksLikeHex(extBuffer)) {
          console.log('✅ 检测到hex编码,使用hex解码')
          extBuffer = Buffer.from(extBuffer, 'hex')
        } else if (this.looksLikeBase64(extBuffer)) {
          console.log('✅ 检测到base64编码,使用base64解码')
          extBuffer = Buffer.from(extBuffer, 'base64')
        } else {
          // Encoding could not be determined: try hex first.
          // NOTE(review): Buffer.from(string, 'hex') is documented to truncate at the first
          // invalid character rather than throw, so this catch branch is likely never taken — confirm.
          console.log('⚠️ 无法判断编码格式,默认尝试hex')
          try {
            extBuffer = Buffer.from(extBuffer, 'hex')
          } catch (e) {
            console.log('❌ hex解码失败,尝试base64')
            extBuffer = Buffer.from(extBuffer, 'base64')
          }
        }
        console.log('✅ 转换后的Buffer长度:', extBuffer.length)
      }

      if (!extBuffer || !Buffer.isBuffer(extBuffer)) {
        console.log('❌ ext_buffer转换失败,不是Buffer类型:', typeof extBuffer)
        return new Map()
      }

      console.log('✅ 开始解析ext_buffer, 长度:', extBuffer.length)
      const nicknamesMap = this.parseGroupNicknamesFromExtBuffer(extBuffer)
      console.log('✅ 解析完成, 找到', nicknamesMap.size, '个群昵称')

      // Log the first five nicknames as a sample.
      let count = 0
      for (const [wxid, nickname] of nicknamesMap.entries()) {
        if (count++ < 5) {
          console.log(` - ${wxid}: "${nickname}"`)
        }
      }

      return nicknamesMap
    } catch (e) {
      console.error('❌ getGroupNicknamesForRoom异常:', e)
      return new Map()
    } finally {
      console.log('========== getGroupNicknamesForRoom END ==========')
    }
  }

  /**
   * Converts a WeChat message type to a ChatLab type.
   *
   * For localType 49 the `<type>` sub type inside the content decides the result;
   * everything else goes through MESSAGE_TYPE_MAP, with 99 as the fallback.
   */
  private convertMessageType(localType: number, content: string): number {
    if (localType === 49) {
      const typeMatch = /<type>(\d+)<\/type>/i.exec(content)
      if (typeMatch) {
        const subType = parseInt(typeMatch[1], 10)
        switch (subType) {
          case 6:
            return 4 // file -> FILE
          case 33:
          case 36:
            return 24 // mini program -> SHARE
          case 57:
            return 25 // quoted reply -> REPLY
          default:
            return 7 // link -> LINK
        }
      }
    }
    return MESSAGE_TYPE_MAP[localType] ?? 99
  }

  /**
   * Decodes message content, preferring the compressed column and falling back
   * to the plain one when the former decodes to an empty string.
   */
  private decodeMessageContent(messageContent: any, compressContent: any): string {
    let content = this.decodeMaybeCompressed(compressContent)
    if (!content || content.length === 0) {
      content = this.decodeMaybeCompressed(messageContent)
    }
    return content
  }

  /**
   * Decodes a value that may be plain text, hex text or base64 text.
   *
   * Digit-only strings are returned as they are. Non-string values yield ''.
   */
  private decodeMaybeCompressed(raw: any): string {
    if (!raw) return ''
    if (typeof raw === 'string') {
      if (raw.length === 0) return ''
      if (/^[0-9]+$/.test(raw)) {
        return raw
      }
      if (this.looksLikeHex(raw)) {
        const bytes = Buffer.from(raw, 'hex')
        if (bytes.length > 0) return this.decodeBinaryContent(bytes)
      }
      if (this.looksLikeBase64(raw)) {
        try {
          const bytes = Buffer.from(raw, 'base64')
          return this.decodeBinaryContent(bytes)
        } catch {
          return raw
        }
      }
      return raw
    }
    return ''
  }

  /**
   * Turns binary content into text.
   *
   * Data starting with the little-endian magic 0xFD2FB528 is decompressed with fzstd first.
   * Otherwise the bytes are read as UTF-8; if 20% or more of the decoded characters are
   * replacement characters the bytes are read as latin1 instead. Any error yields ''.
   */
  private decodeBinaryContent(data: Buffer): string {
    if (data.length === 0) return ''
    try {
      if (data.length >= 4) {
        const magic = data.readUInt32LE(0)
        if (magic === 0xFD2FB528) {
          const fzstd = require('fzstd')
          const decompressed = fzstd.decompress(data)
          return Buffer.from(decompressed).toString('utf-8')
        }
      }
      const decoded = data.toString('utf-8')
      const replacementCount = (decoded.match(/\uFFFD/g) || []).length
      if (replacementCount < decoded.length * 0.2) {
        return decoded.replace(/\uFFFD/g, '')
      }
      return data.toString('latin1')
    } catch {
      return ''
    }
  }

  /** True when `s` has even length and consists only of hex digits. */
  private looksLikeHex(s: string): boolean {
    if (s.length % 2 !== 0) return false
    return /^[0-9a-fA-F]+$/.test(s)
  }

  /** True when the length of `s` is a multiple of 4 and it only uses base64 alphabet characters. */
  private looksLikeBase64(s: string): boolean {
    if (s.length % 4 !== 0) return false
    return /^[A-Za-z0-9+/=]+$/.test(s)
  }

  /**
   * Parses message content into readable text.
   * Note: voice messages get a placeholder here; the actual transcription is
   * done asynchronously during export.
   */
  private parseMessageContent(content: string, localType: number): string | null {
    if (!content) return null

    switch (localType) {
      case 1:
        return this.stripSenderPrefix(content)
      case 3:
        return '[图片]'
      case 34:
        return '[语音消息]' // placeholder, replaced by the transcript during export
      case 42:
        return '[名片]'
      case 43:
        return '[视频]'
      case 47:
        return '[动画表情]'
      case 48:
        return '[位置]'
      case 49: {
        const title = this.extractXmlValue(content, 'title')
        return title || '[链接]'
      }
      case 50:
        return this.parseVoipMessage(content)
      case 10000:
        return this.cleanSystemMessage(content)
      case 266287972401:
        return this.cleanSystemMessage(content) // "pat" (nudge) message
      default:
        // Unknown local types that carry an app message of sub type 57 are treated as quoted replies.
        if (content.includes('<type>57</type>')) {
          const title = this.extractXmlValue(content, 'title')
          return title || '[引用消息]'
        }
        return this.stripSenderPrefix(content) || null
    }
  }

  /**
   * Builds the plain-text representation of a message for text-style exports.
   *
   * @param content Decoded message content; empty content is treated as ''.
   * @param localType WeChat local message type.
   * @param options Only `exportVoiceAsText` is read here.
   * @param voiceTranscript Transcript used for voice messages when `exportVoiceAsText` is set.
   */
  private formatPlainExportContent(
    content: string,
    localType: number,
    options: { exportVoiceAsText?: boolean },
    voiceTranscript?: string
  ): string {
    const safeContent = content || ''

    if (localType === 3) return '[图片]'
    if (localType === 1) return this.stripSenderPrefix(safeContent)
    if (localType === 34) {
      if (options.exportVoiceAsText) {
        return voiceTranscript || '[语音消息 - 转文字失败]'
      }
      return '[其他消息]'
    }
    if (localType === 42) {
      const normalized = this.normalizeAppMessageContent(safeContent)
      const nickname =
        this.extractXmlValue(normalized, 'nickname') ||
        this.extractXmlValue(normalized, 'displayname') ||
        this.extractXmlValue(normalized, 'name')
      return nickname ? `[名片]${nickname}` : '[名片]'
    }
    if (localType === 43) {
      const normalized = this.normalizeAppMessageContent(safeContent)
      const lengthValue =
        this.extractXmlValue(normalized, 'playlength') ||
        this.extractXmlValue(normalized, 'playLength') ||
        this.extractXmlValue(normalized, 'length') ||
        this.extractXmlValue(normalized, 'duration')
      const seconds = lengthValue ? this.parseDurationSeconds(lengthValue) : null
      return seconds ? `[视频]${seconds}s` : '[视频]'
    }
    if (localType === 48) {
      const normalized = this.normalizeAppMessageContent(safeContent)
      const location =
        this.extractXmlValue(normalized, 'label') ||
        this.extractXmlValue(normalized, 'poiname') ||
        this.extractXmlValue(normalized, 'poiName') ||
        this.extractXmlValue(normalized, 'name')
      return location ?
`[定位]${location}` : '[定位]'
    }
    // VoIP call messages are delegated to parseVoipMessage.
    if (localType === 50) {
      return this.parseVoipMessage(safeContent)
    }
    // System messages and the 266287972401 local type share the same clean-up.
    if (localType === 10000 || localType === 266287972401) {
      return this.cleanSystemMessage(safeContent)
    }

    const normalized = this.normalizeAppMessageContent(safeContent)
    // NOTE(review): the argument below is an empty string as shown, which makes includes()
    // always true; a tag name such as an app-message marker appears to be missing — confirm.
    const isAppMessage = normalized.includes('')
    if (localType === 49 || isAppMessage) {
      // NOTE(review): the pattern presumably started with an opening type tag that is missing here — confirm.
      const typeMatch = /(\d+)<\/type>/i.exec(normalized)
      const subType = typeMatch ? parseInt(typeMatch[1], 10) : 0
      const title = this.extractXmlValue(normalized, 'title') || this.extractXmlValue(normalized, 'appname')
      // NOTE(review): text appears to be missing on the next line. It jumps from the middle of an
      // includes(...) call straight into what looks like the tail of a duration helper
      // (values >= 1000 divided by 1000 and rounded, otherwise rounded). Restore from the
      // original file before relying on this section.
      if (subType === 3 || normalized.includes('= 1000) return Math.round(numeric / 1000)
    return Math.round(numeric)
  }

  /**
   * Extracts the first amount-looking token from `text`: an optional yen sign followed by
   * a number with an optional decimal part, or a bare number. Whitespace inside the match
   * is removed. Returns null for empty input or when nothing matches.
   */
  private extractAmountFromText(text: string): string | null {
    if (!text) return null
    // NOTE(review): the character class lists the same yen sign twice as shown; one of them
    // may originally have been a different (e.g. full-width) variant — confirm.
    const match = /([¥¥]\s*\d+(?:\.\d+)?|\d+(?:\.\d+)?)/.exec(text)
    return match ? match[1].replace(/\s+/g, '') : null
  }

  /**
   * Removes a leading "name:" prefix (letters, digits, underscore, hyphen) from `content`.
   * A colon directly followed by "//" is left alone, so URLs such as "https://..." are kept.
   */
  private stripSenderPrefix(content: string): string {
    return content.replace(/^[\s]*([a-zA-Z0-9_-]+):(?!\/\/)/, '')
  }

  /**
   * Returns the trimmed text between the first `<tagName>` and `</tagName>` pair in `xml`
   * (case-insensitive), or '' when the tag is not found.
   */
  private extractXmlValue(xml: string, tagName: string): string {
    const regex = new RegExp(`<${tagName}>([\\s\\S]*?)<\/${tagName}>`, 'i')
    const match = regex.exec(xml)
    if (match) {
      // NOTE(review): the regex literal passed to replace() is empty as shown, which turns the
      // rest of this line into a comment; presumably it stripped CDATA wrappers — confirm.
      return match[1].replace(//g, '').trim()
    }
    return ''
  }

  /**
   * Reduces a system message to readable text. Empty content yields '[系统消息]'.
   */
  private cleanSystemMessage(content: string): string {
    if (!content) return '[系统消息]'

    // First try to extract specific kinds of system message content.
    // 1. Extract the text inside sysmsg.
    // NOTE(review): the start of this pattern (presumably the opening sysmsg tag) is missing — confirm.
    const sysmsgTextMatch = /]*>([\s\S]*?)<\/sysmsg>/i.exec(content)
    if (sysmsgTextMatch) {
      content = sysmsgTextMatch[1]
    }

    // 2. Extract revokemsg (recalled message) text.
    // NOTE(review): this pattern has no capture group as shown although match[1] is read below;
    // part of the pattern appears to be missing — confirm.
    const revokeMatch = /<\/replacemsg>/i.exec(content)
    if (revokeMatch) {
      return revokeMatch[1].trim()
    }

    // 3. Extract the "pat" (nudge) message.
    const patMatch = /