api速度提升

This commit is contained in:
xuncha
2026-05-30 07:29:05 +08:00
parent 452e00fea1
commit 3fceb9b2a0
5 changed files with 892 additions and 8 deletions

View File

@@ -0,0 +1,585 @@
import { basename, extname } from 'path'
import * as fzstd from 'fzstd'
import type { Message } from './chatService'
/**
* apiMessageMapping —— HTTP API 非媒体消息的「行 -> Message」纯函数映射管线。
*
* 这是 chatService.mapRowsToMessagesLiteForApi 及其完整调用闭包(约 23 个纯辅助函数)的
* 忠实拷贝,去掉了对 `this` / configService / wcdbService / 原生层的依赖,唯一外部输入是
* 调用方传入的 `myWxid`。目的是让这套 CPU 密集hex/zstd 解压、字符清洗、key 构造)的解码
* 映射可以脱离主进程,运行在 worker 线程(见 apiMessageWorker.ts / apiMessageMapperPool.ts
* 从而既不卡住 WeFlow 本体,又能多线程并行提速。
*
* 重要:这里的逻辑必须与 chatService 中对应的私有方法保持一致。两边都是「逐行独立、无跨行状态」
* 的纯映射,因此分片/并行处理与一次性处理输出完全一致。若将来修改了 chatService 的任何解码/
* 映射细节,请同步修改本文件(反之亦然)。
*/
function encodeMessageKeySegment(value: unknown): string {
const normalized = String(value ?? '').trim()
return encodeURIComponent(normalized)
}
function getMessageSourceInfo(row: Record<string, any>): { dbName?: string; tableName?: string; dbPath?: string } {
const dbPath = String(row._db_path || row.db_path || '').trim()
const explicitDbName = String(row.db_name || '').trim()
const tableName = String(row.table_name || '').trim()
const dbName = explicitDbName || (dbPath ? basename(dbPath, extname(dbPath)) : '')
return {
dbName: dbName || undefined,
tableName: tableName || undefined,
dbPath: dbPath || undefined
}
}
function buildMessageKey(input: {
localId: number
serverId: number
createTime: number
sortSeq: number
senderUsername?: string | null
localType: number
dbName?: string
tableName?: string
dbPath?: string
}): string {
const localId = Number.isFinite(input.localId) ? Math.max(0, Math.floor(input.localId)) : 0
const serverId = Number.isFinite(input.serverId) ? Math.max(0, Math.floor(input.serverId)) : 0
const createTime = Number.isFinite(input.createTime) ? Math.max(0, Math.floor(input.createTime)) : 0
const sortSeq = Number.isFinite(input.sortSeq) ? Math.max(0, Math.floor(input.sortSeq)) : 0
const localType = Number.isFinite(input.localType) ? Math.floor(input.localType) : 0
const senderUsername = encodeMessageKeySegment(input.senderUsername || '')
const dbPath = String(input.dbPath || '').trim()
const dbName = String(input.dbName || '').trim() || (input.dbPath ? basename(input.dbPath, extname(input.dbPath)) : '')
const tableName = String(input.tableName || '').trim()
const sourceScope = dbPath || dbName
if (localId > 0 && sourceScope && tableName) {
return `${encodeMessageKeySegment(sourceScope)}:${encodeMessageKeySegment(tableName)}:${localId}`
}
if (localId > 0 && sourceScope) {
// 当底层未返回 table_name 时,避免使用 db:_:localId会误并同库不同表的消息
return `local:${encodeMessageKeySegment(sourceScope)}:${localId}:${createTime}:${sortSeq}:${senderUsername}:${localType}`
}
if (serverId > 0) {
const scopedServer = sourceScope ? `${encodeMessageKeySegment(sourceScope)}:${serverId}` : String(serverId)
return `server:${scopedServer}:${createTime}:${sortSeq}:${localId}:${senderUsername}:${localType}`
}
return `fallback:${encodeMessageKeySegment(sourceScope)}:${createTime}:${sortSeq}:${localId}:${senderUsername}:${localType}`
}
function getRowField(row: Record<string, any>, keys: string[]): any {
for (const key of keys) {
if (row[key] !== undefined && row[key] !== null) return row[key]
}
const lowerMap = new Map<string, string>()
for (const actual of Object.keys(row)) {
lowerMap.set(actual.toLowerCase(), actual)
}
for (const key of keys) {
const actual = lowerMap.get(key.toLowerCase())
if (actual && row[actual] !== undefined && row[actual] !== null) {
return row[actual]
}
}
return undefined
}
function coerceRowNumber(raw: any): number {
if (raw === undefined || raw === null) return NaN
if (typeof raw === 'number') return raw
if (typeof raw === 'bigint') return Number(raw)
if (Buffer.isBuffer(raw)) {
return coerceRowNumber(raw.toString('utf-8'))
}
if (raw instanceof Uint8Array) {
return coerceRowNumber(Buffer.from(raw).toString('utf-8'))
}
if (Array.isArray(raw)) {
return coerceRowNumber(Buffer.from(raw).toString('utf-8'))
}
if (typeof raw === 'object') {
if ('value' in raw) return coerceRowNumber(raw.value)
if ('intValue' in raw) return coerceRowNumber(raw.intValue)
if ('low' in raw && 'high' in raw) {
try {
const low = BigInt(raw.low >>> 0)
const high = BigInt(raw.high >>> 0)
return Number((high << 32n) + low)
} catch {
return NaN
}
}
const text = raw.toString ? String(raw) : ''
if (text && text !== '[object Object]') {
return coerceRowNumber(text)
}
return NaN
}
const text = String(raw).trim()
if (!text) return NaN
if (/^[+-]?\d+$/.test(text)) {
const parsed = Number(text)
return Number.isFinite(parsed) ? parsed : NaN
}
if (/^[+-]?\d+\.\d+$/.test(text)) {
const parsed = Number(text)
return Number.isFinite(parsed) ? parsed : NaN
}
return NaN
}
function getRowInt(row: Record<string, any>, keys: string[], fallback = 0): number {
const raw = getRowField(row, keys)
if (raw === undefined || raw === null || raw === '') return fallback
const parsed = coerceRowNumber(raw)
return Number.isFinite(parsed) ? parsed : fallback
}
function parseCompactDateTimeDigitsToSeconds(raw: string): number {
const text = String(raw || '').trim()
if (!/^\d{8}(?:\d{4}(?:\d{2})?)?$/.test(text)) return 0
const year = Number.parseInt(text.slice(0, 4), 10)
const month = Number.parseInt(text.slice(4, 6), 10)
const day = Number.parseInt(text.slice(6, 8), 10)
const hour = text.length >= 12 ? Number.parseInt(text.slice(8, 10), 10) : 0
const minute = text.length >= 12 ? Number.parseInt(text.slice(10, 12), 10) : 0
const second = text.length >= 14 ? Number.parseInt(text.slice(12, 14), 10) : 0
if (!Number.isFinite(year) || year < 1990 || year > 2200) return 0
if (!Number.isFinite(month) || month < 1 || month > 12) return 0
if (!Number.isFinite(day) || day < 1 || day > 31) return 0
if (!Number.isFinite(hour) || hour < 0 || hour > 23) return 0
if (!Number.isFinite(minute) || minute < 0 || minute > 59) return 0
if (!Number.isFinite(second) || second < 0 || second > 59) return 0
const dt = new Date(year, month - 1, day, hour, minute, second)
if (
dt.getFullYear() !== year ||
dt.getMonth() !== month - 1 ||
dt.getDate() !== day ||
dt.getHours() !== hour ||
dt.getMinutes() !== minute ||
dt.getSeconds() !== second
) {
return 0
}
const ts = Math.floor(dt.getTime() / 1000)
return Number.isFinite(ts) && ts > 0 ? ts : 0
}
function parseDateTimeTextToSeconds(raw: unknown): number {
const text = String(raw ?? '').trim()
if (!text) return 0
const compactDigits = parseCompactDateTimeDigitsToSeconds(text)
if (compactDigits > 0) return compactDigits
if (/[zZ]|[+-]\d{2}:?\d{2}$/.test(text)) {
const parsed = Date.parse(text)
const seconds = Math.floor(parsed / 1000)
if (Number.isFinite(seconds) && seconds > 0) return seconds
}
const normalized = text.replace('T', ' ').replace(/\.\d+$/, '').replace(/\//g, '-')
const match = normalized.match(/^(\d{4})-(\d{1,2})-(\d{1,2})(?:\s+(\d{1,2}):(\d{1,2})(?::(\d{1,2}))?)?$/)
if (!match) return 0
const year = Number.parseInt(match[1], 10)
const month = Number.parseInt(match[2], 10)
const day = Number.parseInt(match[3], 10)
const hour = Number.parseInt(match[4] || '0', 10)
const minute = Number.parseInt(match[5] || '0', 10)
const second = Number.parseInt(match[6] || '0', 10)
if (!Number.isFinite(year) || !Number.isFinite(month) || !Number.isFinite(day)) return 0
const dt = new Date(year, month - 1, day, hour, minute, second)
const ts = Math.floor(dt.getTime() / 1000)
return Number.isFinite(ts) && ts > 0 ? ts : 0
}
function normalizeTimestampLikeToSeconds(raw: unknown): number {
if (raw === undefined || raw === null || raw === '') return 0
const text = String(raw ?? '').trim()
if (!text) return 0
const compactDigits = parseCompactDateTimeDigitsToSeconds(text)
if (compactDigits > 0) return compactDigits
const parsed = coerceRowNumber(raw)
if (Number.isFinite(parsed) && parsed > 0) {
let normalized = Math.floor(parsed)
while (normalized > 10000000000) {
normalized = Math.floor(normalized / 1000)
}
return normalized
}
return parseDateTimeTextToSeconds(text)
}
function getRowTimestampSeconds(row: Record<string, any>, keys: string[], fallback = 0): number {
const raw = getRowField(row, keys)
if (raw === undefined || raw === null || raw === '') return fallback
const parsed = normalizeTimestampLikeToSeconds(raw)
return parsed > 0 ? parsed : fallback
}
function normalizeUnsignedIntegerToken(raw: any): string | undefined {
if (raw === undefined || raw === null || raw === '') return undefined
if (typeof raw === 'bigint') {
return raw >= 0n ? raw.toString() : '0'
}
if (typeof raw === 'number') {
if (!Number.isFinite(raw)) return undefined
return String(Math.max(0, Math.floor(raw)))
}
if (Buffer.isBuffer(raw)) {
return normalizeUnsignedIntegerToken(raw.toString('utf-8').trim())
}
if (raw instanceof Uint8Array) {
return normalizeUnsignedIntegerToken(Buffer.from(raw).toString('utf-8').trim())
}
if (Array.isArray(raw)) {
return normalizeUnsignedIntegerToken(Buffer.from(raw).toString('utf-8').trim())
}
if (typeof raw === 'object') {
if ('value' in raw) return normalizeUnsignedIntegerToken(raw.value)
if ('intValue' in raw) return normalizeUnsignedIntegerToken(raw.intValue)
if ('low' in raw && 'high' in raw) {
try {
const low = BigInt(raw.low >>> 0)
const high = BigInt(raw.high >>> 0)
const value = (high << 32n) + low
return value >= 0n ? value.toString() : '0'
} catch {
return undefined
}
}
const text = raw.toString ? String(raw).trim() : ''
if (text && text !== '[object Object]') {
return normalizeUnsignedIntegerToken(text)
}
return undefined
}
const text = String(raw).trim()
if (!text) return undefined
if (/^\d+$/.test(text)) {
return text.replace(/^0+(?=\d)/, '') || '0'
}
if (/^[+-]?\d+$/.test(text)) {
try {
const value = BigInt(text)
return value >= 0n ? value.toString() : '0'
} catch {
return undefined
}
}
const parsed = Number(text)
if (Number.isFinite(parsed)) {
return String(Math.max(0, Math.floor(parsed)))
}
return undefined
}
function cleanAccountDirName(dirName: string): string {
const trimmed = dirName.trim()
if (!trimmed) return trimmed
if (trimmed.toLowerCase().startsWith('wxid_')) {
const match = trimmed.match(/^(wxid_[^_]+)/i)
if (match) return match[1]
return trimmed
}
const suffixMatch = trimmed.match(/^(.+)_([a-zA-Z0-9]{4})$/)
const cleaned = suffixMatch ? suffixMatch[1] : trimmed
return cleaned
}
function buildIdentityKeys(raw: string): string[] {
const value = String(raw || '').trim()
if (!value) return []
const lowerRaw = value.toLowerCase()
const cleaned = cleanAccountDirName(value).toLowerCase()
if (cleaned && cleaned !== lowerRaw) {
return [cleaned, lowerRaw]
}
return [lowerRaw]
}
/**
* 判断消息是否由「我」发出。与 chatService.resolveMessageIsSend 等价,但 myWxid 由参数传入
* chatService 版本从 configService.getMyWxidCleaned() 读取)。
*/
function resolveMessageIsSend(
rawIsSend: number | null,
senderUsername: string | null | undefined,
myWxidRaw: string
): { isSend: number | null; selfMatched: boolean; correctedBySelfIdentity: boolean } {
const normalizedRawIsSend = Number.isFinite(rawIsSend as number) ? rawIsSend : null
const senderKeys = buildIdentityKeys(String(senderUsername || ''))
if (senderKeys.length === 0) {
return {
isSend: normalizedRawIsSend,
selfMatched: false,
correctedBySelfIdentity: false
}
}
const myWxid = String(myWxidRaw || '').trim()
const selfKeys = buildIdentityKeys(myWxid)
if (selfKeys.length === 0) {
return {
isSend: normalizedRawIsSend,
selfMatched: false,
correctedBySelfIdentity: false
}
}
const selfMatched = senderKeys.some(senderKey =>
selfKeys.some(selfKey =>
senderKey === selfKey ||
senderKey.startsWith(selfKey + '_') ||
selfKey.startsWith(senderKey + '_')
)
)
if (selfMatched && normalizedRawIsSend !== 1) {
return {
isSend: 1,
selfMatched: true,
correctedBySelfIdentity: true
}
}
if (normalizedRawIsSend === null) {
return {
isSend: selfMatched ? 1 : 0,
selfMatched,
correctedBySelfIdentity: false
}
}
return {
isSend: normalizedRawIsSend,
selfMatched,
correctedBySelfIdentity: false
}
}
function decodeHtmlEntities(content: string): string {
return content
.replace(/&amp;/g, '&')
.replace(/&lt;/g, '<')
.replace(/&gt;/g, '>')
.replace(/&quot;/g, '"')
.replace(/&apos;/g, "'")
}
function cleanUtf16(input: string): string {
if (!input) return input
try {
const cleaned = input.replace(/[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F-\x9F]/g, '')
const codeUnits = cleaned.split('').map((c) => c.charCodeAt(0))
const validUnits: number[] = []
for (let i = 0; i < codeUnits.length; i += 1) {
const unit = codeUnits[i]
if (unit >= 0xd800 && unit <= 0xdbff) {
if (i + 1 < codeUnits.length) {
const nextUnit = codeUnits[i + 1]
if (nextUnit >= 0xdc00 && nextUnit <= 0xdfff) {
validUnits.push(unit, nextUnit)
i += 1
continue
}
}
continue
}
if (unit >= 0xdc00 && unit <= 0xdfff) {
continue
}
validUnits.push(unit)
}
return String.fromCharCode(...validUnits)
} catch {
return input.replace(/[^ -~一-鿿 -〿]/g, '')
}
}
function extractSenderUsernameFromContent(content: string): string | null {
if (!content) return null
const normalized = cleanUtf16(decodeHtmlEntities(String(content)))
const match = /^\s*([a-zA-Z0-9_@-]{4,}):(?!\/\/)\s*(?:\r?\n|<br\s*\/?>)/i.exec(normalized)
if (!match?.[1]) return null
const candidate = match[1].trim()
return candidate || null
}
function compactEncodedPayload(raw: string): string {
return String(raw || '').replace(/\s+/g, '').trim()
}
function looksLikeHex(s: string): boolean {
const compact = compactEncodedPayload(s)
if (compact.length % 2 !== 0) return false
return /^[0-9a-fA-F]+$/.test(compact)
}
function looksLikeBase64(s: string): boolean {
const compact = compactEncodedPayload(s)
if (compact.length % 4 !== 0) return false
return /^[A-Za-z0-9+/=]+$/.test(compact)
}
function decodeBinaryContent(data: Buffer, fallbackValue?: string): string {
if (data.length === 0) return ''
try {
// 检查是否是 zstd 压缩数据 (magic number: 0xFD2FB528)
if (data.length >= 4) {
const magicLE = data.readUInt32LE(0)
const magicBE = data.readUInt32BE(0)
if (magicLE === 0xFD2FB528 || magicBE === 0xFD2FB528) {
// zstd 压缩,需要解压
try {
const decompressed = fzstd.decompress(data)
return Buffer.from(decompressed).toString('utf-8')
} catch (e) {
console.error('zstd 解压失败:', e)
}
}
}
// 尝试直接 UTF-8 解码
const decoded = data.toString('utf-8')
// 检查是否有太多替换字符
const replacementCount = (decoded.match(/<2F>/g) || []).length
if (replacementCount < decoded.length * 0.2) {
return decoded.replace(/<2F>/g, '')
}
// 如果提供了 fallbackValue且解码结果看起来像二进制垃圾则返回 fallbackValue
if (fallbackValue && replacementCount > 0) {
return fallbackValue
}
// 尝试 latin1 解码
return data.toString('latin1')
} catch {
return fallbackValue || ''
}
}
function decodeMaybeCompressed(raw: any): string {
if (!raw) return ''
// 如果是 Buffer/Uint8Array
if (Buffer.isBuffer(raw) || raw instanceof Uint8Array) {
return decodeBinaryContent(Buffer.from(raw), String(raw))
}
// 如果是字符串
if (typeof raw === 'string') {
if (raw.length === 0) return ''
const compactRaw = compactEncodedPayload(raw)
// 检查是否是 hex 编码
if (compactRaw.length > 16 && looksLikeHex(compactRaw)) {
const bytes = Buffer.from(compactRaw, 'hex')
if (bytes.length > 0) {
return decodeBinaryContent(bytes, raw)
}
}
// 检查是否是 base64 编码
if (compactRaw.length > 16 && looksLikeBase64(compactRaw)) {
try {
const bytes = Buffer.from(compactRaw, 'base64')
return decodeBinaryContent(bytes, raw)
} catch { }
}
// 普通字符串
return raw
}
return ''
}
function decodeMessageContent(messageContent: any, compressContent: any): string {
// 优先使用 compress_content
let content = decodeMaybeCompressed(compressContent)
if (!content || content.length === 0) {
content = decodeMaybeCompressed(messageContent)
}
return content
}
/**
* 行 -> Message[] 的轻量映射(非媒体)。等价于 chatService.mapRowsToMessagesLiteForApi
* 但是纯函数myWxid 由参数传入),可在 worker 线程运行。逐行独立、无跨行状态。
*/
export function mapRowsToMessagesLite(rows: Record<string, any>[], myWxidRaw: string): Message[] {
const myWxid = String(myWxidRaw || '').trim()
const messages: Message[] = []
for (const row of rows) {
const sourceInfo = getMessageSourceInfo(row)
const localType = getRowInt(row, ['local_type'], 1)
const createTime = getRowTimestampSeconds(row, ['create_time', 'createTime', 'msg_time', 'msgTime', 'time'], 0)
const sortSeq = getRowInt(row, ['sort_seq'], createTime > 0 ? createTime * 1000 : 0)
const localId = getRowInt(row, ['local_id'], 0)
const serverIdRaw = normalizeUnsignedIntegerToken(row.server_id)
const serverId = getRowInt(row, ['server_id'], 0)
const content = decodeMessageContent(row.message_content, row.compress_content)
const isSendRaw = row.computed_is_send ?? row.is_send
const parsedRawIsSend = isSendRaw === null || isSendRaw === undefined
? null
: parseInt(String(isSendRaw), 10)
const normalizedIsSend = typeof parsedRawIsSend === 'number' && Number.isFinite(parsedRawIsSend)
? parsedRawIsSend
: null
const senderFromRow = String(row.sender_username || '').trim() || extractSenderUsernameFromContent(content) || null
const { isSend } = resolveMessageIsSend(normalizedIsSend, senderFromRow, myWxid)
const senderUsername = senderFromRow || (isSend === 1 && myWxid ? myWxid : null)
messages.push({
messageKey: buildMessageKey({
localId,
serverId,
createTime,
sortSeq,
senderUsername,
localType,
...sourceInfo
}),
localId,
serverId,
serverIdRaw,
localType,
createTime,
sortSeq,
isSend,
senderUsername,
parsedContent: '',
rawContent: content,
content,
_db_path: sourceInfo.dbPath
} as Message)
}
return messages
}