--[[
super_replacer.lua
A more flexibly configurable replacement for Rime's OpenCC filter.
https://github.com/amzxyz/rime_wanxiang
by amzxyz

Data files are resolved in the order: UserDir > SharedDir.
`option: true` is supported and means "always enabled".

super_replacer:
    db_name: lua/replacer
    delimiter: "|"
    comment_format: "〔%s〕"
    # chain: true  -> pipeline; the output of one rule is the input of the next
    #                 (typical use: s2t followed by t2hk gives s2hk).
    # chain: false -> rules run in parallel, each one looks up the original text.
    chain: true
    types:
      # Scenario 1: typing '哈哈' yields '1.哈哈 2.😄'
      - option: emoji          # switch name, must match the schema's switch
        mode: append           # append: add new candidates
                               # replace: replace the original candidate
                               # comment: show the value as an annotation
        comment_mode: none     # comment given to the generated candidates:
                               #   "append" / "comment": inherit the original comment
                               #   "text": use the original candidate text
                               #   "none": empty
                               # when the key is absent the code falls back to "comment"
        tags: [abc]            # segment tags the rule applies to
        prefix: "_em_"         # key prefix separating data sets inside one database
        files:
          - lua/data/emoji.txt
      # Scenario 2: typing 'hello' shows 'hello 〔你好 哈喽〕'
      - option: chinese_english
        mode: comment          # <--- annotation mode
        comment_mode: none
        tags: [abc]
        prefix: "_en_"
        files:
          - lua/data/english_chinese.txt
          - lua/data/chinese_english.txt
      # Scenario 3: always-on rule, option: true
      - option: true
        mode: append           # <--- add new candidates
        comment_mode: none
        tags: [abc]
        prefix: "_ot_"
        files:
          - lua/data/others.txt
      # Scenario 4: Simplified -> Traditional conversion by direct replacement
      - option: [ s2t, s2hk, s2tw ]  # the rule is active when any listed switch is on
        mode: replace          # <--- replace the original candidate
        comment_mode: append
        sentence: true         # <--- sentence-level (segmented) replacement
        tags: [abc]
        prefix: "_s2t_"
        files:
          - lua/data/STCharacters.txt
          - lua/data/STPhrases.txt
      - option: s2hk
        mode: replace
        comment_mode: append
        sentence: true
        tags: [abc]
        prefix: "_s2hk_"
        files:
          - lua/data/HKVariants.txt
          - lua/data/HKVariantsRevPhrases.txt
      - option: s2tw
        mode: replace
        comment_mode: append
        sentence: true
        tags: [abc]
        prefix: "_s2tw_"
        files:
          - lua/data/TWVariants.txt
          - lua/data/TWVariantsRevPhrases.txt
]]

local M = {}

-- Performance: cache frequently used library functions in locals.
local insert = table.insert
local concat = table.concat
local s_match = string.match
local s_gmatch = string.gmatch
local s_format = string.format
local s_byte = string.byte
local s_sub = string.sub
local s_gsub = string.gsub
local open = io.open
local type = type
local tonumber = tonumber

-- Longest phrase, in characters, tried by the forward-maximum-matching scan.
local MAX_LOOKAHEAD = 6

--- Require a module without raising.
-- @param name module name passed to `require`
-- @return the module, or nil when loading failed
local function safe_require(name)
    local status, lib = pcall(require, name)
    if status then
        return lib
    end
    return nil
end

-- Base dependencies; both are optional and may be nil.
local userdb = safe_require("lib/userdb") or safe_require("userdb")
local wanxiang = safe_require("wanxiang")

--- Refill the database from the configured text files.
-- Called from M.init when the stored version or delimiter differs.
-- Each usable line has the form `key<whitespace>value`; empty lines and
-- lines starting with `#` are skipped.
-- @param tasks array of `{ path = ..., prefix = ... }`
-- @param db    database object providing `update` (and optionally `empty`)
-- @return true (unreadable files are only logged)
local function rebuild(tasks, db)
    if db.empty then
        db:empty()
    end
    for _, task in ipairs(tasks) do
        local txt_path = task.path
        local prefix = task.prefix
        local f = open(txt_path, "r")
        if f then
            for line in f:lines() do
                if line ~= "" and not s_match(line, "^%s*#") then
                    local k, v = s_match(line, "^(%S+)%s+(.+)")
                    if k and v then
                        -- Trim surrounding whitespace (this also drops a trailing "\r").
                        v = s_match(v, "^%s*(.-)%s*$")
                        db:update(prefix .. k, v)
                    end
                end
            end
            f:close()
        else
            if log and log.info then
                log.info("super_replacer: 无法读取文件: " .. txt_path)
            end
        end
    end
    return true
end

--- Compute the byte offset at which every UTF-8 character starts.
-- The character length is derived from the lead byte only.
-- @param text string
-- @return array of start offsets followed by a final sentinel `#text + 1`
local function get_utf8_offsets(text)
    local offsets = {}
    local len = #text
    local i = 1
    while i <= len do
        insert(offsets, i)
        local b = s_byte(text, i)
        if b < 128 then
            i = i + 1
        elseif b < 224 then
            i = i + 2
        elseif b < 240 then
            i = i + 3
        else
            i = i + 4
        end
    end
    insert(offsets, len + 1)
    return offsets
end

--- Convert a text with forward maximum matching (FMM).
-- At each position the longest phrase (up to MAX_LOOKAHEAD characters) found
-- in the database wins; otherwise the single character is looked up, and kept
-- unchanged when it has no entry. Only the first delimiter-separated value of
-- an entry is used.
-- @param text      string to convert
-- @param db        database object providing `fetch`
-- @param prefix    key prefix of the rule
-- @param split_pat Lua pattern capturing one delimiter-separated value
-- @return the converted string
local function segment_convert(text, db, prefix, split_pat)
    local offsets = get_utf8_offsets(text)
    local char_count = #offsets - 1
    local result_parts = {}
    local i = 1

    while i <= char_count do
        local start_byte = offsets[i]
        local next_i = nil

        -- offsets[j] is the first byte AFTER the phrase, so j may go up to
        -- char_count + 1; that last value is what lets a phrase end on the
        -- final character of the text.
        local max_j = i + MAX_LOOKAHEAD
        if max_j > char_count + 1 then
            max_j = char_count + 1
        end

        -- Multi-character phrases only (j >= i + 2), longest first.
        for j = max_j, i + 2, -1 do
            local sub_text = s_sub(text, start_byte, offsets[j] - 1)
            local val = db:fetch(prefix .. sub_text)
            if val then
                insert(result_parts, s_match(val, split_pat) or sub_text)
                next_i = j
                break
            end
        end

        if not next_i then
            -- Single-character fallback.
            local char = s_sub(text, start_byte, offsets[i + 1] - 1)
            local val = db:fetch(prefix .. char)
            insert(result_parts, (val and s_match(val, split_pat)) or char)
            next_i = i + 1
        end

        i = next_i
    end

    return concat(result_parts)
end

--- Tell whether a rule is enabled: a literal `true` trigger is always on,
-- a string trigger is on when the context option of that name is set.
local function rule_is_active(rule, ctx)
    for _, trigger in ipairs(rule.triggers) do
        if trigger == true then
            return true
        elseif type(trigger) == "string" and ctx:get_option(trigger) then
            return true
        end
    end
    return false
end

--- Tell whether a rule applies to the segment: rules without tags always
-- apply, otherwise at least one required tag must be present.
local function rule_matches_tags(rule, seg_tags)
    if not rule.tags then
        return true
    end
    for req_tag, _ in pairs(rule.tags) do
        if seg_tags[req_tag] then
            return true
        end
    end
    return false
end

--- Module interface: read the configuration and open the database.
-- Fills `env.delimiter`, `env.comment_format`, `env.chain`,
-- `env.split_pattern`, `env.types` and, when available, `env.db`.
-- @param env filter environment (uses `env.name_space` and `env.engine`)
function M.init(env)
    local ns = env.name_space
    ns = s_gsub(ns, "^%*", "")
    local config = env.engine.schema.config

    local user_dir = rime_api:get_user_data_dir()
    local shared_dir = rime_api:get_shared_data_dir()

    -- 1. Basic settings
    local db_name = config:get_string(ns .. "/db_name") or "lua/replacer"
    local delim = config:get_string(ns .. "/delimiter")
    if not delim or delim == "" then
        -- An empty delimiter would build the malformed pattern "([^]+)".
        delim = "|"
    end
    env.delimiter = delim
    env.comment_format = config:get_string(ns .. "/comment_format") or "〔%s〕"

    -- Global version number, used to decide whether the data must be rebuilt.
    local current_version = "v0.0.0"
    if wanxiang and wanxiang.version then
        current_version = wanxiang.version
    end

    env.chain = config:get_bool(ns .. "/chain")
    if env.chain == nil then
        env.chain = false
    end

    if delim == " " then
        env.split_pattern = "%S+"
    else
        -- Escape pattern magic characters before embedding in a character class.
        local esc = s_gsub(delim, "[%-%.%+%[%]%(%)%$%^%%%?%*]", "%%%1")
        env.split_pattern = "([^" .. esc .. "]+)"
    end

    -- 2. Parse the rule list
    env.types = {}
    local tasks = {} -- only consumed when a rebuild is needed

    -- Resolve a relative data path: user dir first, then shared dir.
    -- Falls back to the user path when the file exists in neither place.
    local function resolve_path(relative)
        if not relative then
            return nil
        end
        local user_path = user_dir .. "/" .. relative
        local f = open(user_path, "r")
        if f then
            f:close()
            return user_path
        end
        local shared_path = shared_dir .. "/" .. relative
        f = open(shared_path, "r")
        if f then
            f:close()
            return shared_path
        end
        return user_path
    end

    local types_path = ns .. "/types"
    local type_list = config:get_list(types_path)
    if type_list then
        for i = 0, type_list.size - 1 do
            local entry_path = types_path .. "/@" .. i

            -- Triggers: `option` / `options`, each either a list or a scalar.
            local triggers = {}
            local opts_keys = { "option", "options" }
            for _, key in ipairs(opts_keys) do
                local key_path = entry_path .. "/" .. key
                local list = config:get_list(key_path)
                if list then
                    for k = 0, list.size - 1 do
                        local val = config:get_string(key_path .. "/@" .. k)
                        if val then
                            insert(triggers, val)
                        end
                    end
                else
                    -- A boolean `true` becomes the always-on trigger; any
                    -- other scalar is read as an option name.
                    if config:get_bool(key_path) == true then
                        insert(triggers, true)
                    else
                        local val = config:get_string(key_path)
                        -- Skip the literal string "true" to avoid parsing it twice.
                        if val and val ~= "true" then
                            insert(triggers, val)
                        end
                    end
                end
            end

            -- Tags: `tag` / `tags`, each either a list or a scalar.
            local target_tags = nil
            local tag_keys = { "tag", "tags" }
            for _, key in ipairs(tag_keys) do
                local key_path = entry_path .. "/" .. key
                local list = config:get_list(key_path)
                if list then
                    if not target_tags then
                        target_tags = {}
                    end
                    for k = 0, list.size - 1 do
                        local val = config:get_string(key_path .. "/@" .. k)
                        if val then
                            target_tags[val] = true
                        end
                    end
                else
                    local val = config:get_string(key_path)
                    if val then
                        if not target_tags then
                            target_tags = {}
                        end
                        target_tags[val] = true
                    end
                end
            end

            -- Entries without any trigger are ignored.
            if #triggers > 0 then
                local prefix = config:get_string(entry_path .. "/prefix") or ""
                local mode = config:get_string(entry_path .. "/mode") or "append"
                local comment_mode = config:get_string(entry_path .. "/comment_mode")
                if not comment_mode then
                    comment_mode = "comment"
                end
                local fmm = config:get_bool(entry_path .. "/sentence")
                if fmm == nil then
                    fmm = false
                end

                insert(env.types, {
                    triggers = triggers,
                    tags = target_tags,
                    prefix = prefix,
                    mode = mode,
                    comment_mode = comment_mode,
                    fmm = fmm,
                })

                -- Collect data files (used for a rebuild).
                local keys_to_check = { "files", "file" }
                for _, key in ipairs(keys_to_check) do
                    local d_path = entry_path .. "/" .. key
                    local list = config:get_list(d_path)
                    if list then
                        for j = 0, list.size - 1 do
                            local p = resolve_path(config:get_string(d_path .. "/@" .. j))
                            if p then
                                insert(tasks, { path = p, prefix = prefix })
                            end
                        end
                    else
                        local p = resolve_path(config:get_string(d_path))
                        if p then
                            insert(tasks, { path = p, prefix = prefix })
                        end
                    end
                end
            end
        end
    end

    -- 3. Database initialisation
    if not userdb then
        return
    end

    -- NOTE(review): the result of d:open() is not checked here — confirm
    -- whether the userdb wrapper reports a failed open by return value.
    local ok, db = pcall(function()
        local d = userdb.LevelDb(db_name)
        d:open()
        return d
    end)

    if ok and db then
        env.db = db
        local db_version = db:meta_fetch("_wanxiang_ver") or ""
        local old_delim = db:meta_fetch("_delim")

        -- Rebuild when either the version or the delimiter changed.
        local need_rebuild = false
        if current_version ~= db_version then
            need_rebuild = true
        end
        if env.delimiter ~= old_delim then
            need_rebuild = true
        end

        if need_rebuild then
            if rebuild(tasks, db) then
                db:meta_update("_wanxiang_ver", current_version)
                db:meta_update("_delim", env.delimiter)
                if log and log.info then
                    log.info("super_replacer: 检测到版本变更 (" .. db_version .. " -> " .. current_version .. "),数据已重建。")
                end
            end
        end
    else
        env.db = nil
    end
end

--- Module interface: close the database opened by M.init.
function M.fini(env)
    if env.db then
        env.db:close()
        env.db = nil
    end
end

--- Module interface: the filter itself.
-- For every incoming candidate each active rule is looked up in the database
-- and, depending on its mode, adds candidates, replaces the candidate or
-- contributes an annotation. Candidates pass through untouched when no rules
-- or no database are available.
-- @param input candidate stream providing `iter()`
-- @param env   filter environment prepared by M.init
function M.func(input, env)
    if not env.types or #env.types == 0 or not env.db then
        for cand in input:iter() do
            yield(cand)
        end
        return
    end

    local ctx = env.engine.context
    local db = env.db
    local types = env.types
    local split_pat = env.split_pattern
    local comment_fmt = env.comment_format
    local is_chain = env.chain

    local seg = ctx.composition:back()
    local current_seg_tags = seg and seg.tags or {}

    for cand in input:iter() do
        local current_text = cand.text
        local show_main = true
        local current_main_comment = cand.comment

        local pending_candidates = {}
        local comments = {}

        for _, t in ipairs(types) do
            if rule_is_active(t, ctx) and rule_matches_tags(t, current_seg_tags) then
                -- In chain mode a rule sees the output of the previous ones.
                local query_text = is_chain and current_text or cand.text
                local val = db:fetch(t.prefix .. query_text)

                if not val and t.fmm then
                    local seg_result = segment_convert(query_text, db, t.prefix, split_pat)
                    if seg_result ~= query_text then
                        val = seg_result
                    end
                end

                if val then
                    local mode = t.mode

                    -- Comment given to candidates generated by this rule.
                    -- "append" is the documented name for inheriting the
                    -- original comment; "comment" is the internal default.
                    local rule_comment = ""
                    if t.comment_mode == "text" then
                        rule_comment = cand.text
                    elseif t.comment_mode == "comment" or t.comment_mode == "append" then
                        rule_comment = cand.comment
                    end

                    if mode == "comment" then
                        local parts = {}
                        for p in s_gmatch(val, split_pat) do
                            insert(parts, p)
                        end
                        insert(comments, concat(parts, " "))
                    elseif mode == "replace" then
                        if is_chain then
                            -- First value replaces the running text, the
                            -- remaining ones become extra candidates.
                            local first = true
                            for p in s_gmatch(val, split_pat) do
                                if first then
                                    current_text = p
                                    if t.comment_mode == "none" then
                                        current_main_comment = ""
                                    elseif t.comment_mode == "text" then
                                        current_main_comment = cand.text
                                    end
                                    first = false
                                else
                                    insert(pending_candidates, { text = p, comment = rule_comment })
                                end
                            end
                        else
                            show_main = false
                            for p in s_gmatch(val, split_pat) do
                                insert(pending_candidates, { text = p, comment = rule_comment })
                            end
                        end
                    elseif mode == "append" then
                        for p in s_gmatch(val, split_pat) do
                            insert(pending_candidates, { text = p, comment = rule_comment })
                        end
                    end
                end
            end
        end

        -- Annotation collected from `mode: comment` rules.
        local annotation = nil
        if #comments > 0 then
            annotation = s_format(comment_fmt, concat(comments, " "))
            if cand.comment and cand.comment ~= "" then
                cand.comment = cand.comment .. annotation
            else
                cand.comment = annotation
            end
        end

        if show_main then
            if is_chain and current_text ~= cand.text then
                -- The rebuilt candidate must carry the annotation too,
                -- otherwise it is lost whenever a replace rule also fired.
                local main_comment = current_main_comment
                if annotation then
                    main_comment = (main_comment or "") .. annotation
                end
                local nc = Candidate("kv", cand.start, cand._end, current_text, main_comment)
                nc.preedit = cand.preedit
                nc.quality = cand.quality
                yield(nc)
            else
                yield(cand)
            end
        end

        for _, item in ipairs(pending_candidates) do
            -- Skip extras that duplicate the main candidate being shown.
            if not (show_main and item.text == current_text) then
                local nc = Candidate("kv", cand.start, cand._end, item.text, item.comment)
                nc.preedit = cand.preedit
                nc.quality = cand.quality
                yield(nc)
            end
        end
    end
end

return M