--@amzxyz https://github.com/amzxyz/rime_wanxiang local wanxiang = require('wanxiang') local tone_map = { ['ā']='a', ['á']='a', ['ǎ']='a', ['à']='a', ['ē']='e', ['é']='e', ['ě']='e', ['è']='e', ['ī']='i', ['í']='i', ['ǐ']='i', ['ì']='i', ['ō']='o', ['ó']='o', ['ǒ']='o', ['ò']='o', ['ň']='n', ['ū']='u', ['ú']='u', ['ǔ']='u', ['ù']='u', ['ǹ']='n', ['ǖ']='ü', ['ǘ']='ü', ['ǚ']='ü', ['ǜ']='ü', ['ń']='n', } local function remove_pinyin_tone(s) local result = {} for uchar in s:gmatch("[%z\1-\127\194-\244][\128-\191]*") do table.insert(result, tone_map[uchar] or uchar) end return table.concat(result) end -- ---------------------- -- # 辅助码拆分提示模块 -- PRO 专用 -- ---------------------- local CF = {} function CF.init(env) if wanxiang.is_pro_scheme(env) then -- pro 版直接初始化 CF.get_dict(env) end end function CF.fini(env) env.chaifen_dict = nil collectgarbage() end function CF.get_dict(env) if env.chaifen_dict == nil then env.chaifen_dict = ReverseLookup("wanxiang_chaifen") end return env.chaifen_dict end function CF.get_comment(cand, env) local dict = CF.get_dict(env) if not dict then return "" end local raw = dict:lookup(cand.text) if not raw or raw == "" then return "" end local tpl = (env and env.settings and env.settings.chaifen) or "" if tpl ~= "" then -- 取 chaifen 左右两边 local left, right = tpl:match("^(.-)chaifen(.-)$") if left then return left .. raw .. right end end return raw end -- ---------------------- -- # 错音错字提示模块 -- ---------------------- local CR = {} local corrections_cache = nil -- 用于缓存已加载的词典 function CR.init(env) CR.style = env.settings.corrector_type or '{comment}' --if corrections_cache then return end local auto_delimiter = env.settings.auto_delimiter local is_pro = wanxiang.is_pro_scheme(env) -- 根据方案选择加载路径 local path = (is_pro and "dicts/cuoyin.pro.dict.yaml") or "dicts/cuoyin.dict.yaml" local file, close_file, err = wanxiang.load_file_with_fallback(path) if not file then log.error(string.format("[super_comment]: 加载失败 %s,错误: %s", path, err)) return end corrections_cache = {} for line in file:lines() do if not line:match("^#") then local text, code, weight, comment = line:match("^(.-)\t(.-)\t(.-)\t(.-)$") if text and code then text = text:match("^%s*(.-)%s*$") code = code:match("^%s*(.-)%s*$") comment = comment and comment:match("^%s*(.-)%s*$") or "" comment = comment:gsub("%s+", auto_delimiter) code = code:gsub("%s+", auto_delimiter) corrections_cache[code] = { text = text, comment = comment } end end end close_file() end function CR.get_comment(cand) local correction = corrections_cache and corrections_cache[cand.comment] or nil if not (correction and cand.text == correction.text) then return nil end -- 只认占位符 `comment`,按“刀法”切分 local tpl = CR.style or "comment" local left, right = tpl:match("^(.-)comment(.-)$") if left then return left .. correction.comment .. right else return correction.comment end end -- ---------------------- -- 部件组字返回的注释 -- ---------------------- ---@return string local function get_az_comment(_, env, initial_comment) if not initial_comment or initial_comment == "" then return "〔无〕" end local final_comment = nil local auto_delimiter = env.settings.auto_delimiter or " " -- 拆分初始评论为多个段落 local segments = {} for segment in initial_comment:gmatch("[^%s]+") do table.insert(segments, segment) end local semicolon_count = select(2, segments[1]:gsub(";", "")) -- 使用第一个段来判断分号的数量 local pinyins = {} local fuzhu = nil for _, segment in ipairs(segments) do local pinyin = segment:match("^[^;~]+") local fz = nil if semicolon_count == 1 then -- 一个分号:取后段 fz = segment:match(";(.+)$") else -- 无分号不取辅助码 fz = nil end if pinyin then table.insert(pinyins, pinyin) end if not fuzhu and fz and fz ~= "" then fuzhu = fz end end -- 拼接结果 if #pinyins > 0 then local pinyin_str = table.concat(pinyins, ",") if fuzhu then final_comment = string.format("〔音%s 辅%s〕", pinyin_str, fuzhu) else final_comment = string.format("〔音%s〕", pinyin_str) end end return final_comment or "〔无〕" end -- ---------------------- -- # 辅助码提示或带调全拼注释模块 (Fuzhu) -- ---------------------- local function get_fz_comment(cand, env, initial_comment) local length = utf8.len(cand.text) if length > env.settings.candidate_length then return "" end local auto_delimiter = env.settings.auto_delimiter or " " local segments = {} for segment in string.gmatch(initial_comment, "[^" .. auto_delimiter .. "]+") do table.insert(segments, segment) end -- 根据 option 动态决定是否强制使用 tone local use_tone = env.engine.context:get_option("tone_hint") local fuzhu_type = use_tone and "tone" or "fuzhu" local first_segment = segments[1] or "" local semicolon_count = select(2, first_segment:gsub(";", "")) local fuzhu_comments = {} -- 没有分号的情况 if semicolon_count == 0 then return initial_comment:gsub(auto_delimiter, " ") else -- 有分号:按类型提取 for _, segment in ipairs(segments) do if fuzhu_type == "tone" then -- 取第一个分号“前”的内容 local before = segment:match("^(.-);") if before and before ~= "" then table.insert(fuzhu_comments, before) end else -- "fuzhu" -- 取第一个分号“后”的内容(到行尾) local after = segment:match(";(.+)$") if after and after ~= "" then table.insert(fuzhu_comments, after) end end end end -- 最终拼接输出,fuzhu用 `,`,tone用 /连接 if #fuzhu_comments > 0 then if fuzhu_type == "tone" then return table.concat(fuzhu_comments, " ") else return table.concat(fuzhu_comments, "/") end else return "" end end local SV = {} -- 工具:取光标前的编码(安全处理 caret 越界) local function front_input(ctx) if not ctx then return "" end local raw_full = ctx.input or "" local caret = ctx.caret_pos or #raw_full if caret < 0 then caret = 0 elseif caret > #raw_full then caret = #raw_full end return raw_full:sub(1, caret) end -- 这个模块主要用于将滤镜阶段未修改前的注释或者 preedit -- 存到上下文变量里,按键处理阶段使用;update_notifier 保证一致性 function SV.init(env) env._sv_seq_sig = "" env._sv_last_pre = "" -- 最近一次要写入的 preedit env._saved_input_for_seq = "" -- 上次对应的 raw_in(光标前编码) local ctx = env.engine.context env._sv_ctx_conn = ctx.update_notifier:connect(function(c) local raw_in = front_input(c) local pre = env._sv_last_pre or "" if pre == "" or raw_in == "" then return end -- 不重写:光标前编码 + preedit local sig = raw_in .. "\t" .. pre if env._sv_seq_sig == sig then return end c:set_property("sequence_preedit_key", raw_in) c:set_property("sequence_preedit_val", pre) env._sv_seq_sig = sig end) end -- 断开 notifier,清理状态 function SV.fini(env) if env._sv_ctx_conn then env._sv_ctx_conn:disconnect() env._sv_ctx_conn = nil end env._sv_seq_sig = nil env._sv_last_pre = nil env._saved_input_for_seq = nil end -- 限制更新范围:同一个 raw_in 只记第一次的 preedit function SV.update_preedit(env, preedit) local ctx = env.engine.context if not ctx then return end local raw_in = front_input(ctx) preedit = preedit or "" if raw_in == "" or preedit == "" then return end if env._saved_input_for_seq ~= raw_in then env._saved_input_for_seq = raw_in env._sv_last_pre = preedit end end -- 对 cand.preedit 应用 tone_preedit/0..9 的映射(数字 -> 上标等) local function apply_tone_preedit(env, cand) if not cand or not cand.preedit or cand.preedit == "" then return end -- 用 context.input 判断是否有相邻数字 local input local engine = env.engine if engine and engine.context then -- Rime 里一般是 string,保险起见兜个 nil input = engine.context.input or "" end -- 如果整条输入串中存在相邻两个数字(例如 "li39"、"abc10" 等), -- 则整体不做任何转换,直接返回,为了配合小键盘输入逻辑中包吃书字面大小一致性 if input and input ~= "" and input:match("%d%d") then return end -- 懒加载 tone_map if not env.tone_map then env.tone_map = {} local cfg = engine and engine.schema and engine.schema.config if cfg then for d = 0, 9 do local k = tostring(d) local v = cfg:get_string("tone_preedit/" .. k) if v and v ~= "" then env.tone_map[k] = v end end end end local preedit = cand.preedit local converted = preedit:gsub("([^%d%s]+)(%d+)", function(body, digits) local mapped = digits:gsub("%d", function(d) return env.tone_map and env.tone_map[d] or d end) return body .. mapped end) if converted ~= preedit then cand.preedit = converted end end -- ---------------------- -- 主函数:根据优先级处理候选词的注释和preedit -- ---------------------- local ZH = {} function ZH.init(env) local config = env.engine.schema.config local delimiter = config:get_string('speller/delimiter') or " '" local auto_delimiter = delimiter:sub(1, 1) local manual_delimiter = delimiter:sub(2, 2) env.settings = { delimiter = delimiter, auto_delimiter = auto_delimiter, manual_delimiter = manual_delimiter, corrector_enabled = config:get_bool("super_comment/corrector") or true, corrector_type = config:get_string("super_comment/corrector_type") or "{comment}", chaifen = config:get_string("super_comment/chaifen") or "〔chaifen〕", candidate_length = tonumber(config:get_string("super_comment/candidate_length")) or 1, } CR.init(env) SV.init(env) end function ZH.fini(env) -- 清理 CF.fini(env) SV.fini(env) end function ZH.func(input, env) local config = env.engine.schema.config local context = env.engine.context local input_str = context.input local is_radical_mode = wanxiang.is_in_radical_mode(env) local schema_id = env.engine.schema.schema_id or "" local is_wanxiang_pro = (schema_id == "wanxiang_pro") local should_skip_candidate_comment = wanxiang.is_function_mode_active(context) or input_str == "" local is_tone_comment = env.engine.context:get_option("tone_hint") local is_comment_hint = env.engine.context:get_option("fuzhu_hint") local is_chaifen_enabled = env.engine.context:get_option("chaifen_switch") --preedit相关声明 local delimiter = env.settings.delimiter local auto_delimiter = env.settings.auto_delimiter local manual_delimiter = env.settings.manual_delimiter local visual_delim = config:get_string("speller/visual_delimiter") or " " local tone_isolate = config:get_bool("speller/tone_isolate") local is_tone_display = context:get_option("tone_display") local is_full_pinyin = context:get_option("full_pinyin") local index = 0 -- auto_phrase 相关声明 local enable_auto_phrase = config:get_bool("add_user_dict/enable_auto_phrase") or false local enable_user_dict = config:get_bool("add_user_dict/enable_user_dict") or false for cand in input:iter() do local genuine_cand = cand:get_genuine() local preedit = genuine_cand.preedit or "" local initial_comment = genuine_cand.comment local final_comment = initial_comment index = index + 1 SV.update_preedit(env, preedit) --储存到环境变量 -- preedit相关处理只跳过 preedit,不影响注释 if is_radical_mode then goto after_preedit end if not is_tone_display and not is_full_pinyin then goto after_preedit end if (not initial_comment or initial_comment == "") then goto after_preedit end do -- 拆分 preedit local input_parts = {} local current_segment = "" for i = 1, #preedit do local char = preedit:sub(i, i) if char == auto_delimiter or char == manual_delimiter then if #current_segment > 0 then table.insert(input_parts, current_segment) current_segment = "" end table.insert(input_parts, char) else current_segment = current_segment .. char end end if #current_segment > 0 then table.insert(input_parts, current_segment) end -- 拆分拼音段(comment) local pinyin_segments = {} for segment in string.gmatch(initial_comment, "[^" .. auto_delimiter .. manual_delimiter .. "]+") do local pinyin = segment:match("^[^;]+") if pinyin then pinyin = pinyin:gsub("[%[%]]", "") --去掉英文词库编码中的[] table.insert(pinyin_segments, pinyin) end end -- 替换逻辑 local pinyin_index = 1 for i, part in ipairs(input_parts) do if part == auto_delimiter or part == manual_delimiter then input_parts[i] = visual_delim else local body, tone = part:match("([%a]+)([^%a]+)") --后面加号很必要 local py = pinyin_segments[pinyin_index] if py then if is_wanxiang_pro then input_parts[i] = py pinyin_index = pinyin_index + 1 elseif i == #input_parts and #part == 1 then local prefix = py:sub(1, 2) local first_char = part:sub(1,1):lower() if first_char == "s" or first_char == "c" or first_char == "z" then input_parts[i] = part else if prefix == "zh" or prefix == "ch" or prefix == "sh" then input_parts[i] = prefix else input_parts[i] = part end end else if tone_isolate then input_parts[i] = py .. (tone or "") else input_parts[i] = py end pinyin_index = pinyin_index + 1 end end end end if is_full_pinyin then for idx, part in ipairs(input_parts) do input_parts[idx] = remove_pinyin_tone(part) end end genuine_cand.preedit = table.concat(input_parts) end ::after_preedit:: apply_tone_preedit(env, genuine_cand) if should_skip_candidate_comment then yield(genuine_cand) goto continue end -- 进入注释处理阶段 -- ① 辅助码注释或者声调注释 if is_comment_hint then local fz_comment = get_fz_comment(cand, env, initial_comment) if fz_comment then final_comment = fz_comment end elseif is_tone_comment then local fz_comment = get_fz_comment(cand, env, initial_comment) if fz_comment then final_comment = fz_comment end else final_comment = "" end -- ② 拆分注释 if is_chaifen_enabled then local cf_comment = CF.get_comment(cand, env) if cf_comment and cf_comment ~= "" then --不为空很重要 final_comment = cf_comment end end -- ③ 错音错字提示 if env.settings.corrector_enabled then local cr_comment = CR.get_comment(cand) if cr_comment and cr_comment ~= "" then final_comment = cr_comment end end -- ④ 反查模式提示 if is_radical_mode then local az_comment = get_az_comment(cand, env, initial_comment) if az_comment and az_comment ~= "" then final_comment = az_comment end end -- 应用注释 if final_comment ~= initial_comment then genuine_cand.comment = final_comment end yield(genuine_cand) ::continue:: end end return ZH