chore：新的根节点

2026-05-13 23:16:54 +00:00 · 2026-01-21 17:43:36 +08:00
commit 274c2e8d00
94 changed files with 3245054 additions and 0 deletions
--- a/lua/super_comment_preedit.lua
+++ b/lua/super_comment_preedit.lua
@@ -0,0 +1,535 @@
+--@amzxyz https://github.com/amzxyz/rime_wanxiang
+
+
+local wanxiang = require('wanxiang')
+
+-- Maps every tone-marked pinyin letter to its unmarked base letter.
+-- The ü family keeps its umlaut and only loses the tone mark.
+local tone_map = {
+    ['ā']='a', ['á']='a', ['ǎ']='a', ['à']='a',
+    ['ē']='e', ['é']='e', ['ě']='e', ['è']='e',
+    ['ī']='i', ['í']='i', ['ǐ']='i', ['ì']='i',
+    ['ō']='o', ['ó']='o', ['ǒ']='o', ['ò']='o',
+    ['ū']='u', ['ú']='u', ['ǔ']='u', ['ù']='u',
+    ['ǖ']='ü', ['ǘ']='ü', ['ǚ']='ü', ['ǜ']='ü',
+    ['ń']='n', ['ň']='n', ['ǹ']='n',
+}
+
+--- Strip tone marks from a pinyin string.
+-- Each UTF-8 character that has an entry in `tone_map` is replaced by its
+-- base letter; every other character is left untouched.
+-- @param s string to convert
+-- @return string the input with tone-marked letters replaced
+local function remove_pinyin_tone(s)
+    -- gsub with a table replacement keeps the original match when the lookup
+    -- yields nil, and copies bytes the pattern cannot match (e.g. stray
+    -- continuation bytes) instead of dropping them as a gmatch loop would.
+    -- The parentheses discard gsub's second result (the match count).
+    return (s:gsub("[%z\1-\127\194-\244][\128-\191]*", tone_map))
+end
+
+-- ----------------------
+-- # 辅助码拆分提示模块
+-- PRO 专用
+-- ----------------------
+local CF = {}
+
+--- Open the split-hint dictionary up front when the schema is a pro scheme.
+-- For any other scheme nothing is loaded here.
+-- @param env filter environment, passed on to wanxiang.is_pro_scheme and CF.get_dict
+function CF.init(env)
+    if not wanxiang.is_pro_scheme(env) then
+        return
+    end
+    CF.get_dict(env)
+end
+
+--- Drop the dictionary cached on `env` and run a full garbage-collection cycle.
+-- @param env filter environment whose `chaifen_dict` field is cleared
+function CF.fini(env)
+    env.chaifen_dict = nil
+    collectgarbage("collect")
+end
+
+--- Return the "wanxiang_chaifen" reverse-lookup dictionary, opening it on first use.
+-- The handle is cached in `env.chaifen_dict`.
+-- @param env filter environment that holds the cache
+-- @return the cached or newly opened dictionary handle
+function CF.get_dict(env)
+    local dict = env.chaifen_dict
+    if dict == nil then
+        dict = ReverseLookup("wanxiang_chaifen")
+        env.chaifen_dict = dict
+    end
+    return dict
+end
+
+--- Look up the split hint for a candidate and wrap it in the configured template.
+-- The template is `env.settings.chaifen`; the literal word "chaifen" inside it
+-- marks where the looked-up text goes. Without a usable template the raw
+-- lookup result is returned.
+-- @param cand candidate whose `text` is looked up
+-- @param env filter environment
+-- @return string the formatted hint, or "" when there is no dictionary or no entry
+function CF.get_comment(cand, env)
+    local dict = CF.get_dict(env)
+    local raw = dict and dict:lookup(cand.text)
+    if not raw or raw == "" then
+        return ""
+    end
+
+    local settings = env and env.settings
+    local tpl = settings and settings.chaifen
+    if tpl and tpl ~= "" then
+        -- Split the template around the placeholder word.
+        local left, right = tpl:match("^(.-)chaifen(.-)$")
+        if left then
+            return left .. raw .. right
+        end
+    end
+    return raw
+end
+
+-- ----------------------
+-- # 错音错字提示模块
+-- ----------------------
+local CR = {}
+local corrections_cache = nil -- parsed correction dictionary, keyed by code
+
+--- Load the mispronunciation dictionary into `corrections_cache`.
+-- Lines starting with "#" are skipped. A line is used only when it has four
+-- tab-separated columns (text, code, weight, comment). Code and comment are
+-- trimmed and their inner whitespace runs are replaced by the automatic
+-- delimiter. On load failure an error is logged and the cache is left as is.
+-- @param env filter environment; reads `env.settings.corrector_type` and
+--            `env.settings.auto_delimiter`
+function CR.init(env)
+    local settings = env.settings
+    CR.style = settings.corrector_type or '{comment}'
+    local separator = settings.auto_delimiter
+
+    -- Pro schemes ship their own dictionary file.
+    local path
+    if wanxiang.is_pro_scheme(env) then
+        path = "dicts/cuoyin.pro.dict.yaml"
+    else
+        path = "dicts/cuoyin.dict.yaml"
+    end
+
+    local file, close_file, err = wanxiang.load_file_with_fallback(path)
+    if not file then
+        log.error(string.format("[super_comment]: 加载失败 %s，错误: %s", path, err))
+        return
+    end
+
+    local function trim(field)
+        return field:match("^%s*(.-)%s*$")
+    end
+    -- Trim, then collapse whitespace runs into the delimiter (first gsub result only).
+    local function normalize(field)
+        return (trim(field):gsub("%s+", separator))
+    end
+
+    local entries = {}
+    corrections_cache = entries
+    for line in file:lines() do
+        if line:sub(1, 1) ~= "#" then
+            local text, code, _, comment = line:match("^(.-)\t(.-)\t(.-)\t(.-)$")
+            if text and code then
+                local note = normalize(comment or "")
+                entries[normalize(code)] = { text = trim(text), comment = note }
+            end
+        end
+    end
+    close_file()
+end
+
+--- Return the correction hint for a candidate, or nil when none applies.
+-- A hint applies when the candidate's comment is a key of the loaded
+-- dictionary and the entry's text equals the candidate text. The hint is
+-- wrapped in `CR.style`, where the literal word "comment" is the placeholder.
+-- @param cand candidate providing `comment` and `text`
+-- @return string|nil the formatted hint
+function CR.get_comment(cand)
+    if not corrections_cache then
+        return nil
+    end
+    local entry = corrections_cache[cand.comment]
+    if not entry or entry.text ~= cand.text then
+        return nil
+    end
+
+    -- Only the placeholder word is recognised; split the template around it.
+    local template = CR.style or "comment"
+    local prefix, suffix = template:match("^(.-)comment(.-)$")
+    if not prefix then
+        return entry.comment
+    end
+    return prefix .. entry.comment .. suffix
+end
+
+-- ----------------------
+-- 部件组字返回的注释
+-- ----------------------
+--- Build the comment shown for a candidate in radical (component) lookup mode.
+-- The raw comment is split on whitespace. From each segment the text before
+-- the first ";" or "~" is taken as a pinyin. When the first segment contains
+-- exactly one ";", the text after the first ";" of the earliest segment that
+-- has one is used as the auxiliary code.
+-- The first two parameters are unused and kept only for the call signature.
+---@param initial_comment string|nil raw candidate comment
+---@return string "〔音… 辅…〕", "〔音…〕", or "〔无〕" when nothing can be extracted
+local function get_az_comment(_, env, initial_comment)
+    if not initial_comment or initial_comment == "" then return "〔无〕" end
+
+    -- Split the raw comment into whitespace-separated segments.
+    local segments = {}
+    for segment in initial_comment:gmatch("%S+") do
+        segments[#segments + 1] = segment
+    end
+    -- A whitespace-only comment yields no segments at all.
+    if #segments == 0 then return "〔无〕" end
+
+    -- The semicolon count of the first segment decides the layout for all segments.
+    local semicolon_count = select(2, segments[1]:gsub(";", ""))
+    local wants_fuzhu = (semicolon_count == 1)
+
+    local pinyins = {}
+    local fuzhu = nil
+    for _, segment in ipairs(segments) do
+        local pinyin = segment:match("^[^;~]+")
+        if pinyin then pinyins[#pinyins + 1] = pinyin end
+        -- Only the first auxiliary code found is kept.
+        if wants_fuzhu and not fuzhu then
+            fuzhu = segment:match(";(.+)$")
+        end
+    end
+
+    if #pinyins == 0 then return "〔无〕" end
+    local pinyin_str = table.concat(pinyins, ",")
+    if fuzhu then
+        return string.format("〔音%s 辅%s〕", pinyin_str, fuzhu)
+    end
+    return string.format("〔音%s〕", pinyin_str)
+end
+-- ----------------------
+-- # 辅助码提示或带调全拼注释模块 (Fuzhu)
+-- ----------------------
+--- Build the auxiliary-code or toned-pinyin comment for a candidate.
+-- Segments of the raw comment are separated by the automatic delimiter and
+-- each has the form "pinyin;aux". With the "tone_hint" option on, the part
+-- before the first ";" of every segment is shown, joined by spaces; otherwise
+-- the part after the first ";" is shown, joined by "/". A comment whose first
+-- segment has no ";" is returned with delimiters turned into spaces.
+-- @param cand candidate; only `cand.text` is read
+-- @param env filter environment; reads `env.settings.candidate_length`,
+--            `env.settings.auto_delimiter` and the "tone_hint" context option
+-- @param initial_comment raw candidate comment (nil is treated as empty)
+-- @return string exactly one value; "" when the candidate has more characters
+--         than `candidate_length` or nothing could be extracted
+local function get_fz_comment(cand, env, initial_comment)
+    local length = utf8.len(cand.text)
+    if length > env.settings.candidate_length then
+        return ""
+    end
+    if not initial_comment or initial_comment == "" then
+        return ""
+    end
+
+    local auto_delimiter = env.settings.auto_delimiter
+    if not auto_delimiter or auto_delimiter == "" then
+        auto_delimiter = " "
+    end
+    -- The delimiter comes from configuration: escape it so that characters
+    -- such as "." or "%" are matched literally rather than as pattern magic.
+    local delimiter_pattern = auto_delimiter:gsub("%W", "%%%0")
+
+    local segments = {}
+    for segment in initial_comment:gmatch("[^" .. delimiter_pattern .. "]+") do
+        segments[#segments + 1] = segment
+    end
+
+    -- The option decides at call time whether tones or auxiliary codes are shown.
+    local use_tone = env.engine.context:get_option("tone_hint")
+
+    local first_segment = segments[1] or ""
+    if not first_segment:find(";", 1, true) then
+        -- No auxiliary part at all. Parentheses drop gsub's match count so
+        -- callers always receive a single value.
+        return (initial_comment:gsub(delimiter_pattern, " "))
+    end
+
+    -- Tone mode keeps the text before the first ";", aux mode the text after it.
+    local piece_pattern = use_tone and "^(.-);" or ";(.+)$"
+    local pieces = {}
+    for _, segment in ipairs(segments) do
+        local piece = segment:match(piece_pattern)
+        if piece and piece ~= "" then
+            pieces[#pieces + 1] = piece
+        end
+    end
+
+    -- Tones are joined with a space, auxiliary codes with "/".
+    return table.concat(pieces, use_tone and " " or "/")
+end
+
+
+local SV = {}
+
+-- Utility: return the part of the input that lies before the caret.
+-- A missing caret means "end of input"; an out-of-range caret is clamped.
+local function front_input(ctx)
+    if not ctx then return "" end
+    local text  = ctx.input or ""
+    local limit = #text
+    local caret = ctx.caret_pos or limit
+    if caret > limit then
+        caret = limit
+    end
+    if caret < 0 then
+        caret = 0
+    end
+    return text:sub(1, caret)
+end
+
+-- This module publishes the preedit captured in the filter stage (before it is
+-- rewritten) as context properties; per the original note they are meant for
+-- the key-processing stage. The update_notifier callback keeps the published
+-- pair in step with the current input.
+function SV.init(env)
+    env._sv_seq_sig          = ""   -- signature of the last published pair
+    env._sv_last_pre         = ""   -- most recent preedit waiting to be published
+    env._saved_input_for_seq = ""   -- pre-caret input that preedit belongs to
+
+    local function publish(c)
+        local raw_in = front_input(c)
+        local pre = env._sv_last_pre or ""
+        if raw_in == "" or pre == "" then
+            return
+        end
+
+        -- Signature is pre-caret input + preedit; an unchanged pair is not rewritten.
+        local sig = raw_in .. "\t" .. pre
+        if sig ~= env._sv_seq_sig then
+            c:set_property("sequence_preedit_key", raw_in)
+            c:set_property("sequence_preedit_val", pre)
+            env._sv_seq_sig = sig
+        end
+    end
+
+    env._sv_ctx_conn = env.engine.context.update_notifier:connect(publish)
+end
+
+-- Disconnect the notifier callback and clear the state kept on env.
+function SV.fini(env)
+    local conn = env._sv_ctx_conn
+    if conn then
+        conn:disconnect()
+        env._sv_ctx_conn = nil
+    end
+    env._sv_seq_sig, env._sv_last_pre, env._saved_input_for_seq = nil, nil, nil
+end
+
+-- Remember a preedit for later publishing. Updates are limited: for one and
+-- the same pre-caret input only the first preedit seen is recorded.
+function SV.update_preedit(env, preedit)
+    local ctx = env.engine.context
+    if not ctx then return end
+
+    local raw_in = front_input(ctx)
+    if raw_in == "" or not preedit or preedit == "" then
+        return
+    end
+    -- Same input as last time: keep the preedit recorded first.
+    if env._saved_input_for_seq == raw_in then
+        return
+    end
+
+    env._saved_input_for_seq = raw_in
+    env._sv_last_pre         = preedit
+end
+-- Apply the schema's tone_preedit/0..9 mapping to cand.preedit: digits that
+-- directly follow a run of non-digit, non-space characters are replaced by the
+-- configured strings (e.g. superscripts). Nothing is changed when the whole
+-- input contains two adjacent digits.
+local function apply_tone_preedit(env, cand)
+    local preedit = cand and cand.preedit
+    if not preedit or preedit == "" then
+        return
+    end
+
+    -- Adjacent digits anywhere in the input (e.g. "li39", "abc10") switch the
+    -- conversion off entirely; the original note ties this to keypad input.
+    local engine = env.engine
+    local context = engine and engine.context
+    local input = context and context.input
+    if input and input ~= "" and input:find("%d%d") then
+        return
+    end
+
+    -- Read the digit mapping from the schema once and cache it on env.
+    local map = env.tone_map
+    if not map then
+        map = {}
+        env.tone_map = map
+        local cfg = engine and engine.schema and engine.schema.config
+        if cfg then
+            for d = 0, 9 do
+                local key = tostring(d)
+                local glyph = cfg:get_string("tone_preedit/" .. key)
+                if glyph and glyph ~= "" then
+                    map[key] = glyph
+                end
+            end
+        end
+    end
+
+    -- A table replacement leaves digits without a mapping unchanged.
+    local converted = preedit:gsub("([^%d%s]+)(%d+)", function(body, digits)
+        return body .. (digits:gsub("%d", map))
+    end)
+
+    if converted ~= preedit then
+        cand.preedit = converted
+    end
+end
+
+
+-- ----------------------
+-- 主函数：根据优先级处理候选词的注释和preedit
+-- ----------------------
+local ZH = {}
+
+--- Read the schema configuration into `env.settings` and initialise the
+-- correction (CR) and preedit-publishing (SV) modules.
+-- The first character of `speller/delimiter` is the automatic delimiter and
+-- the second one the manual delimiter.
+-- @param env filter environment; `env.settings` is (re)created here
+function ZH.init(env)
+    local config = env.engine.schema.config
+    local delimiter = config:get_string('speller/delimiter') or " '"
+
+    -- The corrector is on by default. `get_bool(...) or true` would also turn
+    -- an explicit `false` into `true`, so only a missing value (assumed to be
+    -- nil, as the other `or` fallbacks here rely on) selects the default.
+    local corrector_enabled = config:get_bool("super_comment/corrector")
+    if corrector_enabled == nil then
+        corrector_enabled = true
+    end
+
+    env.settings = {
+        delimiter = delimiter,
+        auto_delimiter = delimiter:sub(1, 1),
+        manual_delimiter = delimiter:sub(2, 2),
+        corrector_enabled = corrector_enabled,
+        corrector_type = config:get_string("super_comment/corrector_type") or "{comment}",
+        chaifen = config:get_string("super_comment/chaifen") or "〔chaifen〕",
+        candidate_length = tonumber(config:get_string("super_comment/candidate_length")) or 1,
+    }
+    CR.init(env)
+    SV.init(env)
+end
+--- Tear the filter down: release the split-hint dictionary (CF), then
+-- disconnect the preedit notifier (SV).
+function ZH.fini(env)
+    for _, module in ipairs({ CF, SV }) do
+        module.fini(env)
+    end
+end
+-- Rebuild a candidate's preedit so that each typed code is displayed as the
+-- pinyin taken from the candidate comment.
+-- `opts` carries auto_delimiter, manual_delimiter, visual_delim, tone_isolate,
+-- is_full_pinyin and is_wanxiang_pro. Returns the new preedit string.
+local function render_preedit(preedit, comment, opts)
+    local auto_delimiter = opts.auto_delimiter
+    local manual_delimiter = opts.manual_delimiter
+
+    -- Split the preedit into typed codes and single-character delimiter tokens.
+    local parts = {}
+    local current = ""
+    for i = 1, #preedit do
+        local ch = preedit:sub(i, i)
+        if ch == auto_delimiter or ch == manual_delimiter then
+            if current ~= "" then
+                parts[#parts + 1] = current
+                current = ""
+            end
+            parts[#parts + 1] = ch
+        else
+            current = current .. ch
+        end
+    end
+    if current ~= "" then
+        parts[#parts + 1] = current
+    end
+
+    -- Split the comment on the delimiters. They come from configuration, so
+    -- they are escaped before being placed inside a pattern set; with no
+    -- delimiter at all the whole comment is a single segment.
+    local set = (auto_delimiter .. manual_delimiter):gsub("%W", "%%%0")
+    local segment_pattern = set ~= "" and ("[^" .. set .. "]+") or ".+"
+    local pinyins = {}
+    for segment in comment:gmatch(segment_pattern) do
+        local pinyin = segment:match("^[^;]+")
+        if pinyin then
+            -- Drop the [] that wrap codes of English dictionary entries.
+            pinyins[#pinyins + 1] = (pinyin:gsub("[%[%]]", ""))
+        end
+    end
+
+    -- Replace typed codes by pinyin, and delimiters by the visual delimiter.
+    local next_pinyin = 1
+    local last = #parts
+    for i, part in ipairs(parts) do
+        if part == auto_delimiter or part == manual_delimiter then
+            parts[i] = opts.visual_delim
+        else
+            local py = pinyins[next_pinyin]
+            if py then
+                if opts.is_wanxiang_pro then
+                    parts[i] = py
+                    next_pinyin = next_pinyin + 1
+                elseif i == last and #part == 1 then
+                    -- A single trailing key is an unfinished syllable: keep it
+                    -- as typed, except that a key other than s/c/z whose
+                    -- pinyin starts with zh/ch/sh is shown as that initial.
+                    -- The pinyin is not consumed in this case.
+                    local prefix = py:sub(1, 2)
+                    local key = part:lower()
+                    local typed_sibilant = (key == "s" or key == "c" or key == "z")
+                    if not typed_sibilant
+                        and (prefix == "zh" or prefix == "ch" or prefix == "sh") then
+                        parts[i] = prefix
+                    end
+                else
+                    if opts.tone_isolate then
+                        -- Keep whatever follows the leading letters (the tone
+                        -- key); the trailing "+" in the pattern is required.
+                        local _, tone = part:match("([%a]+)([^%a]+)")
+                        parts[i] = py .. (tone or "")
+                    else
+                        parts[i] = py
+                    end
+                    next_pinyin = next_pinyin + 1
+                end
+            end
+        end
+    end
+
+    if opts.is_full_pinyin then
+        for idx, part in ipairs(parts) do
+            parts[idx] = remove_pinyin_tone(part)
+        end
+    end
+
+    return table.concat(parts)
+end
+
+--- Filter entry point: rewrite the preedit and comment of every candidate.
+-- Comment sources are applied in increasing priority, a later non-empty one
+-- replacing the earlier result: (1) auxiliary-code / tone hint, (2) split
+-- hint, (3) mispronunciation hint, (4) radical-mode hint.
+-- @param input translation providing `iter()`
+-- @param env filter environment prepared by ZH.init
+function ZH.func(input, env)
+    local config = env.engine.schema.config
+    local context = env.engine.context
+    local input_str = context.input
+    local is_radical_mode = wanxiang.is_in_radical_mode(env)
+    local should_skip_candidate_comment = wanxiang.is_function_mode_active(context) or input_str == ""
+    -- Both hint options produce their comment through get_fz_comment.
+    local is_hint_enabled = context:get_option("fuzhu_hint") or context:get_option("tone_hint")
+    local is_chaifen_enabled = context:get_option("chaifen_switch")
+    local is_tone_display = context:get_option("tone_display")
+    local is_full_pinyin = context:get_option("full_pinyin")
+
+    -- The preedit is rewritten only outside radical mode and only when one of
+    -- the two display options is on.
+    local rewrite_preedit = not is_radical_mode and (is_tone_display or is_full_pinyin)
+    local preedit_opts = {
+        auto_delimiter = env.settings.auto_delimiter,
+        manual_delimiter = env.settings.manual_delimiter,
+        visual_delim = config:get_string("speller/visual_delimiter") or " ",
+        tone_isolate = config:get_bool("speller/tone_isolate"),
+        is_full_pinyin = is_full_pinyin,
+        is_wanxiang_pro = (env.engine.schema.schema_id == "wanxiang_pro"),
+    }
+
+    for cand in input:iter() do
+        local genuine_cand = cand:get_genuine()
+        local preedit = genuine_cand.preedit or ""
+        local initial_comment = genuine_cand.comment
+
+        SV.update_preedit(env, preedit) -- remember the unmodified preedit
+
+        -- Skipping the rewrite affects the preedit only, never the comment.
+        if rewrite_preedit and initial_comment and initial_comment ~= "" then
+            genuine_cand.preedit = render_preedit(preedit, initial_comment, preedit_opts)
+        end
+        apply_tone_preedit(env, genuine_cand)
+
+        if not should_skip_candidate_comment then
+            -- (1) auxiliary-code or tone hint; without either option the
+            -- comment is cleared.
+            local final_comment = ""
+            if is_hint_enabled then
+                final_comment = get_fz_comment(cand, env, initial_comment) or initial_comment
+            end
+
+            -- (2) split hint; an empty result must not override
+            if is_chaifen_enabled then
+                local cf_comment = CF.get_comment(cand, env)
+                if cf_comment and cf_comment ~= "" then
+                    final_comment = cf_comment
+                end
+            end
+
+            -- (3) mispronunciation / wrong-character hint
+            if env.settings.corrector_enabled then
+                local cr_comment = CR.get_comment(cand)
+                if cr_comment and cr_comment ~= "" then
+                    final_comment = cr_comment
+                end
+            end
+
+            -- (4) radical lookup mode hint
+            if is_radical_mode then
+                local az_comment = get_az_comment(cand, env, initial_comment)
+                if az_comment and az_comment ~= "" then
+                    final_comment = az_comment
+                end
+            end
+
+            -- Write the comment back only when it actually changed.
+            if final_comment ~= initial_comment then
+                genuine_cand.comment = final_comment
+            end
+        end
+
+        yield(genuine_cand)
+    end
+end
+return ZH