chore:新的根节点

This commit is contained in:
amzxyz
2026-01-21 17:43:36 +08:00
commit 274c2e8d00
94 changed files with 3245054 additions and 0 deletions

View File

@@ -0,0 +1,535 @@
--@amzxyz https://github.com/amzxyz/rime_wanxiang
local wanxiang = require('wanxiang')
local tone_map = {
['ā']='a', ['á']='a', ['ǎ']='a', ['à']='a',
['ē']='e', ['é']='e', ['ě']='e', ['è']='e',
['ī']='i', ['í']='i', ['ǐ']='i', ['ì']='i',
['ō']='o', ['ó']='o', ['ǒ']='o', ['ò']='o', ['ň']='n',
['ū']='u', ['ú']='u', ['ǔ']='u', ['ù']='u', ['ǹ']='n',
['ǖ']='ü', ['ǘ']='ü', ['ǚ']='ü', ['ǜ']='ü', ['ń']='n',
}
local function remove_pinyin_tone(s)
local result = {}
for uchar in s:gmatch("[%z\1-\127\194-\244][\128-\191]*") do
table.insert(result, tone_map[uchar] or uchar)
end
return table.concat(result)
end
-- ----------------------
-- # 辅助码拆分提示模块
-- PRO 专用
-- ----------------------
local CF = {}
function CF.init(env)
if wanxiang.is_pro_scheme(env) then -- pro 版直接初始化
CF.get_dict(env)
end
end
function CF.fini(env)
env.chaifen_dict = nil
collectgarbage()
end
function CF.get_dict(env)
if env.chaifen_dict == nil then
env.chaifen_dict = ReverseLookup("wanxiang_chaifen")
end
return env.chaifen_dict
end
function CF.get_comment(cand, env)
local dict = CF.get_dict(env)
if not dict then return "" end
local raw = dict:lookup(cand.text)
if not raw or raw == "" then return "" end
local tpl = (env and env.settings and env.settings.chaifen) or ""
if tpl ~= "" then
-- 取 chaifen 左右两边
local left, right = tpl:match("^(.-)chaifen(.-)$")
if left then
return left .. raw .. right
end
end
return raw
end
-- ----------------------
-- # 错音错字提示模块
-- ----------------------
local CR = {}
local corrections_cache = nil -- 用于缓存已加载的词典
function CR.init(env)
CR.style = env.settings.corrector_type or '{comment}'
--if corrections_cache then return end
local auto_delimiter = env.settings.auto_delimiter
local is_pro = wanxiang.is_pro_scheme(env)
-- 根据方案选择加载路径
local path = (is_pro and "dicts/cuoyin.pro.dict.yaml") or "dicts/cuoyin.dict.yaml"
local file, close_file, err = wanxiang.load_file_with_fallback(path)
if not file then
log.error(string.format("[super_comment]: 加载失败 %s错误: %s", path, err))
return
end
corrections_cache = {}
for line in file:lines() do
if not line:match("^#") then
local text, code, weight, comment = line:match("^(.-)\t(.-)\t(.-)\t(.-)$")
if text and code then
text = text:match("^%s*(.-)%s*$")
code = code:match("^%s*(.-)%s*$")
comment = comment and comment:match("^%s*(.-)%s*$") or ""
comment = comment:gsub("%s+", auto_delimiter)
code = code:gsub("%s+", auto_delimiter)
corrections_cache[code] = { text = text, comment = comment }
end
end
end
close_file()
end
function CR.get_comment(cand)
local correction = corrections_cache and corrections_cache[cand.comment] or nil
if not (correction and cand.text == correction.text) then
return nil
end
-- 只认占位符 `comment`,按“刀法”切分
local tpl = CR.style or "comment"
local left, right = tpl:match("^(.-)comment(.-)$")
if left then
return left .. correction.comment .. right
else
return correction.comment
end
end
-- ----------------------
-- 部件组字返回的注释
-- ----------------------
---@return string
local function get_az_comment(_, env, initial_comment)
if not initial_comment or initial_comment == "" then return "〔无〕" end
local final_comment = nil
local auto_delimiter = env.settings.auto_delimiter or " "
-- 拆分初始评论为多个段落
local segments = {}
for segment in initial_comment:gmatch("[^%s]+") do
table.insert(segments, segment)
end
local semicolon_count = select(2, segments[1]:gsub(";", "")) -- 使用第一个段来判断分号的数量
local pinyins = {}
local fuzhu = nil
for _, segment in ipairs(segments) do
local pinyin = segment:match("^[^;~]+")
local fz = nil
if semicolon_count == 1 then
-- 一个分号:取后段
fz = segment:match(";(.+)$")
else
-- 无分号不取辅助码
fz = nil
end
if pinyin then table.insert(pinyins, pinyin) end
if not fuzhu and fz and fz ~= "" then fuzhu = fz end
end
-- 拼接结果
if #pinyins > 0 then
local pinyin_str = table.concat(pinyins, ",")
if fuzhu then
final_comment = string.format("〔音%s 辅%s", pinyin_str, fuzhu)
else
final_comment = string.format("〔音%s", pinyin_str)
end
end
return final_comment or "〔无〕"
end
-- ----------------------
-- # 辅助码提示或带调全拼注释模块 (Fuzhu)
-- ----------------------
local function get_fz_comment(cand, env, initial_comment)
local length = utf8.len(cand.text)
if length > env.settings.candidate_length then
return ""
end
local auto_delimiter = env.settings.auto_delimiter or " "
local segments = {}
for segment in string.gmatch(initial_comment, "[^" .. auto_delimiter .. "]+") do
table.insert(segments, segment)
end
-- 根据 option 动态决定是否强制使用 tone
local use_tone = env.engine.context:get_option("tone_hint")
local fuzhu_type = use_tone and "tone" or "fuzhu"
local first_segment = segments[1] or ""
local semicolon_count = select(2, first_segment:gsub(";", ""))
local fuzhu_comments = {}
-- 没有分号的情况
if semicolon_count == 0 then
return initial_comment:gsub(auto_delimiter, " ")
else
-- 有分号:按类型提取
for _, segment in ipairs(segments) do
if fuzhu_type == "tone" then
-- 取第一个分号“前”的内容
local before = segment:match("^(.-);")
if before and before ~= "" then
table.insert(fuzhu_comments, before)
end
else -- "fuzhu"
-- 取第一个分号“后”的内容(到行尾)
local after = segment:match(";(.+)$")
if after and after ~= "" then
table.insert(fuzhu_comments, after)
end
end
end
end
-- 最终拼接输出fuzhu用 `,`tone用 /连接
if #fuzhu_comments > 0 then
if fuzhu_type == "tone" then
return table.concat(fuzhu_comments, " ")
else
return table.concat(fuzhu_comments, "/")
end
else
return ""
end
end
local SV = {}
-- 工具:取光标前的编码(安全处理 caret 越界)
local function front_input(ctx)
if not ctx then return "" end
local raw_full = ctx.input or ""
local caret = ctx.caret_pos or #raw_full
if caret < 0 then
caret = 0
elseif caret > #raw_full then
caret = #raw_full
end
return raw_full:sub(1, caret)
end
-- 这个模块主要用于将滤镜阶段未修改前的注释或者 preedit
-- 存到上下文变量里按键处理阶段使用update_notifier 保证一致性
function SV.init(env)
env._sv_seq_sig = ""
env._sv_last_pre = "" -- 最近一次要写入的 preedit
env._saved_input_for_seq = "" -- 上次对应的 raw_in光标前编码
local ctx = env.engine.context
env._sv_ctx_conn = ctx.update_notifier:connect(function(c)
local raw_in = front_input(c)
local pre = env._sv_last_pre or ""
if pre == "" or raw_in == "" then
return
end
-- 不重写:光标前编码 + preedit
local sig = raw_in .. "\t" .. pre
if env._sv_seq_sig == sig then
return
end
c:set_property("sequence_preedit_key", raw_in)
c:set_property("sequence_preedit_val", pre)
env._sv_seq_sig = sig
end)
end
-- 断开 notifier清理状态
function SV.fini(env)
if env._sv_ctx_conn then
env._sv_ctx_conn:disconnect()
env._sv_ctx_conn = nil
end
env._sv_seq_sig = nil
env._sv_last_pre = nil
env._saved_input_for_seq = nil
end
-- 限制更新范围:同一个 raw_in 只记第一次的 preedit
function SV.update_preedit(env, preedit)
local ctx = env.engine.context
if not ctx then return end
local raw_in = front_input(ctx)
preedit = preedit or ""
if raw_in == "" or preedit == "" then
return
end
if env._saved_input_for_seq ~= raw_in then
env._saved_input_for_seq = raw_in
env._sv_last_pre = preedit
end
end
-- 对 cand.preedit 应用 tone_preedit/0..9 的映射(数字 -> 上标等)
local function apply_tone_preedit(env, cand)
if not cand or not cand.preedit or cand.preedit == "" then
return
end
-- 用 context.input 判断是否有相邻数字
local input
local engine = env.engine
if engine and engine.context then
-- Rime 里一般是 string保险起见兜个 nil
input = engine.context.input or ""
end
-- 如果整条输入串中存在相邻两个数字(例如 "li39"、"abc10" 等),
-- 则整体不做任何转换,直接返回,为了配合小键盘输入逻辑中包吃书字面大小一致性
if input and input ~= "" and input:match("%d%d") then
return
end
-- 懒加载 tone_map
if not env.tone_map then
env.tone_map = {}
local cfg = engine and engine.schema and engine.schema.config
if cfg then
for d = 0, 9 do
local k = tostring(d)
local v = cfg:get_string("tone_preedit/" .. k)
if v and v ~= "" then
env.tone_map[k] = v
end
end
end
end
local preedit = cand.preedit
local converted = preedit:gsub("([^%d%s]+)(%d+)", function(body, digits)
local mapped = digits:gsub("%d", function(d)
return env.tone_map and env.tone_map[d] or d
end)
return body .. mapped
end)
if converted ~= preedit then
cand.preedit = converted
end
end
-- ----------------------
-- 主函数根据优先级处理候选词的注释和preedit
-- ----------------------
local ZH = {}
function ZH.init(env)
local config = env.engine.schema.config
local delimiter = config:get_string('speller/delimiter') or " '"
local auto_delimiter = delimiter:sub(1, 1)
local manual_delimiter = delimiter:sub(2, 2)
env.settings = {
delimiter = delimiter,
auto_delimiter = auto_delimiter,
manual_delimiter = manual_delimiter,
corrector_enabled = config:get_bool("super_comment/corrector") or true,
corrector_type = config:get_string("super_comment/corrector_type") or "{comment}",
chaifen = config:get_string("super_comment/chaifen") or "chaifen",
candidate_length = tonumber(config:get_string("super_comment/candidate_length")) or 1,
}
CR.init(env)
SV.init(env)
end
function ZH.fini(env)
-- 清理
CF.fini(env)
SV.fini(env)
end
function ZH.func(input, env)
local config = env.engine.schema.config
local context = env.engine.context
local input_str = context.input
local is_radical_mode = wanxiang.is_in_radical_mode(env)
local schema_id = env.engine.schema.schema_id or ""
local is_wanxiang_pro = (schema_id == "wanxiang_pro")
local should_skip_candidate_comment = wanxiang.is_function_mode_active(context) or input_str == ""
local is_tone_comment = env.engine.context:get_option("tone_hint")
local is_comment_hint = env.engine.context:get_option("fuzhu_hint")
local is_chaifen_enabled = env.engine.context:get_option("chaifen_switch")
--preedit相关声明
local delimiter = env.settings.delimiter
local auto_delimiter = env.settings.auto_delimiter
local manual_delimiter = env.settings.manual_delimiter
local visual_delim = config:get_string("speller/visual_delimiter") or " "
local tone_isolate = config:get_bool("speller/tone_isolate")
local is_tone_display = context:get_option("tone_display")
local is_full_pinyin = context:get_option("full_pinyin")
local index = 0
-- auto_phrase 相关声明
local enable_auto_phrase = config:get_bool("add_user_dict/enable_auto_phrase") or false
local enable_user_dict = config:get_bool("add_user_dict/enable_user_dict") or false
for cand in input:iter() do
local genuine_cand = cand:get_genuine()
local preedit = genuine_cand.preedit or ""
local initial_comment = genuine_cand.comment
local final_comment = initial_comment
index = index + 1
SV.update_preedit(env, preedit) --储存到环境变量
-- preedit相关处理只跳过 preedit不影响注释
if is_radical_mode then
goto after_preedit
end
if not is_tone_display and not is_full_pinyin then
goto after_preedit
end
if (not initial_comment or initial_comment == "") then
goto after_preedit
end
do
-- 拆分 preedit
local input_parts = {}
local current_segment = ""
for i = 1, #preedit do
local char = preedit:sub(i, i)
if char == auto_delimiter or char == manual_delimiter then
if #current_segment > 0 then
table.insert(input_parts, current_segment)
current_segment = ""
end
table.insert(input_parts, char)
else
current_segment = current_segment .. char
end
end
if #current_segment > 0 then
table.insert(input_parts, current_segment)
end
-- 拆分拼音段comment
local pinyin_segments = {}
for segment in string.gmatch(initial_comment, "[^" .. auto_delimiter .. manual_delimiter .. "]+") do
local pinyin = segment:match("^[^;]+")
if pinyin then
pinyin = pinyin:gsub("[%[%]]", "") --去掉英文词库编码中的[]
table.insert(pinyin_segments, pinyin)
end
end
-- 替换逻辑
local pinyin_index = 1
for i, part in ipairs(input_parts) do
if part == auto_delimiter or part == manual_delimiter then
input_parts[i] = visual_delim
else
local body, tone = part:match("([%a]+)([^%a]+)") --后面加号很必要
local py = pinyin_segments[pinyin_index]
if py then
if is_wanxiang_pro then
input_parts[i] = py
pinyin_index = pinyin_index + 1
elseif i == #input_parts and #part == 1 then
local prefix = py:sub(1, 2)
local first_char = part:sub(1,1):lower()
if first_char == "s" or first_char == "c" or first_char == "z" then
input_parts[i] = part
else
if prefix == "zh" or prefix == "ch" or prefix == "sh" then
input_parts[i] = prefix
else
input_parts[i] = part
end
end
else
if tone_isolate then
input_parts[i] = py .. (tone or "")
else
input_parts[i] = py
end
pinyin_index = pinyin_index + 1
end
end
end
end
if is_full_pinyin then
for idx, part in ipairs(input_parts) do
input_parts[idx] = remove_pinyin_tone(part)
end
end
genuine_cand.preedit = table.concat(input_parts)
end
::after_preedit::
apply_tone_preedit(env, genuine_cand)
if should_skip_candidate_comment then
yield(genuine_cand)
goto continue
end
-- 进入注释处理阶段
-- ① 辅助码注释或者声调注释
if is_comment_hint then
local fz_comment = get_fz_comment(cand, env, initial_comment)
if fz_comment then
final_comment = fz_comment
end
elseif is_tone_comment then
local fz_comment = get_fz_comment(cand, env, initial_comment)
if fz_comment then
final_comment = fz_comment
end
else
final_comment = ""
end
-- ② 拆分注释
if is_chaifen_enabled then
local cf_comment = CF.get_comment(cand, env)
if cf_comment and cf_comment ~= "" then --不为空很重要
final_comment = cf_comment
end
end
-- ③ 错音错字提示
if env.settings.corrector_enabled then
local cr_comment = CR.get_comment(cand)
if cr_comment and cr_comment ~= "" then
final_comment = cr_comment
end
end
-- ④ 反查模式提示
if is_radical_mode then
local az_comment = get_az_comment(cand, env, initial_comment)
if az_comment and az_comment ~= "" then
final_comment = az_comment
end
end
-- 应用注释
if final_comment ~= initial_comment then
genuine_cand.comment = final_comment
end
yield(genuine_cand)
::continue::
end
end
return ZH