Files
rime_wanxiang/lua/super_comment_preedit.lua
2026-01-21 17:43:36 +08:00

536 lines
18 KiB
Lua
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
--@amzxyz https://github.com/amzxyz/rime_wanxiang
local wanxiang = require('wanxiang')
local tone_map = {
['ā']='a', ['á']='a', ['ǎ']='a', ['à']='a',
['ē']='e', ['é']='e', ['ě']='e', ['è']='e',
['ī']='i', ['í']='i', ['ǐ']='i', ['ì']='i',
['ō']='o', ['ó']='o', ['ǒ']='o', ['ò']='o', ['ň']='n',
['ū']='u', ['ú']='u', ['ǔ']='u', ['ù']='u', ['ǹ']='n',
['ǖ']='ü', ['ǘ']='ü', ['ǚ']='ü', ['ǜ']='ü', ['ń']='n',
}
local function remove_pinyin_tone(s)
local result = {}
for uchar in s:gmatch("[%z\1-\127\194-\244][\128-\191]*") do
table.insert(result, tone_map[uchar] or uchar)
end
return table.concat(result)
end
-- ----------------------
-- # 辅助码拆分提示模块
-- PRO 专用
-- ----------------------
local CF = {}
function CF.init(env)
if wanxiang.is_pro_scheme(env) then -- pro 版直接初始化
CF.get_dict(env)
end
end
function CF.fini(env)
env.chaifen_dict = nil
collectgarbage()
end
function CF.get_dict(env)
if env.chaifen_dict == nil then
env.chaifen_dict = ReverseLookup("wanxiang_chaifen")
end
return env.chaifen_dict
end
function CF.get_comment(cand, env)
local dict = CF.get_dict(env)
if not dict then return "" end
local raw = dict:lookup(cand.text)
if not raw or raw == "" then return "" end
local tpl = (env and env.settings and env.settings.chaifen) or ""
if tpl ~= "" then
-- 取 chaifen 左右两边
local left, right = tpl:match("^(.-)chaifen(.-)$")
if left then
return left .. raw .. right
end
end
return raw
end
-- ----------------------
-- # 错音错字提示模块
-- ----------------------
local CR = {}
local corrections_cache = nil -- 用于缓存已加载的词典
function CR.init(env)
CR.style = env.settings.corrector_type or '{comment}'
--if corrections_cache then return end
local auto_delimiter = env.settings.auto_delimiter
local is_pro = wanxiang.is_pro_scheme(env)
-- 根据方案选择加载路径
local path = (is_pro and "dicts/cuoyin.pro.dict.yaml") or "dicts/cuoyin.dict.yaml"
local file, close_file, err = wanxiang.load_file_with_fallback(path)
if not file then
log.error(string.format("[super_comment]: 加载失败 %s错误: %s", path, err))
return
end
corrections_cache = {}
for line in file:lines() do
if not line:match("^#") then
local text, code, weight, comment = line:match("^(.-)\t(.-)\t(.-)\t(.-)$")
if text and code then
text = text:match("^%s*(.-)%s*$")
code = code:match("^%s*(.-)%s*$")
comment = comment and comment:match("^%s*(.-)%s*$") or ""
comment = comment:gsub("%s+", auto_delimiter)
code = code:gsub("%s+", auto_delimiter)
corrections_cache[code] = { text = text, comment = comment }
end
end
end
close_file()
end
function CR.get_comment(cand)
local correction = corrections_cache and corrections_cache[cand.comment] or nil
if not (correction and cand.text == correction.text) then
return nil
end
-- 只认占位符 `comment`,按“刀法”切分
local tpl = CR.style or "comment"
local left, right = tpl:match("^(.-)comment(.-)$")
if left then
return left .. correction.comment .. right
else
return correction.comment
end
end
-- ----------------------
-- 部件组字返回的注释
-- ----------------------
---@return string
local function get_az_comment(_, env, initial_comment)
if not initial_comment or initial_comment == "" then return "〔无〕" end
local final_comment = nil
local auto_delimiter = env.settings.auto_delimiter or " "
-- 拆分初始评论为多个段落
local segments = {}
for segment in initial_comment:gmatch("[^%s]+") do
table.insert(segments, segment)
end
local semicolon_count = select(2, segments[1]:gsub(";", "")) -- 使用第一个段来判断分号的数量
local pinyins = {}
local fuzhu = nil
for _, segment in ipairs(segments) do
local pinyin = segment:match("^[^;~]+")
local fz = nil
if semicolon_count == 1 then
-- 一个分号:取后段
fz = segment:match(";(.+)$")
else
-- 无分号不取辅助码
fz = nil
end
if pinyin then table.insert(pinyins, pinyin) end
if not fuzhu and fz and fz ~= "" then fuzhu = fz end
end
-- 拼接结果
if #pinyins > 0 then
local pinyin_str = table.concat(pinyins, ",")
if fuzhu then
final_comment = string.format("〔音%s 辅%s", pinyin_str, fuzhu)
else
final_comment = string.format("〔音%s", pinyin_str)
end
end
return final_comment or "〔无〕"
end
-- ----------------------
-- # 辅助码提示或带调全拼注释模块 (Fuzhu)
-- ----------------------
local function get_fz_comment(cand, env, initial_comment)
local length = utf8.len(cand.text)
if length > env.settings.candidate_length then
return ""
end
local auto_delimiter = env.settings.auto_delimiter or " "
local segments = {}
for segment in string.gmatch(initial_comment, "[^" .. auto_delimiter .. "]+") do
table.insert(segments, segment)
end
-- 根据 option 动态决定是否强制使用 tone
local use_tone = env.engine.context:get_option("tone_hint")
local fuzhu_type = use_tone and "tone" or "fuzhu"
local first_segment = segments[1] or ""
local semicolon_count = select(2, first_segment:gsub(";", ""))
local fuzhu_comments = {}
-- 没有分号的情况
if semicolon_count == 0 then
return initial_comment:gsub(auto_delimiter, " ")
else
-- 有分号:按类型提取
for _, segment in ipairs(segments) do
if fuzhu_type == "tone" then
-- 取第一个分号“前”的内容
local before = segment:match("^(.-);")
if before and before ~= "" then
table.insert(fuzhu_comments, before)
end
else -- "fuzhu"
-- 取第一个分号“后”的内容(到行尾)
local after = segment:match(";(.+)$")
if after and after ~= "" then
table.insert(fuzhu_comments, after)
end
end
end
end
-- 最终拼接输出fuzhu用 `,`tone用 /连接
if #fuzhu_comments > 0 then
if fuzhu_type == "tone" then
return table.concat(fuzhu_comments, " ")
else
return table.concat(fuzhu_comments, "/")
end
else
return ""
end
end
local SV = {}
-- 工具:取光标前的编码(安全处理 caret 越界)
local function front_input(ctx)
if not ctx then return "" end
local raw_full = ctx.input or ""
local caret = ctx.caret_pos or #raw_full
if caret < 0 then
caret = 0
elseif caret > #raw_full then
caret = #raw_full
end
return raw_full:sub(1, caret)
end
-- 这个模块主要用于将滤镜阶段未修改前的注释或者 preedit
-- 存到上下文变量里按键处理阶段使用update_notifier 保证一致性
function SV.init(env)
env._sv_seq_sig = ""
env._sv_last_pre = "" -- 最近一次要写入的 preedit
env._saved_input_for_seq = "" -- 上次对应的 raw_in光标前编码
local ctx = env.engine.context
env._sv_ctx_conn = ctx.update_notifier:connect(function(c)
local raw_in = front_input(c)
local pre = env._sv_last_pre or ""
if pre == "" or raw_in == "" then
return
end
-- 不重写:光标前编码 + preedit
local sig = raw_in .. "\t" .. pre
if env._sv_seq_sig == sig then
return
end
c:set_property("sequence_preedit_key", raw_in)
c:set_property("sequence_preedit_val", pre)
env._sv_seq_sig = sig
end)
end
-- 断开 notifier清理状态
function SV.fini(env)
if env._sv_ctx_conn then
env._sv_ctx_conn:disconnect()
env._sv_ctx_conn = nil
end
env._sv_seq_sig = nil
env._sv_last_pre = nil
env._saved_input_for_seq = nil
end
-- 限制更新范围:同一个 raw_in 只记第一次的 preedit
function SV.update_preedit(env, preedit)
local ctx = env.engine.context
if not ctx then return end
local raw_in = front_input(ctx)
preedit = preedit or ""
if raw_in == "" or preedit == "" then
return
end
if env._saved_input_for_seq ~= raw_in then
env._saved_input_for_seq = raw_in
env._sv_last_pre = preedit
end
end
-- 对 cand.preedit 应用 tone_preedit/0..9 的映射(数字 -> 上标等)
local function apply_tone_preedit(env, cand)
if not cand or not cand.preedit or cand.preedit == "" then
return
end
-- 用 context.input 判断是否有相邻数字
local input
local engine = env.engine
if engine and engine.context then
-- Rime 里一般是 string保险起见兜个 nil
input = engine.context.input or ""
end
-- 如果整条输入串中存在相邻两个数字(例如 "li39"、"abc10" 等),
-- 则整体不做任何转换,直接返回,为了配合小键盘输入逻辑中包吃书字面大小一致性
if input and input ~= "" and input:match("%d%d") then
return
end
-- 懒加载 tone_map
if not env.tone_map then
env.tone_map = {}
local cfg = engine and engine.schema and engine.schema.config
if cfg then
for d = 0, 9 do
local k = tostring(d)
local v = cfg:get_string("tone_preedit/" .. k)
if v and v ~= "" then
env.tone_map[k] = v
end
end
end
end
local preedit = cand.preedit
local converted = preedit:gsub("([^%d%s]+)(%d+)", function(body, digits)
local mapped = digits:gsub("%d", function(d)
return env.tone_map and env.tone_map[d] or d
end)
return body .. mapped
end)
if converted ~= preedit then
cand.preedit = converted
end
end
-- ----------------------
-- 主函数根据优先级处理候选词的注释和preedit
-- ----------------------
local ZH = {}
function ZH.init(env)
local config = env.engine.schema.config
local delimiter = config:get_string('speller/delimiter') or " '"
local auto_delimiter = delimiter:sub(1, 1)
local manual_delimiter = delimiter:sub(2, 2)
env.settings = {
delimiter = delimiter,
auto_delimiter = auto_delimiter,
manual_delimiter = manual_delimiter,
corrector_enabled = config:get_bool("super_comment/corrector") or true,
corrector_type = config:get_string("super_comment/corrector_type") or "{comment}",
chaifen = config:get_string("super_comment/chaifen") or "chaifen",
candidate_length = tonumber(config:get_string("super_comment/candidate_length")) or 1,
}
CR.init(env)
SV.init(env)
end
function ZH.fini(env)
-- 清理
CF.fini(env)
SV.fini(env)
end
function ZH.func(input, env)
local config = env.engine.schema.config
local context = env.engine.context
local input_str = context.input
local is_radical_mode = wanxiang.is_in_radical_mode(env)
local schema_id = env.engine.schema.schema_id or ""
local is_wanxiang_pro = (schema_id == "wanxiang_pro")
local should_skip_candidate_comment = wanxiang.is_function_mode_active(context) or input_str == ""
local is_tone_comment = env.engine.context:get_option("tone_hint")
local is_comment_hint = env.engine.context:get_option("fuzhu_hint")
local is_chaifen_enabled = env.engine.context:get_option("chaifen_switch")
--preedit相关声明
local delimiter = env.settings.delimiter
local auto_delimiter = env.settings.auto_delimiter
local manual_delimiter = env.settings.manual_delimiter
local visual_delim = config:get_string("speller/visual_delimiter") or " "
local tone_isolate = config:get_bool("speller/tone_isolate")
local is_tone_display = context:get_option("tone_display")
local is_full_pinyin = context:get_option("full_pinyin")
local index = 0
-- auto_phrase 相关声明
local enable_auto_phrase = config:get_bool("add_user_dict/enable_auto_phrase") or false
local enable_user_dict = config:get_bool("add_user_dict/enable_user_dict") or false
for cand in input:iter() do
local genuine_cand = cand:get_genuine()
local preedit = genuine_cand.preedit or ""
local initial_comment = genuine_cand.comment
local final_comment = initial_comment
index = index + 1
SV.update_preedit(env, preedit) --储存到环境变量
-- preedit相关处理只跳过 preedit不影响注释
if is_radical_mode then
goto after_preedit
end
if not is_tone_display and not is_full_pinyin then
goto after_preedit
end
if (not initial_comment or initial_comment == "") then
goto after_preedit
end
do
-- 拆分 preedit
local input_parts = {}
local current_segment = ""
for i = 1, #preedit do
local char = preedit:sub(i, i)
if char == auto_delimiter or char == manual_delimiter then
if #current_segment > 0 then
table.insert(input_parts, current_segment)
current_segment = ""
end
table.insert(input_parts, char)
else
current_segment = current_segment .. char
end
end
if #current_segment > 0 then
table.insert(input_parts, current_segment)
end
-- 拆分拼音段comment
local pinyin_segments = {}
for segment in string.gmatch(initial_comment, "[^" .. auto_delimiter .. manual_delimiter .. "]+") do
local pinyin = segment:match("^[^;]+")
if pinyin then
pinyin = pinyin:gsub("[%[%]]", "") --去掉英文词库编码中的[]
table.insert(pinyin_segments, pinyin)
end
end
-- 替换逻辑
local pinyin_index = 1
for i, part in ipairs(input_parts) do
if part == auto_delimiter or part == manual_delimiter then
input_parts[i] = visual_delim
else
local body, tone = part:match("([%a]+)([^%a]+)") --后面加号很必要
local py = pinyin_segments[pinyin_index]
if py then
if is_wanxiang_pro then
input_parts[i] = py
pinyin_index = pinyin_index + 1
elseif i == #input_parts and #part == 1 then
local prefix = py:sub(1, 2)
local first_char = part:sub(1,1):lower()
if first_char == "s" or first_char == "c" or first_char == "z" then
input_parts[i] = part
else
if prefix == "zh" or prefix == "ch" or prefix == "sh" then
input_parts[i] = prefix
else
input_parts[i] = part
end
end
else
if tone_isolate then
input_parts[i] = py .. (tone or "")
else
input_parts[i] = py
end
pinyin_index = pinyin_index + 1
end
end
end
end
if is_full_pinyin then
for idx, part in ipairs(input_parts) do
input_parts[idx] = remove_pinyin_tone(part)
end
end
genuine_cand.preedit = table.concat(input_parts)
end
::after_preedit::
apply_tone_preedit(env, genuine_cand)
if should_skip_candidate_comment then
yield(genuine_cand)
goto continue
end
-- 进入注释处理阶段
-- ① 辅助码注释或者声调注释
if is_comment_hint then
local fz_comment = get_fz_comment(cand, env, initial_comment)
if fz_comment then
final_comment = fz_comment
end
elseif is_tone_comment then
local fz_comment = get_fz_comment(cand, env, initial_comment)
if fz_comment then
final_comment = fz_comment
end
else
final_comment = ""
end
-- ② 拆分注释
if is_chaifen_enabled then
local cf_comment = CF.get_comment(cand, env)
if cf_comment and cf_comment ~= "" then --不为空很重要
final_comment = cf_comment
end
end
-- ③ 错音错字提示
if env.settings.corrector_enabled then
local cr_comment = CR.get_comment(cand)
if cr_comment and cr_comment ~= "" then
final_comment = cr_comment
end
end
-- ④ 反查模式提示
if is_radical_mode then
local az_comment = get_az_comment(cand, env, initial_comment)
if az_comment and az_comment ~= "" then
final_comment = az_comment
end
end
-- 应用注释
if final_comment ~= initial_comment then
genuine_cand.comment = final_comment
end
yield(genuine_cand)
::continue::
end
end
return ZH