fix: 再次调整英文处理器

This commit is contained in:
amzxyz
2026-01-20 00:36:33 +08:00
parent 3bd3b7cd79
commit 73484df7db

View File

@@ -12,7 +12,6 @@
local F = {}
-- 引入常用函数
local byte = string.byte
local find = string.find
local gsub = string.gsub
@@ -21,8 +20,8 @@ local lower = string.lower
local sub = string.sub
local match = string.match
local format = string.format
local STICKY_BUFFER_SIZE = 2 --输入/\的情况下继续输入3个单词不加空格适合网址路径
-- 辅助函数:获取候选类型
local STICKY_BUFFER_SIZE = 2
local function fast_type(c)
local t = c.type
if t then return t end
@@ -30,58 +29,48 @@ local function fast_type(c)
return (g and g.type) or ""
end
-- 辅助函数:判断是否为置顶表词汇
local function is_table_type(c)
local t = fast_type(c)
return t == "user_table" or t == "fixed"
end
-- [Time] 封装统一的时间获取函数 (单位: 秒, 带小数)
local function get_now()
-- 使用用户指定的原生 API (毫秒转秒,以便和配置文件里的 0.5 秒兼容)
if rime_api and rime_api.get_time_ms then
return rime_api.get_time_ms() / 1000
end
--以此为保底,防止 API 不存在时报错
return os.time()
end
local function pure(s)
return gsub(s, "[^a-zA-Z]", ""):lower()
end
local no_spacing_words = {
["http"] = true,
["https"] = true,
["www"] = true,
["ftp"] = true,
["ssh"] = true,
["mailto"]= true,
["file"] = true,
["tel"] = true,
["http"] = true, ["https"] = true, ["www"] = true, ["ftp"] = true,
["ssh"] = true, ["mailto"]= true, ["file"] = true, ["tel"] = true,
}
local allowed_ascii_symbols = {
[32] = true, -- space
[33] = true, -- !
[39] = true, -- ' (Don't)
[39] = true, -- '
[44] = true, -- ,
[45] = true, -- - (Co-op)
[45] = true, -- -
[43] = true, -- +
[46] = true, -- .
[63] = true, -- ?
[92] = true, -- \
-- 数字 0-9 (ASCII 48-57)
[48]=true, [49]=true, [50]=true, [51]=true, [52]=true,
[53]=true, [54]=true, [55]=true, [56]=true, [57]=true,
}
-- 规则:只允许 字母(A-Za-z) 和 上面配置表里的符号
local function is_ascii_phrase_fast(s)
if not s or s == "" then return false end
local len = #s
for i = 1, len do
local b = byte(s, i)
-- 1. 判断是否为大写字母 A-Z (65-90)
local is_upper = (b >= 65 and b <= 90)
-- 2. 判断是否为小写字母 a-z (97-122)
local is_lower = (b >= 97 and b <= 122)
-- 3. 判断是否为白名单符号
local is_allowed_sym = allowed_ascii_symbols[b]
if not (is_upper or is_lower or is_allowed_sym) then
return false
@@ -94,16 +83,13 @@ local function has_letters(s)
return find(s, "[a-zA-Z]")
end
-- 序列匹配:返回 (首字母位置, 最后一个匹配字符的位置)
local function find_target_in_text(text, start_pos, target_fp)
local text_len = #text
local target_len = #target_fp
if target_len == 0 then return nil, nil end
local t_idx = 1
local scan_p = start_pos
local s_index = nil
local t_idx = 1
local scan_p = start_pos
local s_index = nil
while scan_p <= text_len and t_idx <= target_len do
local char_txt = sub(text, scan_p, scan_p)
if lower(char_txt) == sub(target_fp, t_idx, t_idx) then
@@ -112,18 +98,15 @@ local function find_target_in_text(text, start_pos, target_fp)
end
scan_p = scan_p + 1
end
if t_idx > target_len then
return s_index, scan_p - 1
end
return nil, nil
end
-- 2. 核心逻辑:格式化与还原
local function restore_sentence_spacing(cand, split_pattern, check_pattern)
local guide = cand.preedit or ""
if not find(guide, check_pattern) then return cand end
local text = cand.text
local targets = {}
for seg in string.gmatch(guide, split_pattern) do
@@ -131,7 +114,6 @@ local function restore_sentence_spacing(cand, split_pattern, check_pattern)
if #t > 0 then table.insert(targets, t) end
end
if #targets == 0 then return cand end
local starts = {}
local p = 1
for _, target in ipairs(targets) do
@@ -140,19 +122,16 @@ local function restore_sentence_spacing(cand, split_pattern, check_pattern)
table.insert(starts, s)
p = e + 1
end
local parts = {}
if starts[1] > 1 then
table.insert(parts, sub(text, 1, starts[1] - 1))
end
for i = 1, #starts do
local current_s = starts[i]
local next_s = starts[i+1]
local chunk_end = next_s and (next_s - 1) or #text
table.insert(parts, sub(text, current_s, chunk_end))
end
local new_text = ""
for i, part in ipairs(parts) do
if i == 1 then
@@ -167,9 +146,7 @@ local function restore_sentence_spacing(cand, split_pattern, check_pattern)
end
end
new_text = gsub(new_text, "%s%s+", " ")
if new_text == "" then return cand end
local nc = Candidate(cand.type, cand.start, cand._end, new_text, cand.comment)
nc.preedit = cand.preedit
return nc
@@ -179,14 +156,11 @@ local NBSP = string.char(0xC2, 0xA0)
local function apply_segment_formatting(text, input_code)
if not input_code or input_code == "" then return text end
local parts = {}
local p_code = 1
for word in string.gmatch(text, "%S+") do
local clean_word = pure(word)
local w_len = #clean_word
if w_len > 0 then
if find(word, "[\128-\255]") then
local input_remain = #input_code - p_code + 1
@@ -200,7 +174,6 @@ local function apply_segment_formatting(text, input_code)
local check_len = (w_len < input_remain) and w_len or input_remain
local segment = sub(input_code, p_code, p_code + check_len - 1)
local is_pure_alpha = not find(word, "[^a-zA-Z]")
if find(segment, "^%u%u") and is_pure_alpha then
word = upper(word)
elseif find(segment, "^%u") then
@@ -212,7 +185,6 @@ local function apply_segment_formatting(text, input_code)
end
table.insert(parts, word)
end
return table.concat(parts, " ")
end
@@ -220,19 +192,13 @@ local function apply_formatting(cand, code_ctx)
local text = cand.text
if not text or text == "" then return cand end
local changed = false
local norm = gsub(text, NBSP, " ")
if norm ~= text then text = norm; changed = true end
if is_ascii_phrase_fast(text) and has_letters(text) then
if code_ctx.raw_input then
local new_text = apply_segment_formatting(text, code_ctx.raw_input)
if new_text ~= text then
text = new_text
changed = true
end
if new_text ~= text then text = new_text; changed = true end
end
if code_ctx.spacing_mode and code_ctx.spacing_mode ~= "off" then
local mode = code_ctx.spacing_mode
if mode == "smart" then
@@ -246,87 +212,69 @@ local function apply_formatting(cand, code_ctx)
end
end
end
if not changed then return cand end
local nc = Candidate(cand.type, cand.start, cand._end, text, cand.comment)
nc.preedit = cand.preedit
return nc
end
-- 3. 状态管理 (Filter)
function F.init(env)
env.memory = {}
local cfg = env.engine.schema.config
-- 1. 配置读取
env.english_spacing_mode = "off"
env.spacing_timeout = 0
env.lookup_key = "`"
if cfg then
local str = cfg:get_string("wanxiang_english/english_spacing")
if str then env.english_spacing_mode = str end
-- 读取超时 (单位: 秒, 支持小数)
local timeout = cfg:get_double("wanxiang_english/spacing_timeout")
if timeout then env.spacing_timeout = timeout end
local key = cfg:get_string("wanxiang_lookup/key")
if key and key ~= "" then env.lookup_key = key end
end
env.lookup_key_esc = gsub(env.lookup_key, "([%%%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1")
-- 2. 动态获取分隔符
local delimiter_str = " '"
if cfg then
delimiter_str = cfg:get_string('speller/delimiter') or delimiter_str
end
env.delimiter_char = sub(delimiter_str, 1, 1) --提取自动分词符号
env.delimiter_char = sub(delimiter_str, 1, 1)
local escaped_delims = gsub(delimiter_str, "([%%%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1")
env.split_pattern = "[^" .. escaped_delims .. "]+"
env.delim_check_pattern = "[" .. escaped_delims .. "]"
env.prev_commit_is_eng = false
env.last_commit_time = 0 --记录上次提交时间
env.comp_start_time = nil -- 记录本次输入开始的时间
env.last_commit_time = 0
env.comp_start_time = nil
env.spacing_active = false
env.decision_locked = false
env.sticky_countdown = 0 -- 粘性倒计时
env.sticky_countdown = 0
if env.engine.context then
env.update_notifier = env.engine.context.update_notifier:connect(function(ctx)
local curr_input = ctx.input
-- 检测当前输入是否包含反查符
if env.lookup_key and find(curr_input, env.lookup_key, 1, true) then
env.block_derivation = true
else
env.block_derivation = false
end
-- 如果输入框为空,重置开始时间
if curr_input == "" then
env.comp_start_time = nil
-- 如果输入框不为空,且还没记录开始时间,说明是“刚刚开始打字”
elseif env.comp_start_time == nil then
env.comp_start_time = get_now()
end
end)
env.commit_notifier = env.engine.context.commit_notifier:connect(function(ctx)
local commit_text = ctx:get_commit_text()
-- 1. 先剔除空格,防止死循环
local text_no_space = gsub(commit_text, "%s", "")
local is_eng = is_ascii_phrase_fast(text_no_space)
-- 2. 粘性触发 (结尾是 / 或 \)
if find(text_no_space, "[/\\\\]$") then
env.sticky_countdown = STICKY_BUFFER_SIZE
is_eng = false
-- 3. 粘性缓冲期 (倒计时)
elseif env.sticky_countdown > 0 then
if is_eng then
-- 只要是英文,消耗一次缓冲,并强制不加空格
env.sticky_countdown = env.sticky_countdown - 1
is_eng = false
else
-- 遇到非英文(中文等),打断缓冲
env.sticky_countdown = 0
end
-- 4. 普通黑名单 (http等)
elseif is_eng then
local clean = gsub(commit_text, "%s+$", ""):lower()
if no_spacing_words[clean] then
@@ -334,7 +282,6 @@ function F.init(env)
end
end
env.prev_commit_is_eng = is_eng
-- 仅英文上屏更新时间戳 (使用 rime_api 获取)
if is_eng then
env.last_commit_time = get_now()
else
@@ -345,14 +292,12 @@ function F.init(env)
end)
end
end
function F.fini(env)
if env.update_notifier then env.update_notifier:disconnect(); env.update_notifier = nil end
if env.commit_notifier then env.commit_notifier:disconnect(); env.commit_notifier = nil end
env.memory = nil
end
-- 4. 主逻辑 (Filter)
function F.func(input, env)
local ctx = env.engine.context
local curr_input = ctx.input
@@ -360,7 +305,7 @@ function F.func(input, env)
local best_candidate_saved = false
local code_len = #curr_input
-- [Feature] 强制英文造词 (末尾 \\)
-- [Feature] 强制英文造词
if code_len > 2 and sub(curr_input, -2) == "\\\\" then
local raw_text = sub(curr_input, 1, code_len - 2)
if is_ascii_phrase_fast(raw_text) then
@@ -374,15 +319,12 @@ function F.func(input, env)
end
end
-- [Check 1] 外部脚本发来的打断信号
local break_signal = (ctx:get_property("english_spacing") == "true")
local effective_prev_is_eng = env.prev_commit_is_eng
if break_signal then
effective_prev_is_eng = false
env.prev_commit_is_eng = false
-- [Check 2] 时间自然过期
elseif effective_prev_is_eng and env.spacing_timeout > 0 then
local check_time = env.comp_start_time or get_now()
if (check_time - env.last_commit_time) > env.spacing_timeout then
@@ -397,16 +339,13 @@ function F.func(input, env)
prev_is_eng = effective_prev_is_eng
}
-- 1. 准备单字母候选
local single_char_injected = false
local single_chars = {}
local has_single_chars = false
local single_char_injected = false
if code_len == 1 then
local b = byte(curr_input)
local is_upper = (b >= 65 and b <= 90)
local is_lower = (b >= 97 and b <= 122)
-- 只有输入是字母时,才准备 A/a 候选
if is_upper or is_lower then
local t1 = curr_input
local t2 = is_upper and lower(curr_input) or upper(curr_input)
@@ -418,50 +357,50 @@ function F.func(input, env)
single_char_injected = true
end
-- 2. 流式遍历
for cand in input:iter() do
local c_type = cand.type
local final_cand = cand
local is_ascii = false
if c_type ~= "phrase" then
local good_cand = restore_sentence_spacing(cand, env.split_pattern, env.delim_check_pattern)
final_cand = apply_formatting(good_cand, code_ctx)
is_ascii = is_ascii_phrase_fast(final_cand.text)
end
if final_cand.comment and find(final_cand.comment, "\226\152\175") then
local nc = Candidate(final_cand.type, final_cand.start, final_cand._end, final_cand.text, "")
nc.preedit = final_cand.preedit
final_cand = nc
end
local is_garbage = (c_type == "raw")
local good_cand = restore_sentence_spacing(cand, env.split_pattern, env.delim_check_pattern)
local fmt_cand = apply_formatting(good_cand, code_ctx)
-- [恢复] 去除注释中的太极符号
if fmt_cand.comment and find(fmt_cand.comment, "\226\152\175") then
local nc = Candidate(fmt_cand.type, fmt_cand.start, fmt_cand._end, fmt_cand.text, "")
nc.preedit = fmt_cand.preedit
fmt_cand = nc
end
local c_type = cand.type
local is_ascii = is_ascii_phrase_fast(fmt_cand.text)
local is_tbl = is_table_type(cand)
-- [垃圾词判定]:保护符号,只去重单字母
local is_garbage = (c_type == "raw")
if not is_garbage and code_len == 1 and has_letters(curr_input) then
if lower(final_cand.text) == lower(curr_input) then
if lower(fmt_cand.text) == lower(curr_input) then
is_garbage = true
end
end
if not is_garbage then
has_valid_candidate = true
-- [VIP 优先逻辑]
local is_vip_type = (c_type == "user_table" or c_type == "fixed" or c_type == "phrase")
local is_hidden_vip = (not is_vip_type) and (not is_ascii)
local treat_as_vip = is_vip_type or is_hidden_vip
if treat_as_vip then
-- VIP 通道 (汉字、符号、用户词)
-- VIP 通道:不仅是 user_table包括汉字等都直接输出不让单字母插队
if not best_candidate_saved and cand.comment ~= "~" and not env.block_derivation then
env.memory[curr_input] = {
text = final_cand.text,
preedit = final_cand.preedit or curr_input
text = fmt_cand.text,
preedit = fmt_cand.preedit or curr_input
}
best_candidate_saved = true
end
yield(final_cand)
yield(fmt_cand)
else
-- 普通通道 (英文插队)
-- 普通通道:允许单字母插队到前面
if has_single_chars and not single_char_injected then
if not best_candidate_saved then
env.memory[curr_input] = { text = single_chars[1].text, preedit = curr_input }
@@ -474,86 +413,91 @@ function F.func(input, env)
if not best_candidate_saved and cand.comment ~= "~" and not env.block_derivation then
env.memory[curr_input] = {
text = final_cand.text,
preedit = final_cand.preedit or curr_input
text = fmt_cand.text,
preedit = fmt_cand.preedit or curr_input
}
best_candidate_saved = true
end
yield(final_cand)
yield(fmt_cand)
end
end
end
-- 3. 兜底逻辑
-- 3. 兜底逻辑 (补单字母)
if has_single_chars and not single_char_injected then
if not best_candidate_saved then
env.memory[curr_input] = { text = single_chars[1].text, preedit = curr_input }
env.memory[curr_input] = { text = single_chars[1].text, preedit = single_chars[1].text }
best_candidate_saved = true
end
for _, c in ipairs(single_chars) do yield(c) end
has_valid_candidate = true
end
-- [Phase 3] 构造补全
-- [Phase 3] 历史回溯构造 (Strictly fallback)
-- [恢复功能] 无候选时,尝试从历史构造
if not has_valid_candidate then
if env.block_derivation then return end
if find(curr_input, "^[/]") then return end
if not has_letters(curr_input) then return end
local anchor = nil
local diff = ""
for i = #curr_input - 1, 1, -1 do
local prefix = sub(curr_input, 1, i)
if env.memory[prefix] then
anchor = env.memory[prefix]
diff = sub(curr_input, i + 1)
break
if not env.block_derivation and has_letters(curr_input) and not find(curr_input, "^[/]") then
local anchor = nil
local diff = ""
for i = #curr_input - 1, 1, -1 do
local prefix = sub(curr_input, 1, i)
if env.memory[prefix] then
anchor = env.memory[prefix]
diff = sub(curr_input, i + 1)
break
end
end
end
if anchor and diff ~= "" then
local has_spacing = find(anchor.text, " ")
local last_word = match(anchor.text, "(%S+)%s*$") or ""
local last_len = #last_word
local output_text = ""
local output_preedit = ""
local is_code_mode = find(curr_input, "^[/\\]")
if is_ascii_phrase_fast(anchor.text) then
if has_spacing then
output_text = anchor.text .. diff
output_preedit = (anchor.preedit or anchor.text) .. diff
elseif last_len > 3 then
if anchor and diff ~= "" then
local has_spacing = find(anchor.text, " ")
local last_word = match(anchor.text, "(%S+)%s*$") or ""
local last_len = #last_word
local output_text = ""
local output_preedit = ""
local is_code_mode = find(curr_input, "^[/\\]")
if is_ascii_phrase_fast(anchor.text) then
local spacer = " "
if sub(anchor.text, -1) == " " then spacer = "" end
output_text = anchor.text .. spacer .. diff
output_preedit = (anchor.preedit or anchor.text) .. spacer .. diff
if has_spacing then
output_text = anchor.text .. spacer .. diff
output_preedit = (anchor.preedit or anchor.text) .. spacer .. diff
elseif last_len > 3 then
output_text = anchor.text .. spacer .. diff
output_preedit = (anchor.preedit or anchor.text) .. spacer .. diff
else
output_text = curr_input
output_preedit = curr_input
end
elseif is_code_mode then
output_text = anchor.text .. diff
output_preedit = (anchor.preedit or anchor.text) .. diff
else
output_text = curr_input
output_preedit = curr_input
output_text = anchor.text
output_preedit = (anchor.preedit or anchor.text) .. env.delimiter_char .. diff
end
elseif is_code_mode then
output_text = anchor.text .. diff
output_preedit = (anchor.preedit or anchor.text) .. diff
output_text = apply_segment_formatting(output_text, curr_input)
local cand = Candidate("completion", 0, #curr_input, output_text, "~")
cand.preedit = output_preedit
cand.quality = 999
yield(cand)
else
output_text = anchor.text
output_preedit = (anchor.preedit or anchor.text) .. env.delimiter_char .. diff
-- [Phase 4] 真正的无解兜底
local cand = Candidate("completion", 0, #curr_input, curr_input, "~")
cand.preedit = curr_input
yield(cand)
end
output_text = apply_segment_formatting(output_text, curr_input)
local cand = Candidate("completion", 0, #curr_input, output_text, "~")
cand.preedit = output_preedit
cand.quality = 999
yield(cand)
else
local cand = Candidate("completion", 0, #curr_input, curr_input, "~")
cand.preedit = curr_input
yield(cand)
-- 特殊符号或被拦截时的兜底
local cand = Candidate("completion", 0, #curr_input, curr_input, "~")
cand.preedit = curr_input
yield(cand)
end
end
end
return F