-- lua/super_english.lua
-- https://github.com/amzxyz/rime_wanxiang
-- @description: All-in-one English processor (Fix: dynamic delimiter compatibility)
-- @author: amzxyz
--
-- Core features:
-- 1. [Format]    Sentence-level capitalization that follows the typed case word by word
--                (look HELLO -> look HELLO)
-- 2. [Spacing]   Smart sentence spacing, smart space insertion on commit, and
--                lossless restoration of segment boundaries
-- 3. [Memory]    Full history cache of previous inputs (keeps backspacing from producing garbage)
-- 4. [Construct] Native-first construction (short words without segmentation fall back to raw input)
-- 5. [Order]     Single-letter (a/A) candidates are injected and ordered

local F = {}

-- Localized string-library functions
local byte = string.byte
local find = string.find
local gsub = string.gsub
local upper = string.upper
local lower = string.lower
local sub = string.sub
local match = string.match
local format = string.format

-- Delimiters used when the schema does not configure any (or configures "").
local DEFAULT_DELIMITERS = " '"

--====================================================
-- 1. Basic helpers
--====================================================

--- Strip everything that is not an ASCII letter and lowercase the rest.
-- @param s string
-- @return string containing only a-z
local function pure(s)
    return gsub(s, "[^a-zA-Z]", ""):lower()
end

--- Tell whether a string is non-empty and made of bytes <= 127 only.
-- @param s string
-- @return boolean
local function is_ascii_phrase_fast(s)
    if s == "" then return false end
    for i = 1, #s do
        if byte(s, i) > 127 then return false end
    end
    return true
end

--- Tell whether a string contains at least one ASCII letter.
-- @param s string
-- @return truthy (the string.find result) when a letter exists, nil otherwise
local function has_letters(s)
    return find(s, "[a-zA-Z]")
end

--- Subsequence match of `target_fp` inside `text`, case-insensitive on the text side.
-- Scanning starts at `start_pos`; characters of `text` that do not match the
-- next wanted character are skipped.
-- @param text string to scan
-- @param start_pos 1-based position where scanning begins
-- @param target_fp lowercase letters to find, in order
-- @return position of the first matched character and position of the last
--         matched character, or nil, nil when the target cannot be matched
local function find_target_in_text(text, start_pos, target_fp)
    local text_len = #text
    local target_len = #target_fp
    if target_len == 0 then return nil, nil end

    local t_idx = 1
    local scan_p = start_pos
    local s_index = nil

    while scan_p <= text_len and t_idx <= target_len do
        local char_txt = sub(text, scan_p, scan_p)
        if lower(char_txt) == sub(target_fp, t_idx, t_idx) then
            if t_idx == 1 then s_index = scan_p end
            t_idx = t_idx + 1
        end
        scan_p = scan_p + 1
    end

    if t_idx > target_len then
        return s_index, scan_p - 1
    end
    return nil, nil
end

--====================================================
-- 2. Core logic: formatting and restoration
--====================================================

--- Re-insert spaces into a candidate's text using the delimiters found in its preedit.
-- Works by locating an "anchor" (start position) in the text for every
-- delimiter-separated segment of the preedit, then cutting the text at those anchors.
-- @param cand candidate (reads .preedit, .text, .type, .start, ._end, .comment)
-- @param split_pattern Lua pattern matching one run of non-delimiter characters
-- @param check_pattern Lua pattern matching a single delimiter character
-- @return the original candidate when nothing applies, otherwise a new
--         Candidate carrying the re-spaced text and the original preedit
local function restore_sentence_spacing(cand, split_pattern, check_pattern)
    local guide = cand.preedit or ""

    -- 1. Only step in when the preedit actually contains a delimiter.
    if not find(guide, check_pattern) then return cand end

    local text = cand.text

    -- 2. Collect the target fragments (hi'vcs -> {hi, vcs}).
    local targets = {}
    for seg in string.gmatch(guide, split_pattern) do
        local t = pure(seg)
        if #t > 0 then table.insert(targets, t) end
    end
    if #targets == 0 then return cand end

    -- 3. Find the start anchor of every fragment inside the text.
    local starts = {}
    local p = 1
    for _, target in ipairs(targets) do
        -- s is the anchor; e only advances the search position.
        local s, e = find_target_in_text(text, p, target)
        if not s then
            -- A fragment that cannot be matched means preedit and text
            -- disagree: give up and return the candidate untouched.
            return cand
        end
        table.insert(starts, s)
        p = e + 1
    end

    -- 4. Cut the text at the anchors.
    local parts = {}

    -- Keep any text that precedes the first anchor.
    if starts[1] > 1 then
        table.insert(parts, sub(text, 1, starts[1] - 1))
    end

    for i = 1, #starts do
        local current_s = starts[i]
        local next_s = starts[i + 1]
        local chunk_end
        if next_s then
            -- The current chunk ends right before the next anchor.
            chunk_end = next_s - 1
        else
            -- The last chunk runs to the end of the text so no tail is lost.
            chunk_end = #text
        end
        table.insert(parts, sub(text, current_s, chunk_end))
    end

    -- 5. Join and collapse runs of whitespace.
    local new_text = table.concat(parts, " ")
    new_text = gsub(new_text, "%s%s+", " ")

    if new_text == "" then return cand end

    local nc = Candidate(cand.type, cand.start, cand._end, new_text, cand.comment)
    nc.preedit = cand.preedit
    return nc
end

-- UTF-8 encoding of U+00A0 (no-break space).
local NBSP = string.char(0xC2, 0xA0)

--- Apply the capitalization of the typed code to the words of `text`.
-- Each whitespace-separated word consumes as many characters of `input_code`
-- as it has ASCII letters. If the consumed slice starts with two uppercase
-- letters and the word is purely alphabetic, the word is uppercased; if it
-- starts with one uppercase letter, the word's first character is uppercased
-- when it is a letter. Words containing bytes >= 128 consume input but are
-- never altered. Words are re-joined with single spaces.
-- @param text string to format
-- @param input_code raw typed code (may be nil or "")
-- @return formatted string (the input text when no code is given)
local function apply_segment_formatting(text, input_code)
    if not input_code or input_code == "" then return text end

    local parts = {}
    local p_code = 1

    for word in string.gmatch(text, "%S+") do
        local clean_word = pure(word)
        local w_len = #clean_word

        if w_len > 0 then
            local input_remain = #input_code - p_code + 1
            if input_remain > 0 then
                local check_len = (w_len < input_remain) and w_len or input_remain

                -- Only words free of high bytes are re-cased.
                if not find(word, "[\128-\255]") then
                    local segment = sub(input_code, p_code, p_code + check_len - 1)
                    local is_pure_alpha = not find(word, "[^a-zA-Z]")

                    if find(segment, "^%u%u") and is_pure_alpha then
                        word = upper(word)
                    elseif find(segment, "^%u") then
                        word = gsub(word, "^%a", upper)
                    end
                end

                p_code = p_code + check_len
            end
        end

        table.insert(parts, word)
    end

    return table.concat(parts, " ")
end

--- Normalize NBSP, apply case formatting and the configured spacing mode.
-- Case formatting and spacing are applied only to ASCII text that contains letters.
-- @param cand candidate (reads .text, .type, .start, ._end, .comment, .preedit)
-- @param code_ctx table with raw_input, spacing_mode ("smart" | "before" |
--        "after" | anything else = no spacing) and prev_is_eng
-- @return the original candidate when the text is unchanged, otherwise a new Candidate
local function apply_formatting(cand, code_ctx)
    local text = cand.text
    if not text or text == "" then return cand end

    local changed = false

    local norm = gsub(text, NBSP, " ")
    if norm ~= text then
        text = norm
        changed = true
    end

    if is_ascii_phrase_fast(text) and has_letters(text) then
        if code_ctx.raw_input then
            local new_text = apply_segment_formatting(text, code_ctx.raw_input)
            if new_text ~= text then
                text = new_text
                changed = true
            end
        end

        local mode = code_ctx.spacing_mode
        if mode == "smart" then
            -- Leading space only when the previous commit counted as English.
            if code_ctx.prev_is_eng and not find(text, "^%s") then
                text = " " .. text
                changed = true
            end
        elseif mode == "before" then
            if not find(text, "^%s") then
                text = " " .. text
                changed = true
            end
        elseif mode == "after" then
            if not find(text, "%s$") then
                text = text .. " "
                changed = true
            end
        end
    end

    if not changed then return cand end

    local nc = Candidate(cand.type, cand.start, cand._end, text, cand.comment)
    nc.preedit = cand.preedit
    return nc
end

--====================================================
-- 3. State management (Filter)
--====================================================

--- Filter initializer.
-- Sets up env.memory, env.english_spacing_mode (schema key "english_spacing",
-- default "off"), the delimiter patterns derived from "speller/delimiter",
-- and a commit notifier that tracks whether the last commit counts as English.
-- @param env filter environment (reads env.engine.schema.config and env.engine.context)
function F.init(env)
    env.memory = {}

    local cfg = env.engine.schema.config

    env.english_spacing_mode = "off"
    if cfg then
        local str = cfg:get_string("english_spacing")
        if str then env.english_spacing_mode = str end
    end

    -- Read the delimiters (space and custom symbols are both supported).
    -- An empty string is truthy in Lua, so it must be rejected explicitly:
    -- it would otherwise yield the malformed patterns "[^]+" and "[]",
    -- which make string.find / string.gmatch raise at filter time.
    local delimiter_str = DEFAULT_DELIMITERS
    if cfg then
        local configured = cfg:get_string("speller/delimiter")
        if configured and configured ~= "" then
            delimiter_str = configured
        end
    end

    -- Escape pattern-magic characters so they are literal inside a character class.
    local escaped_delims = gsub(delimiter_str, "([%%%^%$%(%)%.%[%]%*%+%-%?])", "%%%1")
    env.split_pattern = "[^" .. escaped_delims .. "]+"
    env.delim_check_pattern = "[" .. escaped_delims .. "]"

    env.prev_commit_is_eng = false

    if env.engine.context then
        env.commit_notifier = env.engine.context.commit_notifier:connect(function(ctx)
            local commit_text = ctx:get_commit_text()
            local is_eng = is_ascii_phrase_fast(commit_text)
            if not is_eng then
                -- These four literals are compared after trailing whitespace
                -- is stripped; a match also counts as English.
                local clean = gsub(commit_text, "%s+$", "")
                if clean == "," or clean == "." or clean == "!" or clean == "?" then
                    is_eng = true
                end
            end
            env.prev_commit_is_eng = is_eng
            ctx:set_property("english_spacing", "")
        end)
    end
end

--- Filter finalizer: disconnect the commit notifier and drop the cache.
-- @param env filter environment
function F.fini(env)
    if env.commit_notifier then
        env.commit_notifier:disconnect()
        env.commit_notifier = nil
    end
    env.memory = nil
end

--====================================================
-- 4. Main logic (Filter)
--====================================================

--- Filter body: re-space and re-case candidates, inject single-letter
-- candidates, remember the best candidate per input, and construct a
-- fallback candidate when the stream holds nothing usable.
-- @param input candidate stream (iterated with input:iter())
-- @param env filter environment prepared by F.init
function F.func(input, env)
    local ctx = env.engine.context
    local curr_input = ctx.input
    local has_valid_candidate = false
    local best_candidate_saved = false
    local code_len = #curr_input

    -- The context property "english_spacing" == "true" suppresses smart spacing once.
    local break_signal = (ctx:get_property("english_spacing") == "true")
    local effective_prev_is_eng = env.prev_commit_is_eng
    if break_signal then effective_prev_is_eng = false end

    local code_ctx = {
        raw_input = curr_input,
        spacing_mode = env.english_spacing_mode,
        prev_is_eng = effective_prev_is_eng,
    }

    -- Single-letter candidates exist only for a one-character alphabetic input;
    -- in every other case injection is marked as already done.
    local single_char_injected = true
    local c_lower, c_upper = nil, nil
    if code_len == 1 then
        local b = byte(curr_input)
        if (b >= 65 and b <= 90) or (b >= 97 and b <= 122) then
            c_lower = Candidate("completion", 0, 1, lower(curr_input), "")
            c_upper = Candidate("completion", 0, 1, upper(curr_input), "")
            single_char_injected = false
        end
    end

    -- Yield the lowercase and uppercase single-letter candidates, caching the
    -- lowercase one as the best candidate if nothing was cached yet.
    local function inject_single_letters()
        if not best_candidate_saved then
            env.memory[curr_input] = { text = c_lower.text, preedit = c_lower.text }
            best_candidate_saved = true
        end
        yield(c_lower)
        yield(c_upper)
        single_char_injected = true
        has_valid_candidate = true
    end

    for cand in input:iter() do
        -- Restore spacing using the delimiter patterns, then format.
        local good_cand = restore_sentence_spacing(cand, env.split_pattern, env.delim_check_pattern)
        local fmt_cand = apply_formatting(good_cand, code_ctx)

        -- Inject the single letters right before the first ASCII candidate.
        if not single_char_injected and c_lower and is_ascii_phrase_fast(fmt_cand.text) then
            inject_single_letters()
        end

        local is_garbage = (cand.type == "raw") or (fmt_cand.text == curr_input)
        if not is_garbage then
            has_valid_candidate = true
            -- Candidates with comment "~" are never cached.
            if not best_candidate_saved and cand.comment ~= "~" then
                env.memory[curr_input] = {
                    text = fmt_cand.text,
                    preedit = fmt_cand.preedit or fmt_cand.text,
                }
                best_candidate_saved = true
            end
        end

        yield(fmt_cand)
    end

    -- No ASCII candidate showed up: append the single letters at the end.
    if not single_char_injected and c_lower then
        inject_single_letters()
    end

    -- [Phase 3] Constructed completion
    if not has_valid_candidate then
        if not has_letters(curr_input) then return end

        -- Look for the longest proper prefix of the input that was cached.
        local anchor = nil
        local diff = ""
        for i = #curr_input - 1, 1, -1 do
            local prefix = sub(curr_input, 1, i)
            if env.memory[prefix] then
                anchor = env.memory[prefix]
                diff = sub(curr_input, i + 1)
                break
            end
        end

        if anchor and diff ~= "" then
            local has_spacing = find(anchor.text, " ")
            local last_word = match(anchor.text, "(%S+)%s*$") or ""
            local last_len = #last_word

            local output_text
            local output_preedit

            if has_spacing then
                -- Cached text already has spaces: append the new characters directly.
                output_text = anchor.text .. diff
                output_preedit = (anchor.preedit or anchor.text) .. diff
            elseif last_len > 3 then
                -- Cached single word longer than 3 characters: start a new word.
                local spacer = " "
                if sub(anchor.text, -1) == " " then spacer = "" end
                output_text = anchor.text .. spacer .. diff
                output_preedit = (anchor.preedit or anchor.text) .. spacer .. diff
            else
                -- Short cached word: reset to the raw input.
                output_text = curr_input
                output_preedit = curr_input
            end

            output_text = apply_segment_formatting(output_text, curr_input)

            local cand = Candidate("completion", 0, #curr_input, output_text, "~")
            cand.preedit = output_preedit
            cand.quality = 9999999
            yield(cand)
        else
            local cand = Candidate("completion", 0, #curr_input, curr_input, "~")
            cand.preedit = curr_input
            yield(cand)
        end
    end
end

return F