-- super_segmentation.lua
-- @amzxyz https://github.com/amzxyz/rime_wanxiang
--
-- Rules:
--  1) 1st ': only record the "scene" (baseline_head = the whole current input,
--     including any manual delimiters typed earlier) and the index of the
--     starting pattern; the input is not rebuilt.
--  2) From the 2nd ' on: start cycling.
--     - starting pattern s matched: cycle only through the other m-1 patterns
--       (s itself is skipped)
--     - no match: cycle through all m patterns, starting at all[1]
--  3) After a full round: restore baseline_head and keep exactly one trailing '.
--  4) Supported code lengths are the keys of PATTERNS (currently 3..8, 10, 12);
--     extend PATTERNS to support more.
--  5) An update_notifier pre-caches the visible segmentation so the processor
--     is not "one beat late" on mobile front-ends.

-- Return codes handed back by M.func
-- (presumably librime-lua's kRejected / kAccepted / kNoop -- confirm).
local K_REJECT, K_ACCEPT, K_NOOP = 0, 1, 2

local M = {}

-- ---------- utils ----------

--- Escape every non-alphanumeric character of `ch` for use in a Lua pattern.
-- The outer parentheses drop gsub's second result (the replacement count) so
-- that callers always receive exactly one value.
local function escp(ch)
    return (ch:gsub("(%W)", "%%%1"))
end

--- Sum of all numbers in the sequence `a`.
local function sum(a)
    local total = 0
    for _, v in ipairs(a) do
        total = total + v
    end
    return total
end

--- Canonical string key of a group list, e.g. {2,3} -> "2,3".
local function key_of(a)
    return table.concat(a, ",")
end

--- Index of the entry in `list` whose key equals `key`, or nil if none.
local function find_idx(list, key)
    for i, groups in ipairs(list) do
        if key_of(groups) == key then
            return i
        end
    end
    return nil
end

--- Number of consecutive occurrences of the single character `ch` at the end of `s`.
local function count_trailing(s, ch)
    local n = 0
    for i = #s, 1, -1 do
        if s:sub(i, i) ~= ch then
            break
        end
        n = n + 1
    end
    return n
end

--- `s` without its trailing run of `ch`.
local function strip_trailing(s, ch)
    return (s:gsub(escp(ch) .. "+$", ""))
end

--- Remove every manual (`md`) and automatic (`ad`) delimiter from `s`,
-- leaving the "pure" code.
local function strip_delims(s, md, ad)
    if md and md ~= "" then
        s = s:gsub(escp(md), "")
    end
    if ad and ad ~= "" then
        s = s:gsub(escp(ad), "")
    end
    return s
end

--- Rebuild `core` with the manual delimiter inserted between the given groups.
-- @param core       pure code (no delimiters)
-- @param ch_manual  manual delimiter character
-- @param groups     sequence of group lengths; must add up to #core
-- @return the delimited string, or `core` unchanged when `groups` is missing,
--         empty, or does not add up to #core
local function build_by_groups(core, ch_manual, groups)
    if not groups or #groups == 0 or sum(groups) ~= #core then
        return core
    end
    local out, pos = {}, 1
    for gi, g in ipairs(groups) do
        out[#out + 1] = core:sub(pos, pos + g - 1)
        pos = pos + g
        if gi < #groups then
            out[#out + 1] = ch_manual
        end
    end
    return table.concat(out)
end

--- Parse the visible segment lengths out of a string.
-- `md`, `ad` and a literal space all act as separators. Only ASCII letters
-- count towards a segment's length; every other byte is ignored.
-- @return sequence of segment lengths, or nil when `s` is nil/empty or
--         contains no letters
local function lens_from_string(s, md, ad)
    if not s or s == "" then
        return nil
    end
    local lens, run = {}, 0
    for i = 1, #s do
        local c = s:sub(i, i)
        if c == md or c == ad or c == " " then
            -- A separator closes the current segment (empty runs are dropped).
            if run > 0 then
                lens[#lens + 1] = run
                run = 0
            end
        else
            local b = string.byte(c)
            if b and ((b >= 65 and b <= 90) or (b >= 97 and b <= 122)) then
                run = run + 1
            end
        end
    end
    if run > 0 then
        lens[#lens + 1] = run
    end
    if #lens == 0 then
        return nil
    end
    return lens
end

--- Segment lengths of what is currently displayed.
-- Prefers the lengths cached by the update notifier; otherwise computes them
-- from the selected candidate's preedit of the last composition segment.
local function get_cached_lens(env, ctx, md, ad)
    local cached = env._last_preedit_lens
    if cached and type(cached) == "table" and #cached > 0 then
        return cached
    end
    local seg = ctx.composition:back()
    local cand = seg and seg:get_selected_candidate() or nil
    return lens_from_string(cand and cand.preedit or nil, md, ad)
end

-- ---------- patterns ----------

-- PATTERNS[N].all lists, in cycling order, the segmentations offered for a
-- pure code of length N.
local PATTERNS = {
    [3]  = { all = { {2,1}, {1,2} } },
    [4]  = { all = { {2,2}, {1,3}, {3,1} } },
    [5]  = { all = { {2,3}, {3,2} } },
    [6]  = { all = { {2,2,2}, {3,3} } },
    [7]  = { all = { {2,2,3}, {2,3,2}, {3,2,2} } },
    [8]  = { all = { {2,2,2,2}, {2,3,3}, {3,2,3}, {3,3,2} } },
    [10] = { all = { {2,2,2,2,2} } },
    [12] = { all = { {2,2,2,2,2,2} } },
}

-- ---------- session state ----------

--- Forget everything recorded for the current cycling session.
local function reset_session(env)
    env._ss_core_letters = nil   -- pure code (delimiters removed)
    env._ss_start_idx = nil      -- starting pattern index (1..m); 0 when nothing matched
    env._ss_N = nil              -- length of the pure code
    env._ss_baseline_head = nil  -- baseline input, including earlier manual delimiters/spaces
end

--- Length of `s` in UTF-8 code points, degrading to the byte length.
-- Never raises: `utf8.len` returns nil for invalid UTF-8 and `utf8.codes`
-- raises on it, so both attempts are guarded before falling back to `#s`.
local function ulen(s)
    if not s or s == "" then
        return 0
    end
    if utf8 and utf8.len then
        local ok, n = pcall(utf8.len, s)
        if ok and n then
            return n
        end
    end
    if utf8 and utf8.codes then
        local ok, n = pcall(function()
            local count = 0
            for _ in utf8.codes(s) do
                count = count + 1
            end
            return count
        end)
        if ok and n then
            return n
        end
    end
    -- Last resort: byte length (inexact for multi-byte text, but better than nothing).
    return #s
end

--- Work out which pattern of `conf.all` the current segmentation corresponds to.
-- Tries the cached visible segmentation first and falls back to splitting
-- `head` directly, so that an already-displayed pattern is not offered again
-- as a bogus first step.
-- @return index into conf.all, or 0 when nothing matches
local function detect_start_idx(env, ctx, conf, head, N, md, ad)
    local lens = get_cached_lens(env, ctx, md, ad)
    if not (lens and sum(lens) == N) then
        lens = lens_from_string(head, md, ad)
    end
    if lens and sum(lens) == N then
        return find_idx(conf.all, key_of(lens)) or 0
    end
    return 0
end

--- Processor initialisation: read the delimiters and hook the update notifier.
function M.init(env)
    local cfg = env.engine.schema.config
    local delimiter = cfg:get_string("speller/delimiter") or " '"
    if #delimiter < 2 then
        delimiter = " '"
    end
    env.auto_delim = delimiter:sub(1, 1)    -- usually a space
    env.manual_delim = delimiter:sub(2, 2)  -- usually a single quote

    -- Cache the visible segmentation, input and caret of the latest frame.
    env._upd_conn = env.engine.context.update_notifier:connect(function(ctx)
        local seg = ctx.composition:back()
        local cand = seg and seg:get_selected_candidate() or nil
        local pre = cand and cand.preedit or nil
        env._last_preedit_lens = lens_from_string(pre, env.manual_delim, env.auto_delim)
        env._last_input_head = ctx.input
        env._last_input_for_caret = ctx.input
        env._last_caret_pos = ctx.caret_pos
    end)

    reset_session(env)
end

--- Processor teardown: disconnect the update notifier, if connected.
function M.fini(env)
    if env._upd_conn then
        env._upd_conn:disconnect()
        env._upd_conn = nil
    end
end

-- ---------- main ----------

--- Key handler: cycles segmentation patterns on repeated manual-delimiter keys.
-- @param key_event  the key event being processed
-- @param env        processor environment set up by M.init
-- @return K_ACCEPT when the input was rewritten here, K_NOOP otherwise
function M.func(key_event, env)
    if key_event:release() then
        return K_NOOP
    end

    local ctx = env.engine.context
    if ctx.composition:empty() then
        return K_NOOP
    end

    local md = env.manual_delim or "'"
    local ad = env.auto_delim or " "

    -- Only the manual delimiter key is handled; any other key ends the session.
    if key_event.keycode ~= string.byte(md) then
        reset_session(env)
        return K_NOOP
    end

    -- Use the caret position of the previous frame to tell whether the user is
    -- editing in the middle of the input; only a caret at the very end counts
    -- as super-segmentation, anything else goes to the default handling.
    -- NOTE(review): ulen counts UTF-8 code points; if caret_pos is a byte
    -- offset the two differ for non-ASCII input -- confirm against librime.
    local last_input = env._last_input_for_caret or ctx.input or ""
    local last_caret = env._last_caret_pos
    if not last_caret or last_caret ~= ulen(last_input) then
        reset_session(env)
        return K_NOOP
    end

    -- Fold this keystroke into the input and count the trailing delimiters.
    local before = ctx.input or ""
    local after = before .. md
    local tlen = count_trailing(after, md)

    -- head: input without the trailing delimiter run; core: pure code.
    local head = strip_trailing(after, md)
    local core = strip_delims(head, md, ad)
    local N = #core
    local conf = PATTERNS[N]

    -- A different code (or length) starts a new session.
    if env._ss_core_letters ~= core or env._ss_N ~= N then
        env._ss_core_letters = core
        env._ss_N = N
        env._ss_start_idx = nil
        env._ss_baseline_head = nil
    end

    -- Record baseline and starting pattern as soon as they are missing,
    -- regardless of whether this is the 1st delimiter or a later one.
    if env._ss_baseline_head == nil then
        env._ss_baseline_head = head  -- keeps existing spaces / manual delimiters
    end
    if conf and env._ss_start_idx == nil then
        env._ss_start_idx = detect_start_idx(env, ctx, conf, head, N, md, ad)
    end

    -- 1st delimiter: record only. Lengths without patterns: just accept the key.
    if tlen == 1 or not conf then
        ctx.input = after
        return K_ACCEPT
    end

    local m = #conf.all
    local k = tlen - 1  -- counting starts at the 2nd delimiter
    local start = env._ss_start_idx
    local hit = start ~= nil and start ~= 0

    -- One extra step per round is the "restore baseline" step. With a matched
    -- start only the other m-1 patterns are visited, otherwise all m.
    local cycle_len = hit and m or (m + 1)
    local r = k % cycle_len

    if r == 0 then
        -- Round finished: go back to the baseline with a single trailing
        -- delimiter and start a fresh session for the same code.
        ctx.input = (env._ss_baseline_head or head) .. md
        reset_session(env)
        env._ss_core_letters = core
        env._ss_N = N
        return K_ACCEPT
    end

    local idx
    if hit then
        idx = ((start - 1 + r) % m) + 1  -- r is 1..m-1, so the start itself is skipped
    else
        idx = ((r - 1) % m) + 1
    end
    ctx.input = build_by_groups(core, md, conf.all[idx]) .. md:rep(tlen)
    return K_ACCEPT
end

return { init = M.init, fini = M.fini, func = M.func }