From 0417d9dfdbcbfe92ddb1194fb7aa5aabfe731924 Mon Sep 17 00:00:00 2001 From: amzxyz Date: Mon, 5 Jan 2026 10:34:19 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=96=B0=E5=A2=9Eyaml=E6=AD=A3?= =?UTF-8?q?=E5=88=99=E8=BD=AC=E4=B9=89lua=E6=AD=A3=E5=88=99=E6=A8=A1?= =?UTF-8?q?=E5=9D=97=EF=BC=8C=E6=95=B0=E5=AD=97=E9=94=AE=E5=A4=84=E7=90=86?= =?UTF-8?q?=E9=80=BB=E8=BE=91=E5=B0=86=E7=9B=B4=E6=8E=A5=E8=AF=BB=E5=8F=96?= =?UTF-8?q?=E7=8A=B6=E6=80=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- custom/wanxiang_pro.schema.yaml | 48 ++----- lua/kp_number_processor.lua | 177 ++++++------------------ lua/wanxiang.lua | 237 +++++++++++++++++++++++++++++++- wanxiang.schema.yaml | 48 ++----- 4 files changed, 292 insertions(+), 218 deletions(-) diff --git a/custom/wanxiang_pro.schema.yaml b/custom/wanxiang_pro.schema.yaml index abeb8c0..00c5383 100644 --- a/custom/wanxiang_pro.schema.yaml +++ b/custom/wanxiang_pro.schema.yaml @@ -138,6 +138,13 @@ tips: charsetlist: [] charsetblacklist: [] +# 给 kp_number_processor 用的小键盘模式,能自动读取recognizer下面正则与之功能对齐 +kp_number: + kp_number_mode: auto + #小键盘数字处理逻辑 + # "compose" : 小键盘数字始终不上屏参与编码 + # "auto" : 输入中 push,空闲时 commit(默认) + #shijian:仅仅作为提示使用,编码已经写死,引导键可以在key_binder下修改前缀 #时间:osj 或者 /sj #日期:orq 或者 /rq @@ -491,46 +498,7 @@ recognizer: calculator: "^V.*$" #计算器功能引导 email: "^[A-Za-z][-_.0-9A-Za-z]*@.*$" # email @ 之后不上屏 url: "^(www[.]|https?:|ftp[.:]|mailto:|file:).*$|^[a-z]+[.].+$" # URL -# 给 kp_number_processor 用的“命令模式 Lua 正则集合” -# 能够细化哪些情况数字是用来当作输入编码的,不在正则范围的将用于上屏 -# 直接加载上面的正则会遇到不符合预期的情况,毕竟Lua正则逻辑与之不同 -kp_number: - #小键盘数字处理逻辑 - # "compose" : 小键盘数字始终参与编码 - # "auto" : 输入中 push,空闲时 commit(默认) - kp_number_mode: auto - patterns: - # /符号引导模式 - - "^/[0-9]$" - - "^/10$" - - "^/[A-Za-z]+$" - # U模式 - - "^U[%da-f]+$" - # R模式 - - "^R[0-9]+%.?[0-9]*$" - # 时间差 - - "^.rc%d+[-+=op]?$" - # N模式 - # Lua 不支持 {1,8},改成等价写法:N 后 1~8 个数字 - - "^N0[1-9]?0?[1-9]?$" - - "^N1[02]?0?[1-9]?$" - - "^N0[1-9]?[1-2]?[1-9]?$" - - "^N1[02]?[1-2]?[1-9]?$" - - "^N0[1-9]?3?[01]?$" - - "^N1[02]?3?[01]?$" - - "^N19?[0-9]?[0-9]?[01]?[0-9]?[0-3]?[0-9]?$" - - "^N20?[0-9]?[0-9]?[01]?[0-9]?[0-3]?[0-9]?$" - # 计算器模式 - - "^V.*$" - # email - - "^[A-Za-z][-_.0-9A-Za-z]*@.*$" - # URL 前缀几种: - - "^www[.].*$" - - "^https?:.*$" - - "^ftp[.:].*$" - - "^mailto:.*$" - - "^file:.*$" - - "^webdav:.*$" + # 标点符号 # punctuator 下面有三个子项: # 设置为一个映射,就自动上屏;设置为多个映射,如 '/' : [ '/', ÷ ] 则进行复选。 diff --git a/lua/kp_number_processor.lua b/lua/kp_number_processor.lua index 311a830..3713920 100644 --- a/lua/kp_number_processor.lua +++ b/lua/kp_number_processor.lua @@ -12,6 +12,7 @@ -- # compose : 无论是否在输入中,小键盘都参与编码(不直接上屏) -- kp_number_mode: auto + local wanxiang = require("wanxiang") -- 小键盘键码映射 @@ -27,69 +28,25 @@ local KP = { [0xFFB9] = 9, [0xFFB0] = 0, -- KP_0 } - local P = {} --- 从 schema 读取 kp_number/patterns 列表 -local function load_function_patterns(config) - local patterns = {} +-- [调试工具] 最小化日志打印 (如需调试请取消注释) +-- local function log_info(msg) +-- log.info("kp_number: " .. tostring(msg)) +-- end - local ok_list, list = pcall(function() - return config:get_list("kp_number/patterns") - end) - if ok_list and list and list.size and list.size > 0 then - for i = 0, list.size - 1 do - local item = list:get_value_at(i) - if item then - local pat = item:get_string() - if pat and pat ~= "" then - table.insert(patterns, pat) - end - end - end - end - - -- 如果用户没配,给一份保底的默认集合(等价你现在用的那些) - if #patterns == 0 then - patterns = { - "^/[0-9]$", "^/10$", "^/[A-Za-z]+$", - "^`[A-Za-z]*$", - "^``[A-Za-z/`']*$", - "^U[%da-f]+$", - "^R[0-9]+%.?[0-9]*$", - "^N0[1-9]?0?[1-9]?$", - "^N1[02]?0?[1-9]?$", - "^N0[1-9]?[1-2]?[1-9]?$", - "^N1[02]?[1-2]?[1-9]?$", - "^N0[1-9]?3?[01]?$", - "^N1[02]?3?[01]?$", - "^N19?[0-9]?[0-9]?[01]?[0-2]?[0-3]?[0-9]?$", - "^N20?[0-9]?[0-9]?[01]?[0-2]?[0-3]?[0-9]?$", - "^V.*$", - } - end - - return patterns -end - --- 根据“当前编码 + 这次按下的数字字符”判断是否属于命令模式 +-- 检查当前输入+数字是否匹配命令模式 local function is_function_code_after_digit(env, context, digit_char) - if not context or not digit_char or digit_char == "" then - return false - end + if not context or not digit_char or digit_char == "" then return false end local code = context.input or "" local s = code .. digit_char - + local pats = env.function_patterns - if not pats or #pats == 0 then - return false - end + if not pats then return false end for _, pat in ipairs(pats) do - -- 这里 pat 必须是 Lua pattern 语法 - if s:match(pat) then - return true - end + -- Lua pattern 匹配 + if s:match(pat) then return true end end return false end @@ -99,33 +56,27 @@ function P.init(env) local engine = env.engine local config = engine.schema.config local context = engine.context - - -- 读数字选词个数 + env.page_size = config:get_int("menu/page_size") or 6 - - -- 读小键盘模式:auto / compose,默认 auto local m = config:get_string("kp_number/kp_number_mode") or "auto" - if m ~= "auto" and m ~= "compose" then - m = "auto" - end + if m ~= "auto" and m ~= "compose" then m = "auto" end env.kp_mode = m - -- 初始化状态快照 env.context = context env.is_composing = context:is_composing() env.has_menu = context:has_menu() - -- 读取命令模式 Lua pattern 集合 - env.function_patterns = load_function_patterns(config) + -- 从 wanxiang 模块加载并转译正则 + -- 这一步会自动处理 YAML 正则到 Lua 模式的所有转换 + env.function_patterns = wanxiang.load_regex_patterns(config, "recognizer/patterns") - -- 用 update_notifier 同步 context / is_composing / has_menu + -- log_info("Loaded " .. #(env.function_patterns or {}) .. " patterns.") env.kp_update_connection = context.update_notifier:connect(function(ctx) env.context = ctx env.is_composing = ctx:is_composing() env.has_menu = ctx:has_menu() end) end - ---@param env Env function P.fini(env) if env.kp_update_connection then @@ -142,112 +93,67 @@ end ---@param env Env ---@return ProcessResult function P.func(key, env) - -- 只处理按下 - if key:release() then - return wanxiang.RIME_PROCESS_RESULTS.kNoop - end + if key:release() then return wanxiang.RIME_PROCESS_RESULTS.kNoop end - local engine = env.engine - local context = env.context or engine.context + local context = env.context or env.engine.context local mode = env.kp_mode or "auto" local page_sz = env.page_size - local is_composing = env.is_composing - local has_menu = env.has_menu - - ------------------------------------------------------------------ - -- 1) 小键盘数字:auto / compose - -- 如果“加上本次数字后”还匹配某个命令模式 pattern: - -- 只作为编码输入,不 commit、不选词。 - ------------------------------------------------------------------ + -- 1) 小键盘数字处理 local kp_num = KP[key.keycode] if kp_num ~= nil then if key:ctrl() or key:alt() or key:super() or key:shift() then return wanxiang.RIME_PROCESS_RESULTS.kNoop end - local ch = tostring(kp_num) -- "0".."9" + local ch = tostring(kp_num) + -- 如果匹配到正则(如网址、反查),则拦截,强制作为编码输入 if is_function_code_after_digit(env, context, ch) then - if context then - if context.push_input then - context:push_input(ch) - else - context.input = (context.input or "") .. ch - end - end + if context.push_input then context:push_input(ch) + else context.input = (context.input or "") .. ch end return wanxiang.RIME_PROCESS_RESULTS.kAccepted end + -- 正常数字逻辑 if mode == "auto" then - -- 输入中:参与编码;空闲:直接上屏 - if is_composing then - if context.push_input then - context:push_input(ch) - else - context.input = (context.input or "") .. ch - end + if env.is_composing then + if context.push_input then context:push_input(ch) + else context.input = (context.input or "") .. ch end else - engine:commit_text(ch) - end - else - -- compose:始终参与编码 - if context.push_input then - context:push_input(ch) - else - context.input = (context.input or "") .. ch + env.engine:commit_text(ch) end + else -- compose + if context.push_input then context:push_input(ch) + else context.input = (context.input or "") .. ch end end - return wanxiang.RIME_PROCESS_RESULTS.kAccepted end - ------------------------------------------------------------------ - -- 2) 主键盘数字: - -- 2.1 若“加上本次数字后”匹配命令模式 → 只当编码输入 - -- 2.2 否则: - -- 有菜单时:选第 n 个候选 - -- 空闲时:直接上屏 - ------------------------------------------------------------------ + -- 2) 主键盘数字处理 local r = key:repr() or "" - if r:match("^[0-9]$") then if key:ctrl() or key:alt() or key:super() then return wanxiang.RIME_PROCESS_RESULTS.kNoop end - -- 命令模式:只作为编码输入 + if is_function_code_after_digit(env, context, r) then - if context then - if context.push_input then - context:push_input(r) - else - context.input = (context.input or "") .. r - end - end + if context.push_input then context:push_input(r) + else context.input = (context.input or "") .. r end return wanxiang.RIME_PROCESS_RESULTS.kAccepted end - -- 有候选菜单时,用数字选「当前页」的第 n 个候选 - if has_menu then + if env.has_menu then local d = tonumber(r) - -- 如果按下的是 0,视为第 10 个选项 if d == 0 then d = 10 end - -- 检查是否在有效范围内 (例如 page_size 是 10,那么 1-10 都有效) if d and d >= 1 and d <= page_sz then - local composition = context and context.composition + local composition = context.composition if composition and not composition:empty() then local seg = composition:back() local menu = seg and seg.menu if menu and not menu:empty() then local sel_index = seg.selected_index or 0 - local page_size = page_sz - -- 计算当前页起始位置 - local page_no = math.floor(sel_index / page_size) - local page_start = page_no * page_size - - -- 计算目标候选的全局下标 (d=10 则取第10个) + local page_start = math.floor(sel_index / page_sz) * page_sz local index = page_start + (d - 1) - - -- 防止越界并执行上屏 if index < menu:candidate_count() then if context:select(index) then return wanxiang.RIME_PROCESS_RESULTS.kAccepted @@ -256,12 +162,11 @@ function P.func(key, env) end end end - -- 如果数字超出了 page_size (例如设置每页6个,按了7), - -- 或者没有选中成功,返回 kNoop,交给 Rime 默认处理 return wanxiang.RIME_PROCESS_RESULTS.kNoop end end + return wanxiang.RIME_PROCESS_RESULTS.kNoop end -return P +return P \ No newline at end of file diff --git a/lua/wanxiang.lua b/lua/wanxiang.lua index 87aa9e7..5cda15b 100644 --- a/lua/wanxiang.lua +++ b/lua/wanxiang.lua @@ -5,7 +5,7 @@ local wanxiang = {} -- x-release-please-start-version -wanxiang.version = "v14.0.6" +wanxiang.version = "v14.0.5" -- x-release-please-end @@ -281,4 +281,237 @@ function wanxiang.get_input_method_type(env) end end -return wanxiang +-- Wanxiang Regex > lua --不支持断言够用了 +local RegexParser = {} + +function RegexParser.normalize(regex) + local p = regex + p = p:gsub("%(%?%:", "%(") -- 清理 (?: + -- 基础转义 + p = p:gsub("\\d", "%%d"); p = p:gsub("\\D", "%%D") + p = p:gsub("\\w", "%%w"); p = p:gsub("\\W", "%%W") + p = p:gsub("\\s", "%%s"); p = p:gsub("\\S", "%%S") + -- 符号转义 (注意:\? -> %?,保留字面量问号) + p = p:gsub("\\%.", "%%."); p = p:gsub("\\%^", "%%^") + p = p:gsub("\\%$", "%%$"); p = p:gsub("\\%*", "%%*") + p = p:gsub("\\%+", "%%+"); p = p:gsub("\\%-", "%%-") + p = p:gsub("\\%?", "%%?") + p = p:gsub("\\%(", "%%("); p = p:gsub("\\%)", "%%)") + p = p:gsub("\\%[", "%%["); p = p:gsub("\\%]", "%%]") + + return p +end + +-- 递归展开 ? 量词 +-- 输入: "N[0-9]?A" +-- 输出: { "N[0-9]A", "NA" } +local function expand_optional(pattern_list) + local result = {} + local has_expansion = false + + for _, pat in ipairs(pattern_list) do + -- 寻找第一个未转义的 ? (Regex量词) + -- 我们需要找到 ? 的位置,并判断它修饰的前一个原子是什么 + local q_idx = nil + local atom_start = nil + local atom_end = nil + + local i = 1 + local len = #pat + while i <= len do + local char = string.sub(pat, i, i) + + if char == "%" then + -- 转义符,跳过下一个 + i = i + 2 + elseif char == "[" then + -- 集合 [...] + local j = i + 1 + while j <= len do + if string.sub(pat, j, j) == "]" and string.sub(pat, j-1, j-1) ~= "%" then + break + end + j = j + 1 + end + -- 检查后面是不是 ? + if j < len and string.sub(pat, j+1, j+1) == "?" then + atom_start = i + atom_end = j + q_idx = j + 1 + break -- 找到目标 + end + i = j + 1 + elseif char == "?" then + -- 找到一个 ?,修饰前面一个字符 + -- 注意:如果前面没有字符(比如开头),则是非法正则,忽略 + if i > 1 then + q_idx = i + atom_end = i - 1 + -- 判断前一个字符是否是转义结果 (如 %d) + if atom_end > 1 and string.sub(pat, atom_end-1, atom_end-1) == "%" then + atom_start = atom_end - 1 + else + atom_start = atom_end + end + break + end + i = i + 1 + else + i = i + 1 + end + end + + if q_idx then + has_expansion = true + -- 1. 保留原子 (去掉 ?) + local p1 = string.sub(pat, 1, atom_end) .. string.sub(pat, q_idx + 1) + -- 2. 删除原子 (去掉 原子+?) + local p2 = string.sub(pat, 1, atom_start - 1) .. string.sub(pat, q_idx + 1) + + table.insert(result, p1) + table.insert(result, p2) + else + table.insert(result, pat) + end + end + + if has_expansion then + if #result > 100 then return result end + return expand_optional(result) + end + + return result +end + +function RegexParser.smart_split(str, sep) + local results = {} + local current = "" + local paren_depth = 0 + local brack_depth = 0 + for i = 1, #str do + local char = string.sub(str, i, i) + local prev = (i > 1) and string.sub(str, i-1, i-1) or "" + if prev == "%" then + current = current .. char + else + if char == '(' then paren_depth = paren_depth + 1 end + if char == ')' then paren_depth = paren_depth - 1 end + if char == '[' then brack_depth = brack_depth + 1 end + if char == ']' then brack_depth = brack_depth - 1 end + if char == sep and paren_depth == 0 and brack_depth == 0 then + table.insert(results, current); current = "" + else + current = current .. char + end + end + end + table.insert(results, current) + return results +end + +function RegexParser.expand_groups(str_list) + local expanded = {} + for _, str in ipairs(str_list) do + local s_idx, e_idx = nil, nil + local depth = 0 + for i = 1, #str do + local char = string.sub(str, i, i) + local prev = (i > 1) and string.sub(str, i-1, i-1) or "" + if prev ~= "%" then + if char == "(" then + if depth == 0 then s_idx = i end + depth = depth + 1 + elseif char == ")" then + depth = depth - 1 + if depth == 0 and s_idx then e_idx = i; break end + end + end + end + if s_idx and e_idx then + local prefix = string.sub(str, 1, s_idx - 1) + local content = string.sub(str, s_idx + 1, e_idx - 1) + local suffix = string.sub(str, e_idx + 1) + local parts = RegexParser.smart_split(content, "|") + for _, part in ipairs(parts) do + table.insert(expanded, prefix .. part .. suffix) + end + else + table.insert(expanded, str) + end + end + return expanded +end + +local function ensure_anchor(p) + if not p or p == "" then return p end + -- 补 $ + local last = string.sub(p, -1) + local prev = string.sub(p, -2, -2) + if last ~= "$" or (last == "$" and prev == "%") then p = p .. "$" end + -- 补 ^ + local first = string.sub(p, 1, 1) + if first ~= "^" then p = "^" .. p end + return p +end + +function RegexParser.convert(regex_str) + if not regex_str or regex_str == "" then return {} end + local norm = RegexParser.normalize(regex_str) + -- 1. 拆分 | + local list = RegexParser.smart_split(norm, "|") + -- 2. 展开 () 分组 + local loop = 0 + local changed = true + while changed and loop < 5 do + local new_list = RegexParser.expand_groups(list) + if #new_list > #list then list = new_list else changed = false end + loop = loop + 1 + end + -- 3. 展开 ? 量词 + -- 这会将带 ? 的正则裂变成多个确定的正则 + list = expand_optional(list) + -- 4. 补全锚点 + for i, p in ipairs(list) do list[i] = ensure_anchor(p) end + return list +end + +--- 调用加载函数 +function wanxiang.load_regex_patterns(config, path) + local patterns = {} + local map = config:get_map(path) + if not map then return patterns end + local keys = map:keys() + if not keys then return patterns end + + local count = 0 + local is_ud = (type(keys) == "userdata") + if is_ud then + if keys.size then count = keys.size + else pcall(function() count = keys:size() end) end + else + count = #keys + end + + for i = 0, count - 1 do + local k_str + if is_ud then + local it = keys:get_value_at(i) + if it then k_str = it.value end + if not k_str then pcall(function() k_str = keys[i] end) end + else + k_str = keys[i+1] + end + + if k_str then + local val = map:get_value(k_str) + if val and val.value and val.value ~= "" then + local lua_pats = RegexParser.convert(val.value) + for _, p in ipairs(lua_pats) do + table.insert(patterns, p) + end + end + end + end + return patterns +end +return wanxiang \ No newline at end of file diff --git a/wanxiang.schema.yaml b/wanxiang.schema.yaml index 910a8e3..d6ee440 100644 --- a/wanxiang.schema.yaml +++ b/wanxiang.schema.yaml @@ -133,6 +133,13 @@ tips: charsetlist: [] charsetblacklist: [] +# 给 kp_number_processor 用的小键盘模式,能自动读取recognizer下面正则与之功能对齐 +kp_number: + kp_number_mode: auto + #小键盘数字处理逻辑 + # "compose" : 小键盘数字始终不上屏参与编码 + # "auto" : 输入中 push,空闲时 commit(默认) + #shijian:仅仅作为提示使用,编码已经写死,引导键可以在key_binder下修改前缀 #时间:osj 或者 /sj #日期:orq 或者 /rq @@ -487,46 +494,7 @@ recognizer: #add_user_dict: "^ac[A-Za-z/`']*$" #自造词引导方式 email: "^[A-Za-z][-_.0-9A-Za-z]*@.*$" # email @ 之后不上屏 url: "^(www[.]|https?:|ftp[.:]|mailto:|file:).*$|^[a-z]+[.].+$" # URL -# 给 kp_number_processor 用的“命令模式 Lua 正则集合” -# 能够细化哪些情况数字是用来当作输入编码的,不在正则范围的将用于上屏 -# 直接加载上面的正则会遇到不符合预期的情况,毕竟Lua正则逻辑与之不同 -kp_number: - #小键盘数字处理逻辑 - # "compose" : 小键盘数字始终参与编码 - # "auto" : 输入中 push,空闲时 commit(默认) - kp_number_mode: auto - patterns: - # /符号引导模式 - - "^/[0-9]$" - - "^/10$" - - "^/[A-Za-z]+$" - # U模式 - - "^U[%da-f]+$" - # R模式 - - "^R[0-9]+%.?[0-9]*$" - # 时间差 - - "^.rc%d+[-+=op]?$" - # N模式 - # Lua 不支持 {1,8},改成等价写法:N 后 1~8 个数字 - - "^N0[1-9]?0?[1-9]?$" - - "^N1[02]?0?[1-9]?$" - - "^N0[1-9]?[1-2]?[1-9]?$" - - "^N1[02]?[1-2]?[1-9]?$" - - "^N0[1-9]?3?[01]?$" - - "^N1[02]?3?[01]?$" - - "^N19?[0-9]?[0-9]?[01]?[0-9]?[0-3]?[0-9]?$" - - "^N20?[0-9]?[0-9]?[01]?[0-9]?[0-3]?[0-9]?$" - # 计算器模式 - - "^V.*$" - # email - - "^[A-Za-z][-_.0-9A-Za-z]*@.*$" - # URL 前缀几种: - - "^www[.].*$" - - "^https?:.*$" - - "^ftp[.:].*$" - - "^mailto:.*$" - - "^file:.*$" - - "^webdav:.*$" + # 标点符号 # punctuator 下面有三个子项: # 设置为一个映射,就自动上屏;设置为多个映射,如 '/' : [ '/', ÷ ] 则进行复选。