feat: 新增yaml正则转义lua正则模块，数字键处理逻辑将直接读取状态

2026-05-25 07:36:44 +00:00 · 2026-01-05 10:34:19 +08:00
parent be21abc4d3
commit 0417d9dfdb
4 changed files with 292 additions and 218 deletions
--- a/lua/wanxiang.lua
+++ b/lua/wanxiang.lua
@@ -5,7 +5,7 @@ local wanxiang = {}

 -- x-release-please-start-version

-wanxiang.version = "v14.0.6"
+wanxiang.version = "v14.0.5"

 -- x-release-please-end

@@ -281,4 +281,237 @@ function wanxiang.get_input_method_type(env)
    end
 end

-return wanxiang
+-- Wanxiang Regex > lua --不支持断言够用了
+local RegexParser = {}
+
+function RegexParser.normalize(regex)
+    local p = regex
+    p = p:gsub("%(%?%:", "%(") -- 清理 (?:
+    -- 基础转义
+    p = p:gsub("\\d", "%%d"); p = p:gsub("\\D", "%%D")
+    p = p:gsub("\\w", "%%w"); p = p:gsub("\\W", "%%W")
+    p = p:gsub("\\s", "%%s"); p = p:gsub("\\S", "%%S")
+    -- 符号转义 (注意：\? -> %?，保留字面量问号)
+    p = p:gsub("\\%.", "%%."); p = p:gsub("\\%^", "%%^")
+    p = p:gsub("\\%$", "%%$"); p = p:gsub("\\%*", "%%*")
+    p = p:gsub("\\%+", "%%+"); p = p:gsub("\\%-", "%%-")
+    p = p:gsub("\\%?", "%%?")
+    p = p:gsub("\\%(", "%%("); p = p:gsub("\\%)", "%%)")
+    p = p:gsub("\\%[", "%%["); p = p:gsub("\\%]", "%%]")
+    
+    return p
+end
+
+-- 递归展开 ? 量词
+-- 输入: "N[0-9]?A"
+-- 输出: { "N[0-9]A", "NA" }
+local function expand_optional(pattern_list)
+    local result = {}
+    local has_expansion = false
+
+    for _, pat in ipairs(pattern_list) do
+        -- 寻找第一个未转义的 ? (Regex量词)
+        -- 我们需要找到 ? 的位置，并判断它修饰的前一个原子是什么
+        local q_idx = nil
+        local atom_start = nil
+        local atom_end = nil
+
+        local i = 1
+        local len = #pat
+        while i <= len do
+            local char = string.sub(pat, i, i)
+            
+            if char == "%" then
+                -- 转义符，跳过下一个
+                i = i + 2
+            elseif char == "[" then
+                -- 集合 [...]
+                local j = i + 1
+                while j <= len do
+                    if string.sub(pat, j, j) == "]" and string.sub(pat, j-1, j-1) ~= "%" then
+                        break
+                    end
+                    j = j + 1
+                end
+                -- 检查后面是不是 ?
+                if j < len and string.sub(pat, j+1, j+1) == "?" then
+                    atom_start = i
+                    atom_end = j
+                    q_idx = j + 1
+                    break -- 找到目标
+                end
+                i = j + 1
+            elseif char == "?" then
+                -- 找到一个 ?，修饰前面一个字符
+                -- 注意：如果前面没有字符（比如开头），则是非法正则，忽略
+                if i > 1 then
+                    q_idx = i
+                    atom_end = i - 1
+                    -- 判断前一个字符是否是转义结果 (如 %d)
+                    if atom_end > 1 and string.sub(pat, atom_end-1, atom_end-1) == "%" then
+                        atom_start = atom_end - 1
+                    else
+                        atom_start = atom_end
+                    end
+                    break
+                end
+                i = i + 1
+            else
+                i = i + 1
+            end
+        end
+
+        if q_idx then
+            has_expansion = true
+            -- 1. 保留原子 (去掉 ?)
+            local p1 = string.sub(pat, 1, atom_end) .. string.sub(pat, q_idx + 1)
+            -- 2. 删除原子 (去掉 原子+?)
+            local p2 = string.sub(pat, 1, atom_start - 1) .. string.sub(pat, q_idx + 1)
+            
+            table.insert(result, p1)
+            table.insert(result, p2)
+        else
+            table.insert(result, pat)
+        end
+    end
+
+    if has_expansion then
+        if #result > 100 then return result end
+        return expand_optional(result)
+    end
+    
+    return result
+end
+
+function RegexParser.smart_split(str, sep)
+    local results = {}
+    local current = ""
+    local paren_depth = 0
+    local brack_depth = 0
+    for i = 1, #str do
+        local char = string.sub(str, i, i)
+        local prev = (i > 1) and string.sub(str, i-1, i-1) or ""
+        if prev == "%" then
+            current = current .. char
+        else
+            if char == '(' then paren_depth = paren_depth + 1 end
+            if char == ')' then paren_depth = paren_depth - 1 end
+            if char == '[' then brack_depth = brack_depth + 1 end
+            if char == ']' then brack_depth = brack_depth - 1 end
+            if char == sep and paren_depth == 0 and brack_depth == 0 then
+                table.insert(results, current); current = ""
+            else
+                current = current .. char
+            end
+        end
+    end
+    table.insert(results, current)
+    return results
+end
+
+function RegexParser.expand_groups(str_list)
+    local expanded = {}
+    for _, str in ipairs(str_list) do
+        local s_idx, e_idx = nil, nil
+        local depth = 0
+        for i = 1, #str do
+            local char = string.sub(str, i, i)
+            local prev = (i > 1) and string.sub(str, i-1, i-1) or ""
+            if prev ~= "%" then
+                if char == "(" then
+                    if depth == 0 then s_idx = i end
+                    depth = depth + 1
+                elseif char == ")" then
+                    depth = depth - 1
+                    if depth == 0 and s_idx then e_idx = i; break end
+                end
+            end
+        end
+        if s_idx and e_idx then
+            local prefix = string.sub(str, 1, s_idx - 1)
+            local content = string.sub(str, s_idx + 1, e_idx - 1)
+            local suffix = string.sub(str, e_idx + 1)
+            local parts = RegexParser.smart_split(content, "|")
+            for _, part in ipairs(parts) do
+                table.insert(expanded, prefix .. part .. suffix)
+            end
+        else
+            table.insert(expanded, str)
+        end
+    end
+    return expanded
+end
+
+local function ensure_anchor(p)
+    if not p or p == "" then return p end
+    -- 补 $
+    local last = string.sub(p, -1)
+    local prev = string.sub(p, -2, -2)
+    if last ~= "$" or (last == "$" and prev == "%") then p = p .. "$" end
+    -- 补 ^
+    local first = string.sub(p, 1, 1)
+    if first ~= "^" then p = "^" .. p end
+    return p
+end
+
+function RegexParser.convert(regex_str)
+    if not regex_str or regex_str == "" then return {} end
+    local norm = RegexParser.normalize(regex_str)
+    -- 1. 拆分 |
+    local list = RegexParser.smart_split(norm, "|")
+    -- 2. 展开 () 分组
+    local loop = 0
+    local changed = true
+    while changed and loop < 5 do
+        local new_list = RegexParser.expand_groups(list)
+        if #new_list > #list then list = new_list else changed = false end
+        loop = loop + 1
+    end
+    -- 3. 展开 ? 量词
+    -- 这会将带 ? 的正则裂变成多个确定的正则
+    list = expand_optional(list)
+    -- 4. 补全锚点
+    for i, p in ipairs(list) do list[i] = ensure_anchor(p) end
+    return list
+end
+
+--- 调用加载函数
+function wanxiang.load_regex_patterns(config, path)
+    local patterns = {}
+    local map = config:get_map(path)
+    if not map then return patterns end
+    local keys = map:keys()
+    if not keys then return patterns end
+    
+    local count = 0
+    local is_ud = (type(keys) == "userdata")
+    if is_ud then
+        if keys.size then count = keys.size 
+        else pcall(function() count = keys:size() end) end
+    else
+        count = #keys
+    end
+
+    for i = 0, count - 1 do
+        local k_str
+        if is_ud then
+            local it = keys:get_value_at(i)
+            if it then k_str = it.value end
+            if not k_str then pcall(function() k_str = keys[i] end) end
+        else
+            k_str = keys[i+1]
+        end
+
+        if k_str then
+            local val = map:get_value(k_str)
+            if val and val.value and val.value ~= "" then
+                local lua_pats = RegexParser.convert(val.value)
+                for _, p in ipairs(lua_pats) do
+                    table.insert(patterns, p)
+                end
+            end
+        end
+    end
+    return patterns
+end
+return wanxiang