Files
rime_wanxiang/lua/wanxiang.lua
2026-01-31 01:04:15 +08:00

538 lines
18 KiB
Lua
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
---@diagnostic disable: undefined-global
-- Shared utility functions used by the Wanxiang Lua scripts.
local wanxiang = {}
-- x-release-please-start-version
wanxiang.version = "v14.6.4"
-- x-release-please-end
-- Module-wide constants.
-- Result codes a processor can return for a key event.
---@alias PROCESS_RESULT ProcessResult
wanxiang.RIME_PROCESS_RESULTS = {
kRejected = 0, -- the processor explicitly rejected the key: the chain stops, but true is not returned
kAccepted = 1, -- the processor handled the key: the chain stops and true is returned
kNoop = 2, -- the processor did not handle the key: it is passed on to the next processor
}
-- Memoized answer; computed on the first call and reused afterwards.
local is_mobile_device = nil

--- Report whether the current frontend should be treated as a mobile device.
-- The distribution code name, the user/shared data directories and (when
-- available) `jit.os` are inspected. Note that "squirrel" and paths containing
-- "applications"/"library" also count: per the original author's note, macOS
-- is meant to follow the same logic as phones.
---@author amzxyz
---@return boolean
function wanxiang.is_mobile_device()
  if is_mobile_device ~= nil then
    return is_mobile_device
  end

  -- Distribution code names that are treated as mobile frontends.
  local mobile_dists = {
    trime = true,
    hamster = true,
    hamster3 = true,
    squirrel = true,
  }
  -- Fragments looked for in the lower-cased user data directory.
  local user_path_marks = {
    "/android/",
    "/mobile/",
    "/sdcard/",
    "/data/storage/",
    "/storage/emulated/",
    "applications",
    "library",
  }
  -- Fragments looked for in the lower-cased shared data directory.
  local shared_path_marks = { "applications", "library" }

  local function has_any(haystack, marks)
    for _, mark in ipairs(marks) do
      if haystack:find(mark) then
        return true
      end
    end
    return false
  end

  local function detect()
    local dist = rime_api.get_distribution_code_name() or ""
    local user_data_dir = rime_api.get_user_data_dir() or ""
    local sys_dir = rime_api.get_shared_data_dir() or ""

    -- Compare case-insensitively.
    local dist_lc = dist:lower()
    local user_lc = user_data_dir:lower()
    local sys_lc = sys_dir:lower()

    if mobile_dists[dist_lc] then
      return true
    end
    if has_any(user_lc, user_path_marks) then
      return true
    end
    if has_any(sys_lc, shared_path_marks) then
      return true
    end
    -- Platform check through LuaJIT, when it is present.
    if jit and jit.os and jit.os:lower():find("android") then
      return true
    end
    -- Nothing matched: assume a desktop device.
    return false
  end

  is_mobile_device = detect()
  return is_mobile_device
end
--- Tell whether the active schema is the Wanxiang Pro schema.
-- The schema id is compared directly; an earlier approach (kept disabled in
-- the original) looked for a trailing "PRO" in the schema name instead.
---@param env Env
---@return boolean
function wanxiang.is_pro_scheme(env)
  local schema = env.engine.schema
  local schema_id = schema.schema_id
  return schema_id == "wanxiang_pro"
end
--- Detect reverse-lookup mode by checking the tag of the last segment.
-- Returns false when the composition has no last segment.
function wanxiang.is_in_radical_mode(env)
  local last_segment = env.engine.context.composition:back()
  if not last_segment then
    return false
  end
  return last_segment:has_tag("wanxiang_reverse") or false
end
-- Segment tags that mark a "function/command" mode, checked in this order.
local FUNCTION_MODE_TAGS = {
  "number",     -- number_translator.lua: number/amount conversion, R + digits
  "unicode",    -- unicode.lua: emit a Unicode character, U + lowercase letters or digits
  -- "punct",   -- punctuation full-width/half-width hint (disabled)
  "calculator", -- super_calculator.lua: calculator on the V key
  "shijian",    -- shijian.lua: /rq, /sr and other date/time features
  "Ndate",      -- shijian.lua: N date feature
}

--- Tell whether the last segment of the composition is in a command mode.
---@param context Context | nil
---@return boolean
function wanxiang.is_function_mode_active(context)
  if not context then
    return false
  end
  local composition = context.composition
  if not composition or composition:empty() then
    return false
  end
  local last_segment = composition:back()
  if not last_segment then
    return false
  end
  -- Stop at the first tag that is present; otherwise the result of the last
  -- check is returned, exactly like a chained `or`.
  local hit
  for _, tag in ipairs(FUNCTION_MODE_TAGS) do
    hit = last_segment:has_tag(tag)
    if hit then
      break
    end
  end
  return hit
end
--- Check whether a file can be opened for reading.
---@param filename string
---@return boolean
function wanxiang.file_exists(filename)
  local handle = io.open(filename, "r")
  if handle == nil then
    return false
  end
  handle:close()
  return true
end
-- Inclusive code point ranges that count as Chinese characters.
local CJK_RANGES = {
  { 0x4E00, 0x9FFF },   -- Basic
  { 0x3400, 0x4DBF },   -- Ext A
  { 0x20000, 0x2A6DF }, -- Ext B
  { 0x2A700, 0x2B73F }, -- Ext C
  { 0x2B740, 0x2B81F }, -- Ext D
  { 0x2B820, 0x2CEAF }, -- Ext E
  { 0x2CEB0, 0x2EBEF }, -- Ext F
  { 0x30000, 0x3134F }, -- Ext G
  { 0x31350, 0x323AF }, -- Ext H
  { 0x2EBF0, 0x2EE5F }, -- Ext I
  { 0xF900, 0xFAFF },   -- Compatibility
  { 0x2F800, 0x2FA1F }, -- Compatibility Supplement
  { 0x2E80, 0x2EFF },   -- Radicals Supplement
  { 0x2F00, 0x2FDF },   -- Kangxi Radicals
}

--- Tell whether the first character of `text` is a Chinese character.
-- Only the first UTF-8 character is inspected. Anything that cannot be decoded
-- (non-string, empty string, malformed UTF-8) yields false instead of raising,
-- because `utf8.codepoint` throws on such input.
---@param text string
---@return boolean
function wanxiang.IsChineseCharacter(text)
  if type(text) ~= "string" or text == "" then
    return false
  end
  local ok, codepoint = pcall(utf8.codepoint, text)
  if not ok or not codepoint then
    return false
  end
  for _, range in ipairs(CJK_RANGES) do
    if codepoint >= range[1] and codepoint <= range[2] then
      return true
    end
  end
  return false
end
-- File lookup helpers: a relative file name is resolved against the user
-- directory first and the shared (system) directory second.

-- Helper: tell whether `path` is absolute, i.e. starts with "/" or "\" or
-- with a drive letter followed by ":" and a slash/backslash.
local function is_absolute_path(path)
  if not path then
    return false
  end
  local head = path:sub(1, 1)
  if head == "/" or head == "\\" then
    return true
  end
  return path:match("^[a-zA-Z]:[\\/]") ~= nil
end
--- Resolve a file by priority: user data directory first, shared directory second.
-- Leading slashes/backslashes are stripped from `filename` before joining.
-- If a directory reported by `rime_api` is not an absolute path, the original
-- `filename` is returned unchanged at that point.
---@param filename string relative path
---@return string | nil # full path of the first existing file, or nil when none exists
function wanxiang.get_filename_with_fallback(filename)
  local relative = filename:gsub("^[\\/]+", "")

  -- Try one base directory. The second result tells the caller to stop.
  local function probe(base_dir)
    if not is_absolute_path(base_dir) then
      return filename, true
    end
    local candidate = base_dir .. "/" .. relative
    if wanxiang.file_exists(candidate) then
      return candidate, true
    end
    return nil, false
  end

  local found, done = probe(rime_api.get_user_data_dir())
  if done then
    return found
  end
  found, done = probe(rime_api.get_shared_data_dir())
  if done then
    return found
  end
  return nil
end
--- Open a file by priority: user data directory first, shared directory second.
-- The file name is resolved with `wanxiang.get_filename_with_fallback`; when
-- nothing is resolved no file is opened and both `file` and `err` are nil.
---@param filename string relative path
---@param mode string | nil mode passed to `io.open`, defaults to "r"
---@return file* | nil file
---@return function close closes the file if it is still open; safe to call repeatedly
---@return string | nil err error message from `io.open`, if any
function wanxiang.load_file_with_fallback(filename, mode)
  local open_mode = mode or "r"
  local resolved = wanxiang.get_filename_with_fallback(filename)

  local file, err
  if resolved then
    file, err = io.open(resolved, open_mode)
  end

  local function close()
    if file then
      file:close()
      file = nil
    end
  end

  return file, close, err
end
-- Value that makes get_user_id fall back to reading installation.yaml.
local USER_ID_DEFAULT = "unknown"

--- Workaround for `rime_api.get_user_id()` on Weasel and Hamster.
--- See:
--- 1. https://github.com/rime/weasel/pull/1649
--- 2. https://github.com/rime/librime/issues/1038
--- When the API reports the default id, `installation_id` is read from
--- `installation.yaml` in the user data directory; if the file cannot be
--- opened or has no such key, the API value is returned unchanged.
---@return string
function wanxiang.get_user_id()
  local api_id = rime_api.get_user_id()
  if api_id ~= USER_ID_DEFAULT then
    return api_id
  end

  local yaml_path = rime_api.get_user_data_dir() .. "/installation.yaml"
  local yaml_file = io.open(yaml_path, "r")
  if not yaml_file then
    return api_id
  end

  local resolved_id = api_id
  for line in yaml_file:lines() do
    -- `key: value` or `key: "value"`; lines starting with "#" never match.
    local key, value = line:match('^([^#:]+):%s+"?([^"]%S+[^"])"?')
    if key == "installation_id" then
      resolved_id = value
      break
    end
  end
  yaml_file:close()
  return resolved_id
end
-- Maps a marker string (searched for in `speller/algebra` entries by
-- get_input_method_type) to an input scheme id.
-- NOTE(review): every key, and the value of the last entry, renders as an empty
-- string in this view; presumably the real file holds distinct non-printing /
-- private-use marker characters here — confirm against the original file before
-- touching these literals.
wanxiang.INPUT_METHOD_MARKERS = {
[""] = "pinyin", -- full pinyin
[""] = "zrm", -- Ziranma double pinyin
[""] = "flypy", -- Xiaohe (flypy) double pinyin
[""] = "mspy", -- Microsoft double pinyin
[""] = "sogou", -- Sogou double pinyin
[""] = "abc", -- Intelligent ABC double pinyin
[""] = "ziguang", -- Ziguang double pinyin
[""] = "pyjj", -- Pinyin Jiajia
[""] = "gbpy", -- national-standard (GB) double pinyin
[""] = "wxsp", -- Wanxiang double pinyin
[""] = "zrlong", -- Ziranlong
[""] = "hxlong", -- Hanxinlong
[""] = "lxsq", -- Luanxu 17 (shuffled-17)
[""] = "", -- indirect auxiliary marker: a hit additionally returns md="ⅲ"
}
local __input_type_cache = {} -- per-schema cache of the first matched id (keeps the legacy usage working)
local __input_md_cache = {} -- per-schema cache of the auxiliary marker: the marker string when matched, otherwise nil
--- Determine the input scheme type from marker strings found in `speller/algebra`:
--- - when the auxiliary marker is not matched, only the id is returned (legacy behavior)
--- - when the auxiliary marker is matched, two values are returned: id, md
--- Results are cached per schema id, so the config is scanned once per schema.
--- NOTE(review): the original comments call the auxiliary marker "ⅲ", but the
--- literals compared and stored below render as empty strings in this view;
--- presumably they hold a non-printing marker character — confirm against the
--- original file.
---@param env Env
---@return string -- id ("unknown" when no marker matched)
---@return string|nil -- md, only returned when the auxiliary marker matched
function wanxiang.get_input_method_type(env)
local schema_id = env.engine.schema.schema_id or "unknown"
-- On a cache hit, return one or two values depending on whether md was cached
local cached_id = __input_type_cache[schema_id]
if cached_id then
local cached_md = __input_md_cache[schema_id]
if cached_md then
return cached_id, cached_md -- two values: id, md
else
return cached_id -- id only
end
end
local cfg = env.engine.schema.config
local result_id = "unknown"
local md = nil -- set only when the auxiliary marker is matched
local n = cfg:get_list_size("speller/algebra")
for i = 0, n - 1 do
local s = cfg:get_string(("speller/algebra/@%d"):format(i))
if s then
-- No early return: the whole list has to be scanned to know whether the auxiliary marker occurs
-- Plain-text search (4th argument true), so marker characters are never treated as pattern magic
for symbol, id in pairs(wanxiang.INPUT_METHOD_MARKERS) do
if s:find(symbol, 1, true) then
if symbol == "" or id == "" then
md = "" -- remember the auxiliary marker
else
if result_id == "unknown" then
result_id = id -- keep only the first "regular" id that was matched
end
end
end
end
end
end
-- Store in the caches
__input_type_cache[schema_id] = result_id
__input_md_cache[schema_id] = md -- the marker when matched, otherwise nil
-- Return two values when the auxiliary marker matched, otherwise one
if md then
return result_id, md
else
return result_id
end
end
-- Wanxiang regex -> Lua pattern converter. Assertions (look-around) are not
-- supported; the subset handled here is considered sufficient.
local RegexParser = {}

-- Regex escapes and their Lua pattern counterparts, applied in order.
-- Each entry is { lua_pattern_matching_the_regex_escape, gsub_replacement }.
local REGEX_ESCAPE_RULES = {
  -- character classes
  { "\\d", "%%d" }, { "\\D", "%%D" },
  { "\\w", "%%w" }, { "\\W", "%%W" },
  { "\\s", "%%s" }, { "\\S", "%%S" },
  -- escaped punctuation (note: \? -> %?, which keeps a literal question mark)
  { "\\%.", "%%." }, { "\\%^", "%%^" },
  { "\\%$", "%%$" }, { "\\%*", "%%*" },
  { "\\%+", "%%+" }, { "\\%-", "%%-" },
  { "\\%?", "%%?" },
  { "\\%(", "%%(" }, { "\\%)", "%%)" },
  { "\\%[", "%%[" }, { "\\%]", "%%]" },
}

--- Rewrite regex syntax into Lua pattern syntax.
-- Non-capturing group openers `(?:` become `(`, and backslash escapes become
-- `%` escapes. Alternation, groups and `?` quantifiers are left for the later
-- expansion steps.
---@param regex string
---@return string
function RegexParser.normalize(regex)
  -- The replacement must be a plain "(": "%(" is an invalid escape in a gsub
  -- replacement string on Lua 5.2 and later and raises
  -- "invalid use of '%' in replacement string" as soon as "(?:" is matched.
  local p = regex:gsub("%(%?%:", "(")
  for _, rule in ipairs(REGEX_ESCAPE_RULES) do
    p = p:gsub(rule[1], rule[2])
  end
  return p
end
--- Expand `?` quantifiers into explicit alternatives.
-- Input:  { "N[0-9]?A" }
-- Output: { "N[0-9]A", "NA" }
-- Each pass expands the first unescaped `?` of every pattern into two
-- patterns (atom kept / atom removed). Passes repeat until nothing changes or
-- the list grows beyond 100 entries, in which case the partially expanded
-- list is returned.
---@param pattern_list string[]
---@return string[]
local function expand_optional(pattern_list)
  -- Find the first unescaped `?` and the atom it modifies.
  -- Returns atom_start, atom_end, question_index, or nil when there is none.
  -- Escape pairs (`%x`) are consumed as a unit, so a `%` that is itself the
  -- second half of `%%` is never mistaken for the start of an escape.
  local function find_optional(pat)
    local len = #pat
    local i = 1
    local prev_start = nil -- start index of the atom that ends at i - 1
    while i <= len do
      local ch = pat:sub(i, i)
      if ch == "%" then
        -- Escape pair: one atom made of two characters.
        prev_start = i
        i = i + 2
      elseif ch == "[" then
        -- Character set: scan to the closing bracket, skipping escape pairs.
        local j = i + 1
        while j <= len do
          local c = pat:sub(j, j)
          if c == "%" then
            j = j + 2
          elseif c == "]" then
            break
          else
            j = j + 1
          end
        end
        if j < len and pat:sub(j + 1, j + 1) == "?" then
          return i, j, j + 1
        end
        prev_start = i
        i = j + 1
      elseif ch == "?" and i > 1 then
        -- Quantifier on the previous atom. A `?` at position 1 has nothing to
        -- modify and is treated as an ordinary character below.
        return prev_start, i - 1, i
      else
        prev_start = i
        i = i + 1
      end
    end
    return nil
  end

  local current = pattern_list
  while true do
    local result = {}
    local has_expansion = false
    for _, pat in ipairs(current) do
      local atom_start, atom_end, q_idx = find_optional(pat)
      if q_idx then
        has_expansion = true
        local tail = pat:sub(q_idx + 1)
        -- 1. keep the atom (drop only the `?`)
        result[#result + 1] = pat:sub(1, atom_end) .. tail
        -- 2. drop the atom together with the `?`
        result[#result + 1] = pat:sub(1, atom_start - 1) .. tail
      else
        result[#result + 1] = pat
      end
    end
    -- Stop when stable, or bail out to cap combinatorial growth.
    if not has_expansion or #result > 100 then
      return result
    end
    current = result
  end
end
--- Split `str` on `sep`, ignoring separators that are escaped with `%` or
--- that sit inside parentheses or brackets.
-- Escape state is tracked explicitly, so in `%%|` the `|` is a real separator
-- (the `%%` pair is a literal percent sign) rather than an escaped character.
-- `%` is always treated as the escape character.
---@param str string
---@param sep string single-character separator
---@return string[] parts always at least one element
function RegexParser.smart_split(str, sep)
  local results = {}
  local buf = {} -- characters of the part being built
  local paren_depth = 0
  local brack_depth = 0
  local escaped = false -- true when the previous character was an unescaped `%`
  for i = 1, #str do
    local ch = str:sub(i, i)
    if escaped then
      buf[#buf + 1] = ch
      escaped = false
    elseif ch == "%" then
      buf[#buf + 1] = ch
      escaped = true
    else
      if ch == "(" then
        paren_depth = paren_depth + 1
      elseif ch == ")" then
        paren_depth = paren_depth - 1
      elseif ch == "[" then
        brack_depth = brack_depth + 1
      elseif ch == "]" then
        brack_depth = brack_depth - 1
      end
      if ch == sep and paren_depth == 0 and brack_depth == 0 then
        results[#results + 1] = table.concat(buf)
        buf = {}
      else
        buf[#buf + 1] = ch
      end
    end
  end
  results[#results + 1] = table.concat(buf)
  return results
end
--- Expand the first top-level `( ... )` group of every pattern.
-- A group `pre(a|b)post` becomes `preapost` and `prebpost`; patterns without
-- a complete group are copied unchanged. Only one group per pattern is
-- expanded per call, so callers repeat the call for further or nested groups.
-- Escape state is tracked explicitly so that a parenthesis following `%%`
-- (a literal percent sign) is still recognised as a group delimiter.
---@param str_list string[]
---@return string[]
function RegexParser.expand_groups(str_list)
  local expanded = {}
  for _, str in ipairs(str_list) do
    local open_idx, close_idx = nil, nil
    local depth = 0
    local escaped = false -- true when the previous character was an unescaped `%`
    for i = 1, #str do
      local ch = str:sub(i, i)
      if escaped then
        escaped = false
      elseif ch == "%" then
        escaped = true
      elseif ch == "(" then
        if depth == 0 then
          open_idx = i
        end
        depth = depth + 1
      elseif ch == ")" then
        depth = depth - 1
        if depth == 0 and open_idx then
          close_idx = i
          break
        end
      end
    end
    if open_idx and close_idx then
      local prefix = str:sub(1, open_idx - 1)
      local content = str:sub(open_idx + 1, close_idx - 1)
      local suffix = str:sub(close_idx + 1)
      -- One output pattern per top-level alternative inside the group.
      for _, part in ipairs(RegexParser.smart_split(content, "|")) do
        expanded[#expanded + 1] = prefix .. part .. suffix
      end
    else
      expanded[#expanded + 1] = str
    end
  end
  return expanded
end
--- Make sure a Lua pattern is anchored at both ends (`^...$`).
-- nil and the empty string are returned unchanged. A trailing `$` counts as
-- an anchor only when it is preceded by an even number of `%` characters:
-- in `a%$` the dollar is escaped and an anchor is appended, while in `a%%$`
-- the `%%` is a literal percent sign and the `$` already anchors the pattern.
---@param p string | nil
---@return string | nil
local function ensure_anchor(p)
  if not p or p == "" then
    return p
  end
  -- Add `$` unless the pattern already ends in an unescaped one.
  local anchored_at_end = false
  if p:sub(-1) == "$" then
    local percent_run = p:match("(%%*)%$$")
    anchored_at_end = (#percent_run % 2 == 0)
  end
  if not anchored_at_end then
    p = p .. "$"
  end
  -- Add `^`.
  if p:sub(1, 1) ~= "^" then
    p = "^" .. p
  end
  return p
end
--- Convert one regex string into a list of anchored Lua patterns.
-- Steps: normalise escapes, split top-level `|`, expand `( )` groups,
-- expand `?` quantifiers, then anchor every resulting pattern.
-- Group expansion runs for at most 5 passes (one group per pattern per pass).
-- Progress is detected by comparing pattern contents rather than list length:
-- a group with a single alternative, e.g. `(?:foo)(a|b)`, changes the text
-- without changing the count, and must not stop the expansion of the groups
-- that follow it.
---@param regex_str string | nil
---@return string[] patterns empty when `regex_str` is nil or ""
function RegexParser.convert(regex_str)
  if not regex_str or regex_str == "" then
    return {}
  end
  local norm = RegexParser.normalize(regex_str)
  -- 1. split on top-level |
  local list = RegexParser.smart_split(norm, "|")
  -- 2. expand () groups
  for _ = 1, 5 do
    local new_list = RegexParser.expand_groups(list)
    local changed = #new_list ~= #list
    if not changed then
      for i = 1, #list do
        if new_list[i] ~= list[i] then
          changed = true
          break
        end
      end
    end
    if not changed then
      break
    end
    list = new_list
  end
  -- 3. expand ? quantifiers: each pattern containing ? splits into concrete patterns
  list = expand_optional(list)
  -- 4. add the missing anchors
  for i, p in ipairs(list) do
    list[i] = ensure_anchor(p)
  end
  return list
end
--- Load every regex stored under a config map and convert it to Lua patterns.
-- Each non-empty value of the map at `path` is passed through
-- `RegexParser.convert`; all resulting patterns are collected into one list.
-- The key collection returned by `map:keys()` may be a Lua table or a
-- userdata list, and both shapes are handled.
---@param config Config
---@param path string config path of the map
---@return string[] patterns empty when the map or its keys are missing
function wanxiang.load_regex_patterns(config, path)
  local patterns = {}

  local map = config:get_map(path)
  if not map then
    return patterns
  end
  local keys = map:keys()
  if not keys then
    return patterns
  end

  -- Work out how many keys there are, depending on the collection's shape.
  local keys_are_userdata = (type(keys) == "userdata")
  local total = 0
  if not keys_are_userdata then
    total = #keys
  elseif keys.size then
    total = keys.size
  else
    pcall(function() total = keys:size() end)
  end

  for idx = 0, total - 1 do
    local key
    if keys_are_userdata then
      -- Userdata list: 0-based access, with plain indexing as a fallback.
      local item = keys:get_value_at(idx)
      if item then
        key = item.value
      end
      if not key then
        pcall(function() key = keys[idx] end)
      end
    else
      -- Lua table: 1-based.
      key = keys[idx + 1]
    end

    if key then
      local entry = map:get_value(key)
      if entry and entry.value and entry.value ~= "" then
        for _, pattern in ipairs(RegexParser.convert(entry.value)) do
          table.insert(patterns, pattern)
        end
      end
    end
  end

  return patterns
end
return wanxiang