Files
rime_wanxiang/lua/super_lookup.lua

285 lines
9.4 KiB
Lua
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
--@amzxyz https://github.com/amzxyz/rime_wanxiang
--wanxiang_lookup: #设置归属于super_lookup.lua
--tags: [ abc ] # 检索当前tag的候选
--key: "`" # 输入中反查引导符,要添加到 speller/alphabet
--lookup: [ wanxiang_reverse ] #反查滤镜数据库,万象都合并为一个了
------------------------------------------------------------
-- 工具函数
------------------------------------------------------------
local function alt_lua_punc(s)
return s and s:gsub('([%.%+%-%*%?%[%]%^%$%(%)%%])', '%%%1') or ''
end
local function is_all_upper(s) return s:match('^%u+$') ~= nil end
local function is_all_lower(s) return s:match('^%l+$') ~= nil end
------------------------------------------------------------
-- 规则加载
------------------------------------------------------------
local function parse_and_separate_rules(schema_id)
if not schema_id or #schema_id == 0 then return nil, nil end
local schema = Schema(schema_id)
if not schema then return nil, nil end
local config = schema.config
if not config then return nil, nil end
local algebra_list = config:get_list('speller/algebra')
if not algebra_list or algebra_list.size == 0 then return nil, nil end
local main_rules, xlit_rules = {}, {}
for i = 0, algebra_list.size - 1 do
local rule = algebra_list:get_value_at(i).value
if rule and #rule > 0 then
if rule:match("^xlit/HSPZN/") then
table.insert(xlit_rules, rule)
else
table.insert(main_rules, rule)
end
end
end
if #main_rules == 0 and #xlit_rules == 0 then return nil, nil end
return main_rules, xlit_rules
end
local function get_schema_rules(env)
local config = env.engine.schema.config
local db_list = config:get_list("wanxiang_lookup/lookup")
if not db_list or db_list.size == 0 then return {}, {} end
local schema_id = db_list:get_value_at(0).value
if not schema_id or #schema_id == 0 then return {}, {} end
local main_rules, xlit_rules = parse_and_separate_rules(schema_id)
if not main_rules and not xlit_rules then return {}, {} end
return main_rules or {}, xlit_rules or {}
end
------------------------------------------------------------
-- 核心逻辑
------------------------------------------------------------
local function expand_code_variant(main_projection, xlit_projection, part)
local out, seen = {}, {}
local function add(s)
if s and #s > 0 and not seen[s] then
seen[s] = true
out[#out + 1] = s
end
end
add(part)
if main_projection then
local p = main_projection:apply(part, true)
if p and #p > 0 then add(p) end
end
local base = {}
for i = 1, #out do
local elem = out[i]
if is_all_lower(elem) then base[#base + 1] = elem end
end
for _, s in ipairs(base) do
if #s == 4 and is_all_lower(s) then
add(s:sub(1,1) .. s:sub(3,3))
end
end
if is_all_upper(part) and xlit_projection then
local xlit_result = xlit_projection:apply(part, true)
if xlit_result and #xlit_result > 0 then add(xlit_result) end
end
return out
end
local function build_reverse_group(main_projection, xlit_projection, db_table, text)
local group, seen = {}, {}
for _, db in ipairs(db_table) do
local code = db:lookup(text)
if code and #code > 0 then
for part in code:gmatch('%S+') do
local variants = expand_code_variant(main_projection, xlit_projection, part)
for _, v in ipairs(variants) do
if not seen[v] then
seen[v] = true
group[#group + 1] = v
end
end
end
end
end
return group
end
local function group_match(group, fuma)
if not group then return false end
for i = 1, #group do
if tostring(group[i]):sub(1, #fuma) == fuma then return true end
end
return false
end
------------------------------------------------------------
-- 过滤器主体
------------------------------------------------------------
local f = {}
function f.init(env)
local config = env.engine.schema.config
env.if_reverse_lookup = false
env.db_table = nil
local db = config:get_list("wanxiang_lookup/lookup")
if db and db.size > 0 then
env.db_table = {}
for i = 0, db.size - 1 do
table.insert(env.db_table, ReverseLookup(db:get_value_at(i).value))
end
env.if_reverse_lookup = true
else
return
end
local main_rules, xlit_rules = get_schema_rules(env)
env.main_projection = (type(main_rules) == 'table' and #main_rules > 0) and Projection() or nil
if env.main_projection then env.main_projection:load(main_rules) end
env.xlit_projection = (type(xlit_rules) == 'table' and #xlit_rules > 0) and Projection() or nil
if env.xlit_projection then env.xlit_projection:load(xlit_rules) end
env.search_key_str = config:get_string('wanxiang_lookup/key') or '`'
env.search_key_alt = alt_lua_punc(env.search_key_str)
local tag = config:get_list('wanxiang_lookup/tags')
if tag and tag.size > 0 then
env.tag = {}
for i = 0, tag.size - 1 do
table.insert(env.tag, tag:get_value_at(i).value)
end
else
env.tag = { 'abc' }
end
env.notifier = env.engine.context.select_notifier:connect(function(ctx)
local input = ctx.input
local code = input:match('^(.-)' .. env.search_key_alt)
if (not code or #code == 0) then return end
local preedit = ctx:get_preedit()
local no_search_string = input:match('^(.-)' .. env.search_key_alt)
local edit = preedit.text:match('^(.-)' .. env.search_key_alt)
if edit and edit:match('[%w;]') then
ctx.input = no_search_string .. env.search_key_str
else
ctx.input = no_search_string
env.commit_code = no_search_string
ctx:commit()
end
end)
-- 【安全缓存】初始化
env._global_group_cache = {}
env.cache_size = 0 -- 计数器
end
function f.func(input, env)
if not env.if_reverse_lookup then
for cand in input:iter() do yield(cand) end
return
end
local ctx_input = env.engine.context.input
local s_start, s_end = ctx_input:find(env.search_key_alt, 1, false)
if not s_start then
for cand in input:iter() do yield(cand) end
return
end
local fuma = ctx_input:sub(s_end + 1)
-- 【惰性检查】无辅码,直接显示,不查库
if #fuma == 0 then
for cand in input:iter() do yield(cand) end
return
end
local fuma_segments = {}
for segment in fuma:gmatch('[^' .. env.search_key_alt .. ']+') do
table.insert(fuma_segments, string.lower(segment))
end
local if_single_char_first = env.engine.context:get_option('char_priority')
local long_word_cands = {}
local cache = env._global_group_cache
-- 如果缓存条目超过 3000清空重来
-- 3000个字足以覆盖99.9%的日常输入,且仅占用极小内存
if env.cache_size > 3000 then
env._global_group_cache = {}
env.cache_size = 0
cache = env._global_group_cache -- 更新引用
end
for cand in input:iter() do
if cand.type == 'sentence' then goto skip end
local cand_text = cand.text
-- 西文跳过
local b = string.byte(cand_text, 1)
if b and b < 128 then goto skip end
local cand_len = utf8.len(cand_text)
local characters = {}
local pos = 1
for i = 1, cand_len do
local next_pos = utf8.offset(cand_text, i + 1)
local char_str = cand_text:sub(pos, next_pos and next_pos - 1)
characters[i] = char_str
pos = next_pos
-- 【全局缓存】带计数
if not cache[char_str] then
cache[char_str] = build_reverse_group(env.main_projection, env.xlit_projection, env.db_table, char_str)
env.cache_size = env.cache_size + 1 -- 增加计数
end
end
local ok = true
if #fuma_segments == 1 and cand_len == 1 then
ok = group_match(cache[characters[1]], fuma_segments[1])
elseif #fuma_segments > 0 and cand_len > 1 then
local match_count = (#fuma_segments < cand_len) and #fuma_segments or cand_len
for i = 1, match_count do
if not group_match(cache[characters[i]], fuma_segments[i]) then
ok = false
break
end
end
else
if cand_len < #fuma_segments then ok = false end
end
if ok then
if if_single_char_first and cand_len > 1 then
table.insert(long_word_cands, cand)
else
yield(cand)
end
end
::skip::
end
for _, c in ipairs(long_word_cands) do yield(c) end
end
function f.tags_match(seg, env)
for _, v in ipairs(env.tag) do if seg.tags[v] then return true end end
return false
end
function f.fini(env)
if env.if_reverse_lookup and env.notifier then env.notifier:disconnect() end
env.db_table = nil
env._global_group_cache = nil
collectgarbage('collect')
end
return f