-- @amzxyz https://github.com/amzxyz/rime_wanxiang
-- Schema settings read by this filter (super_lookup.lua):
--   wanxiang_lookup:
--     tags: [ abc ]                 # only segments carrying one of these tags are filtered
--     key: "`"                      # lead key typed inside the input; must also be in speller/alphabet
--     lookup: [ wanxiang_reverse ]  # reverse-lookup database(s) used by the filter

--- Fetch the `wanxiang` helper module.
-- Tries `require('wanxiang')` first and falls back to a global table of the
-- same name.
-- @return the module table, or nil when neither source provides a table
local function get_wanxiang()
    local ok, mod = pcall(require, 'wanxiang')
    if ok and type(mod) == 'table' then
        return mod
    end
    if type(_G.wanxiang) == 'table' then
        return _G.wanxiang
    end
    return nil
end

-- Projection rules per input-method id.
-- (ids mentioned by the original author: flypy/mspy/sogou/abc/ziguang/pyjj/gbpy/lxsq/zrlong/hxlong;
-- only the ids listed below have a rule set here.)
--
-- In the double-pinyin rule sets every key is emitted as a `<x>` placeholder
-- and the last rule of each set removes `'`, `<` and `>`.
local LOCAL_PROJECTION_RULES = {
    -- Full pinyin (pinyin)
    pinyin = {
        "xform/'//",
        "derive/^([nl])ue$/$1ve/",
        "derive/'([nl])ue$/'$1ve/",
        "derive/^([jqxy])u/$1v/",
        "derive/'([jqxy])u/'$1v/",
    },
    -- Ziranma (zrm)
    zrm = {
        "derive/^([jqxy])u(?=^|$|')/$1v/",
        "derive/'([jqxy])u(?=^|$|')/'$1v/",
        "derive/^([aoe])([ioun])(?=^|$|')/$1$1$2/",
        "derive/'([aoe])([ioun])(?=^|$|')/'$1$1$2/",
        "xform/^([aoe])(ng)?(?=^|$|')/$1$1$2/",
        "xform/'([aoe])(ng)?(?=^|$|')/'$1$1$2/",
        "xform/iu(?=^|$|')/<q>/",
        "xform/[iu]a(?=^|$|')/<w>/",
        "xform/[uv]an(?=^|$|')/<r>/",
        "xform/[uv]e(?=^|$|')/<t>/",
        "xform/ing(?=^|$|')|uai(?=^|$|')/<y>/",
        "xform/^sh/<u>/",
        "xform/^ch/<i>/",
        "xform/^zh/<v>/",
        "xform/'sh/'<u>/",
        "xform/'ch/'<i>/",
        "xform/'zh/'<v>/",
        "xform/uo(?=^|$|')/<o>/",
        "xform/[uv]n(?=^|$|')/<p>/",
        "xform/([a-z>])i?ong(?=^|$|')/$1<s>/",
        "xform/[iu]ang(?=^|$|')/<d>/",
        "xform/([a-z>])en(?=^|$|')/$1<f>/",
        "xform/([a-z>])eng(?=^|$|')/$1<g>/",
        "xform/([a-z>])ang(?=^|$|')/$1<h>/",
        "xform/ian(?=^|$|')/<m>/",
        "xform/([a-z>])an(?=^|$|')/$1<j>/",
        "xform/iao(?=^|$|')/<c>/",
        "xform/([a-z>])ao(?=^|$|')/$1<k>/",
        "xform/([a-z>])ai(?=^|$|')/$1<l>/",
        "xform/([a-z>])ei(?=^|$|')/$1<z>/",
        "xform/ie(?=^|$|')/<x>/",
        "xform/ui(?=^|$|')/<v>/",
        "xform/([a-z>])ou(?=^|$|')/$1<b>/",
        "xform/in(?=^|$|')/<n>/",
        "xform/'|<|>//",
    },
    -- Xiaohe (flypy)
    flypy = {
        "derive/^([jqxy])u(?=^|$|')/$1v/",
        "derive/'([jqxy])u(?=^|$|')/'$1v/",
        "derive/^([aoe])([ioun])(?=^|$|')/$1$1$2/",
        "derive/'([aoe])([ioun])(?=^|$|')/'$1$1$2/",
        "xform/^([aoe])(ng)?(?=^|$|')/$1$1$2/",
        "xform/'([aoe])(ng)?(?=^|$|')/'$1$1$2/",
        "xform/iu(?=^|$|')/<q>/",
        "xform/(.)ei(?=^|$|')/$1<w>/",
        "xform/uan(?=^|$|')/<r>/",
        "xform/[uv]e(?=^|$|')/<t>/",
        "xform/un(?=^|$|')/<y>/",
        "xform/^sh/<u>/",
        "xform/^ch/<i>/",
        "xform/^zh/<v>/",
        "xform/'sh/'<u>/",
        "xform/'ch/'<i>/",
        "xform/'zh/'<v>/",
        "xform/uo(?=^|$|')/<o>/",
        "xform/ie(?=^|$|')/<p>/",
        "xform/([a-z>])i?ong(?=^|$|')/$1<s>/",
        "xform/ing(?=^|$|')|uai(?=^|$|')/<k>/",
        "xform/([a-z>])ai(?=^|$|')/$1<d>/",
        "xform/([a-z>])en(?=^|$|')/$1<f>/",
        "xform/([a-z>])eng(?=^|$|')/$1<g>/",
        "xform/[iu]ang(?=^|$|')/<l>/",
        "xform/([a-z>])ang(?=^|$|')/$1<h>/",
        "xform/ian(?=^|$|')/<m>/",
        "xform/([a-z>])an(?=^|$|')/$1<j>/",
        "xform/([a-z>])ou(?=^|$|')/$1<z>/",
        "xform/[iu]a(?=^|$|')/<x>/",
        "xform/iao(?=^|$|')/<n>/",
        "xform/([a-z>])ao(?=^|$|')/$1<c>/",
        "xform/ui(?=^|$|')/<v>/",
        "xform/in(?=^|$|')/<b>/",
        "xform/'|<|>//",
    },
    -- Microsoft (mspy)
    mspy = {
        "derive/^([jqxy])u(?=^|$|')/$1v/",
        "derive/'([jqxy])u(?=^|$|')/'$1v/",
        "derive/^([aoe].*)(?=^|$|')/o$1/",
        "derive/'([aoe].*)(?=^|$|')/'o$1/",
        "xform/^([ae])(.*)(?=^|$|')/$1$1$2/",
        "xform/'([ae])(.*)(?=^|$|')/'$1$1$2/",
        "xform/iu(?=^|$|')/<q>/",
        "xform/[iu]a(?=^|$|')/<w>/",
        "xform/er(?=^|$|')|[uv]an(?=^|$|')/<r>/",
        "xform/[uv]e(?=^|$|')/<t>/",
        "xform/v(?=^|$|')|uai(?=^|$|')/<y>/",
        "xform/^sh/<u>/",
        "xform/^ch/<i>/",
        "xform/^zh/<v>/",
        "xform/'sh/'<u>/",
        "xform/'ch/'<i>/",
        "xform/'zh/'<v>/",
        "xform/uo(?=^|$|')/<o>/",
        "xform/[uv]n(?=^|$|')/<p>/",
        "xform/([a-z>])i?ong(?=^|$|')/$1<s>/",
        "xform/[iu]ang(?=^|$|')/<d>/",
        "xform/([a-z>])en(?=^|$|')/$1<f>/",
        "xform/([a-z>])eng(?=^|$|')/$1<g>/",
        "xform/([a-z>])ang(?=^|$|')/$1<h>/",
        "xform/ian(?=^|$|')/<m>/",
        "xform/([a-z>])an(?=^|$|')/$1<j>/",
        "xform/iao(?=^|$|')/<c>/",
        "xform/([a-z>])ao(?=^|$|')/$1<k>/",
        "xform/([a-z>])ai(?=^|$|')/$1<l>/",
        "xform/([a-z>])ei(?=^|$|')/$1<z>/",
        "xform/ie(?=^|$|')/<x>/",
        "xform/ui(?=^|$|')/<v>/",
        -- ue/ve may be typed on either T or V in this layout
        "derive/<t>(?=^|$|')/<v>/",
        "xform/([a-z>])ou(?=^|$|')/$1<b>/",
        "xform/in(?=^|$|')/<n>/",
        "xform/ing(?=^|$|')/;/",
        "xform/'|<|>//",
    },
    -- Sogou double pinyin (sogou)
    sogou = {
        "derive/^([jqxy])u(?=^|$|')/$1v/",
        "derive/'([jqxy])u(?=^|$|')/'$1v/",
        "derive/^([aoe].*)(?=^|$|')/o$1/",
        "derive/'([aoe].*)(?=^|$|')/'o$1/",
        "xform/^([ae])(.*)(?=^|$|')/$1$1$2/",
        "xform/'([ae])(.*)(?=^|$|')/'$1$1$2/",
        "xform/iu(?=^|$|')/<q>/",
        "xform/[iu]a(?=^|$|')/<w>/",
        "xform/er(?=^|$|')|[uv]an(?=^|$|')/<r>/",
        "xform/[uv]e(?=^|$|')/<t>/",
        "xform/v(?=^|$|')|uai(?=^|$|')/<y>/",
        "xform/^sh/<u>/",
        "xform/^ch/<i>/",
        "xform/^zh/<v>/",
        "xform/'sh/'<u>/",
        "xform/'ch/'<i>/",
        "xform/'zh/'<v>/",
        "xform/uo(?=^|$|')/<o>/",
        "xform/[uv]n(?=^|$|')/<p>/",
        "xform/([a-z>])i?ong(?=^|$|')/$1<s>/",
        "xform/[iu]ang(?=^|$|')/<d>/",
        "xform/([a-z>])en(?=^|$|')/$1<f>/",
        "xform/([a-z>])eng(?=^|$|')/$1<g>/",
        "xform/([a-z>])ang(?=^|$|')/$1<h>/",
        "xform/ian(?=^|$|')/<m>/",
        "xform/([a-z>])an(?=^|$|')/$1<j>/",
        "xform/iao(?=^|$|')/<c>/",
        "xform/([a-z>])ao(?=^|$|')/$1<k>/",
        "xform/([a-z>])ai(?=^|$|')/$1<l>/",
        "xform/([a-z>])ei(?=^|$|')/$1<z>/",
        "xform/ie(?=^|$|')/<x>/",
        "xform/ui(?=^|$|')/<v>/",
        "xform/([a-z>])ou(?=^|$|')/$1<b>/",
        "xform/in(?=^|$|')/<n>/",
        "xform/ing(?=^|$|')/;/",
        "xform/'|<|>//",
    },
    -- Intelligent ABC (abc)
    abc = {
        "xform/^zh/<a>/",
        "xform/^ch/<e>/",
        "xform/^sh/<v>/",
        "xform/'zh/'<a>/",
        "xform/'ch/'<e>/",
        "xform/'sh/'<v>/",
        "xform/^([aoe].*)(?=^|$|')/<o>$1/",
        "xform/'([aoe].*)(?=^|$|')/'<o>$1/",
        "xform/ei(?=^|$|')/<q>/",
        "xform/ian(?=^|$|')/<w>/",
        "xform/er(?=^|$|')|iu(?=^|$|')/<r>/",
        "xform/[iu]ang(?=^|$|')/<t>/",
        "xform/ing(?=^|$|')/<y>/",
        "xform/uo(?=^|$|')/<o>/",
        "xform/uan(?=^|$|')/<p>/",
        "xform/([a-z>])i?ong(?=^|$|')/$1<s>/",
        "xform/[iu]a(?=^|$|')/<d>/",
        "xform/en(?=^|$|')/<f>/",
        "xform/eng(?=^|$|')/<g>/",
        "xform/ang(?=^|$|')/<h>/",
        "xform/an(?=^|$|')/<j>/",
        "xform/iao(?=^|$|')/<z>/",
        "xform/ao(?=^|$|')/<k>/",
        "xform/in(?=^|$|')|uai(?=^|$|')/<c>/",
        "xform/ai(?=^|$|')/<l>/",
        "xform/ie(?=^|$|')/<x>/",
        "xform/ou(?=^|$|')/<b>/",
        "xform/un(?=^|$|')/<n>/",
        "xform/[uv]e(?=^|$|')|ui(?=^|$|')/<m>/",
        "xform/'|<|>//",
    },
    -- Ziguang (ziguang)
    ziguang = {
        "derive/^([jqxy])u(?=^|$|')/$1v/",
        "derive/'([jqxy])u(?=^|$|')/'$1v/",
        "xform/'([aoe].*)(?=^|$|')/'<o>$1/",
        "xform/^([aoe].*)(?=^|$|')/<o>$1/",
        "xform/en(?=^|$|')/<w>/",
        "xform/eng(?=^|$|')/<t>/",
        "xform/in(?=^|$|')|uai(?=^|$|')/<y>/",
        "xform/^zh/<u>/",
        "xform/^sh/<i>/",
        "xform/'zh/'<u>/",
        "xform/'sh/'<i>/",
        "xform/uo(?=^|$|')/<o>/",
        "xform/ai(?=^|$|')/<p>/",
        "xform/^ch/<a>/",
        "xform/'ch/'<a>/",
        "xform/[iu]ang(?=^|$|')/<g>/",
        "xform/ang(?=^|$|')/<s>/",
        "xform/ie(?=^|$|')/<d>/",
        "xform/ian(?=^|$|')/<f>/",
        "xform/([a-z>])i?ong(?=^|$|')/$1<h>/",
        "xform/er(?=^|$|')|iu(?=^|$|')/<j>/",
        "xform/ei(?=^|$|')/<k>/",
        "xform/uan(?=^|$|')/<l>/",
        "xform/ing(?=^|$|')/;/",
        "xform/ou(?=^|$|')/<z>/",
        "xform/[iu]a(?=^|$|')/<x>/",
        "xform/iao(?=^|$|')/<b>/",
        "xform/ue(?=^|$|')|ui(?=^|$|')|ve(?=^|$|')/<n>/",
        "xform/un(?=^|$|')/<m>/",
        "xform/ao(?=^|$|')/<q>/",
        "xform/an(?=^|$|')/<r>/",
        "xform/'|<|>//",
    },
    -- Pinyin JiaJia (pyjj)
    pyjj = {
        "derive/^([jqxy])u(?=^|$|')/$1v/",
        "derive/'([jqxy])u(?=^|$|')/'$1v/",
        "derive/^([aoe])([ioun])(?=^|$|')/$1$1$2/",
        "derive/'([aoe])([ioun])(?=^|$|')/'$1$1$2/",
        "xform/^([aoe])(ng)?(?=^|$|')/$1$1$2/",
        "xform/'([aoe])(ng)?(?=^|$|')/'$1$1$2/",
        "xform/iu(?=^|$|')/<n>/",
        "xform/[iu]a(?=^|$|')/<b>/",
        "xform/[uv]an(?=^|$|')/<c>/",
        "xform/[uv]e(?=^|$|')|uai(?=^|$|')/<x>/",
        "xform/ing(?=^|$|')|er(?=^|$|')/<q>/",
        "xform/^sh/<i>/",
        "xform/^ch/<u>/",
        "xform/^zh/<v>/",
        "xform/'sh/'<i>/",
        "xform/'ch/'<u>/",
        "xform/'zh/'<v>/",
        "xform/uo(?=^|$|')/<o>/",
        "xform/[uv]n(?=^|$|')/<z>/",
        "xform/([a-z>])i?ong(?=^|$|')/$1<y>/",
        "xform/[iu]ang(?=^|$|')/<h>/",
        "xform/([a-z>])en(?=^|$|')/$1<r>/",
        "xform/([a-z>])eng(?=^|$|')/$1<t>/",
        "xform/([a-z>])ang(?=^|$|')/$1<g>/",
        "xform/ian(?=^|$|')/<j>/",
        "xform/([a-z>])an(?=^|$|')/$1<f>/",
        "xform/iao(?=^|$|')/<k>/",
        "xform/([a-z>])ao(?=^|$|')/$1<d>/",
        "xform/([a-z>])ai(?=^|$|')/$1<s>/",
        "xform/([a-z>])ei(?=^|$|')/$1<w>/",
        "xform/ie(?=^|$|')/<m>/",
        "xform/ui(?=^|$|')/<v>/",
        "xform/([a-z>])ou(?=^|$|')/$1<p>/",
        "xform/in(?=^|$|')/<l>/",
        "xform/'|<|>//",
    },
    -- National-standard double pinyin (gbpy)
    -- NOTE(review): apart from `<p>` on the `ou` rule, the `<x>` replacement
    -- placeholders of this rule set are missing and could not be recovered
    -- with confidence. As written, these rules delete the matched final
    -- instead of mapping it to a key. TODO: restore the replacement fields
    -- from the upstream rule set before relying on gbpy lookups.
    gbpy = {
        "derive/^([aoe])([ioun])(?=^|$|')/$1$1$2/",
        "derive/'([aoe])([ioun])(?=^|$|')/'$1$1$2/",
        "xform/^([aoe])(ng)?(?=^|$|')/$1$1$2/",
        "xform/'([aoe])(ng)?(?=^|$|')/'$1$1$2/",
        "xform/iu(?=^|$|')//",
        "xform/(.)ei(?=^|$|')/$1/",
        "xform/uan(?=^|$|')//",
        "xform/[uv]e(?=^|$|')//",
        "xform/un(?=^|$|')//",
        "xform/^sh//",
        "xform/^ch//",
        "xform/^zh//",
        "xform/'sh/'/",
        "xform/'ch/'/",
        "xform/'zh/'/",
        "xform/uo(?=^|$|')//",
        "xform/ie(?=^|$|')//",
        "xform/([a-z>])i?ong(?=^|$|')/$1/",
        "xform/ing(?=^|$|')|uai(?=^|$|')//",
        "xform/([a-z>])ai(?=^|$|')/$1/",
        "xform/([a-z>])en(?=^|$|')/$1/",
        "xform/([a-z>])eng(?=^|$|')/$1/",
        "xform/[iu]ang(?=^|$|')//",
        "xform/([a-z>])ang(?=^|$|')/$1/",
        "xform/ian(?=^|$|')//",
        "xform/([a-z>])an(?=^|$|')/$1/",
        "xform/([a-z>])ou(?=^|$|')/$1<p>/",
        "xform/[iu]a(?=^|$|')//",
        "xform/iao(?=^|$|')//",
        "xform/([a-z>])ao(?=^|$|')/$1/",
        "xform/ui(?=^|$|')//",
        "xform/in(?=^|$|')//",
        "xform/'|<|>//",
    },
}

--- Pick the rule set for the active input method (by id only).
-- Asks `wanxiang.get_input_method_type(env)` for the id when that function
-- is available; any failure or empty answer falls back to 'pinyin'.
-- @param env the filter environment, passed through to the helper
-- @return a list of projection rule strings (possibly empty)
local function pick_rules(env)
    local id = 'pinyin'
    local wanx = get_wanxiang()
    if wanx and type(wanx.get_input_method_type) == 'function' then
        local ok, ret_id = pcall(wanx.get_input_method_type, env)
        if ok and type(ret_id) == 'string' and #ret_id > 0 then
            id = ret_id
        end
    end
    return LOCAL_PROJECTION_RULES[id] or LOCAL_PROJECTION_RULES['pinyin'] or {}
end

------------------------------------------------------------
-- Utilities
------------------------------------------------------------

--- Escape Lua pattern magic characters in `s`.
-- @param s string or nil
-- @return the escaped string ('' when `s` is nil)
local function alt_lua_punc(s)
    if not s then
        return ''
    end
    -- Parentheses drop gsub's second result (the replacement count).
    return (s:gsub('([%.%+%-%*%?%[%]%^%$%(%)%%])', '%%%1'))
end

--- True when `s` is a non-empty string of ASCII lowercase letters only.
local function is_pure_lower_alpha(s)
    return type(s) == "string" and s:match("^[a-z]+$") ~= nil
end

--- True when `s` consists solely of uppercase letters (`%u`).
local function is_all_upper(s)
    return s:match('^%u+$') ~= nil
end

--- True when `s` consists solely of lowercase letters (`%l`).
local function is_all_lower(s)
    return s:match('^%l+$') ~= nil
end

--- Append `elem` to `list` unless it is nil/empty or already in `set_map`.
local function add_to_set_list(set_map, list, elem)
    if not elem or #elem == 0 then
        return
    end
    if not set_map[elem] then
        set_map[elem] = true
        list[#list + 1] = elem
    end
end

------------------------------------------------------------
-- Rule application / reverse lookup
------------------------------------------------------------

--- Expand one reverse-lookup code into the variants it may be matched by.
-- The result holds, without duplicates: the code itself, its projection
-- (when a projection is given), the lowercase form of any all-uppercase
-- variant, and for any 4-letter lowercase variant its 1st + 3rd letters.
-- @param code_projection Projection object or nil
-- @param part a single code taken from the reverse-lookup result
-- @return list of variant strings
local function expand_code_variant(code_projection, part)
    local out, seen = {}, {}
    local function add(s)
        add_to_set_list(seen, out, s)
    end

    add(part)
    if code_projection then
        local projected = code_projection:apply(part, true)
        if projected and #projected > 0 then
            add(projected)
        end
    end

    -- Iterate over a snapshot so variants added below are not re-expanded.
    local base = {}
    for i = 1, #out do
        base[i] = out[i]
    end
    for _, s in ipairs(base) do
        -- Uppercase codes (strokes, per the original author) take part
        -- in lowercase form.
        if is_all_upper(s) then
            add(string.lower(s))
        end
        -- Four lowercase letters: also offer letters 1 and 3.
        if #s == 4 and is_all_lower(s) then
            add(s:sub(1, 1) .. s:sub(3, 3))
        end
    end
    return out
end

--- Collect every code variant that the databases yield for `text`.
-- @param code_projection Projection object or nil
-- @param db_table list of reverse-lookup databases
-- @param text the character to look up
-- @return de-duplicated list containing only pure lowercase-letter codes
local function build_reverse_group(code_projection, db_table, text)
    local group, seen = {}, {}
    for _, db in ipairs(db_table) do
        local code = db:lookup(text)
        if code and #code > 0 then
            for part in code:gmatch('%S+') do
                for _, v in ipairs(expand_code_variant(code_projection, part)) do
                    add_to_set_list(seen, group, v)
                end
            end
        end
    end

    -- Final cleanup: keep pure lowercase-letter codes only.
    local cleaned, seen2 = {}, {}
    for _, v in ipairs(group) do
        v = tostring(v)
        if is_pure_lower_alpha(v) then
            add_to_set_list(seen2, cleaned, v)
        end
    end
    return cleaned
end

--- Test whether any code in `group` matches the auxiliary code `fuma`.
-- No wildcards: `fuma` is compared literally after lowercasing.
-- @param group list of code strings
-- @param fuma auxiliary code typed by the user
-- @param global_match true for "contains", otherwise "starts with"
-- @return boolean
local function group_match(group, fuma, global_match)
    if not fuma or #fuma == 0 then
        return false
    end
    local needle = string.lower(fuma)
    for _, elem in ipairs(group) do
        local e = string.lower(elem)
        if global_match then
            if e:find(needle, 1, true) then
                return true
            end
        elseif e:sub(1, #needle) == needle then
            return true
        end
    end
    return false
end

--- Yield `cand` now, or defer it when single characters take priority.
-- Multi-character candidates are appended to `long_word_cands` when
-- `if_single_char_first` is truthy; everything else is yielded immediately.
local function handle_long_cand(if_single_char_first, cand, long_word_cands)
    -- utf8.len returns nil for invalid UTF-8; treat that as "not long".
    local len = utf8.len(cand.text) or 0
    if if_single_char_first and len > 1 then
        long_word_cands[#long_word_cands + 1] = cand
    else
        yield(cand)
    end
end

--- Yield every candidate of `input` unchanged.
local function pass_through(input)
    for cand in input:iter() do
        yield(cand)
    end
end

--- Split off the first and second UTF-8 characters of `text`.
-- @return first, second -- `first` is the whole text and `second` is nil
--   when the text has fewer than two characters or is not valid UTF-8
local function leading_chars(text)
    local len = utf8.len(text)
    if not len or len < 2 then
        return text, nil
    end
    local second_start = utf8.offset(text, 2)
    local third_start = utf8.offset(text, 3) -- #text + 1 when len == 2
    return text:sub(1, second_start - 1), text:sub(second_start, third_start - 1)
end

--- Return the code group for `text`, building and caching it on a miss.
local function cached_group(env, text)
    local group = env._group_cache[text]
    if not group then
        group = build_reverse_group(env.code_projection, env.db_table, text)
        env._group_cache[text] = group
    end
    return group
end

------------------------------------------------------------
-- Filter
------------------------------------------------------------
local f = {}

--- Set up the filter: databases, projection, lead key, tags and notifier.
-- When `wanxiang_lookup/lookup` is missing or empty the filter stays
-- disabled (`env.if_reverse_lookup == false`) and nothing else is set up.
function f.init(env)
    local config = env.engine.schema.config

    -- Reverse-lookup databases
    env.if_reverse_lookup = false
    env.db_table = nil
    local db = config:get_list("wanxiang_lookup/lookup")
    if db and db.size > 0 then
        env.db_table = {}
        for i = 0, db.size - 1 do
            table.insert(env.db_table, ReverseLookup(db:get_value_at(i).value))
        end
        env.if_reverse_lookup = true
    end
    if not env.if_reverse_lookup then
        return
    end

    -- Built-in rules, chosen automatically (the schema's format is not read)
    local rules = pick_rules(env)
    if type(rules) == 'table' and #rules > 0 then
        env.code_projection = Projection()
        env.code_projection:load(rules)
    else
        env.code_projection = nil
    end

    -- Lead key: wanxiang_lookup/key when set, otherwise `
    env.search_key_str = config:get_string('wanxiang_lookup/key') or '`'
    env.search_key_alt = alt_lua_punc(env.search_key_str)

    -- Tags
    local tag = config:get_list('wanxiang_lookup/tags')
    if tag and tag.size > 0 then
        env.tag = {}
        for i = 0, tag.size - 1 do
            table.insert(env.tag, tag:get_value_at(i).value)
        end
    else
        env.tag = { 'abc' }
    end

    -- Take over after a selection: if the preedit in front of the lead key
    -- still contains letters/digits/`;`, keep the lead key in the input;
    -- otherwise drop the lookup part and commit.
    env.notifier = env.engine.context.select_notifier:connect(function(ctx)
        local code = ctx.input:match('^(.-)' .. env.search_key_alt)
        if not code or #code == 0 then
            return
        end
        local preedit = ctx:get_preedit()
        local edit = preedit.text:match('^(.-)' .. env.search_key_alt)
        if edit and edit:match('[%w;]') then
            ctx.input = code .. env.search_key_str
        else
            ctx.input = code
            env.commit_code = code
            ctx:commit()
        end
    end)

    -- Weak-keyed and weak-valued table, so the garbage collector may drop
    -- cached groups.
    env._group_cache = setmetatable({}, { __mode = 'kv' })
end

--- Filter candidates by the auxiliary code typed after the lead key.
-- Input shape: `code` KEY `fuma` [KEY `fuma_2`]. Without a non-empty code
-- and auxiliary part all candidates pass through unchanged. Otherwise
-- 'sentence' candidates are dropped and a candidate is kept when its first
-- character has a code starting with `fuma`; with a second segment it must
-- additionally have a second character whose code starts with `fuma_2`, or
-- a first-character code containing `fuma_2`.
function f.func(input, env)
    if not env.if_reverse_lookup then
        pass_through(input)
        return
    end

    local key = env.search_key_alt
    local code, fuma = env.engine.context.input:match('^(.-)' .. key .. '(.+)$')
    if not code or #code == 0 or not fuma or #fuma == 0 then
        pass_through(input)
        return
    end

    -- Two-segment auxiliary code: a`X`Y
    local fuma_2
    if fuma:find(key) then
        fuma, fuma_2 = fuma:match('^(.-)' .. key .. '(.*)$')
    end
    local two_segments = fuma_2 ~= nil and #fuma_2 > 0

    local if_single_char_first = env.engine.context:get_option('char_priority')
    local long_word_cands = {}

    for cand in input:iter() do
        if cand.type ~= 'sentence' then
            -- Slice by UTF-8 offsets; the characters are never used as
            -- Lua patterns, so magic characters in a candidate are safe.
            local text, text_2 = leading_chars(cand.text)
            local group1 = cached_group(env, text)

            local ok
            if two_segments then
                local group2 = text_2 and cached_group(env, text_2) or nil
                ok = group_match(group1, fuma, false)
                    and ((group2 ~= nil and group_match(group2, fuma_2, false))
                        or group_match(group1, fuma_2, true))
            else
                -- Single segment: prefix match on the first character
                ok = group_match(group1, fuma, false)
            end

            if ok then
                handle_long_cand(if_single_char_first, cand, long_word_cands)
            end
        end
    end

    -- Deferred multi-character candidates go last.
    for _, c in ipairs(long_word_cands) do
        yield(c)
    end
end

--- Apply the filter only to segments carrying one of the configured tags.
-- @return boolean; false when no tags were set up (filter disabled in init)
function f.tags_match(seg, env)
    -- env.tag is nil when f.init returned early without a lookup database.
    if not env.tag then
        return false
    end
    for _, v in ipairs(env.tag) do
        if seg.tags[v] then
            return true
        end
    end
    return false
end

--- Release the notifier connection, databases and cache.
function f.fini(env)
    if env.if_reverse_lookup and env.notifier then
        env.notifier:disconnect()
    end
    env.db_table = nil
    env._group_cache = nil
    collectgarbage('collect')
end

return f