mirror of
https://github.com/d0zingcat/rime_wanxiang.git
synced 2026-05-13 15:10:03 +00:00
refactor(english): 全新的英文方案与配套整句体验
- 智能上屏加空格,支持空格、回车打断信号,换行不会输出一个前面有空格的单词 - 首字母大写格式化,双大写则全大写格式化,支持句中对应位置操作 - 之前万象为了整洁与混合方案放在一起,很多以为万象没有英文,那个时候是词语级的,现在升级为更加智能的整句输入体验,再次独立出来未来将更深入维护,同时也是为了配合.netx切换方案
This commit is contained in:
4
custom/wanxiang_english.custom.yaml
Normal file
4
custom/wanxiang_english.custom.yaml
Normal file
@@ -0,0 +1,4 @@
|
||||
patch:
|
||||
speller/algebra:
|
||||
__include: wanxiang_algebra:/english/通用规则
|
||||
__patch: wanxiang_algebra:/english/全拼 #可选的选项有(全拼, 自然码, 小鹤双拼, 微软双拼, 搜狗双拼, 智能ABC, 紫光双拼, 拼音加加, 自然龙, 汉心龙)
|
||||
@@ -12,7 +12,8 @@ schema:
|
||||
【文本框输入:/pinyin全拼,/zrm自然码,/flypy小鹤,/mspy,/sogou,/pyjj等,详见README.md】
|
||||
dependencies:
|
||||
- wanxiang_mixedcode #中英文混合词汇
|
||||
- wanxiang_reverse # 部件拆字,反查及辅码
|
||||
- wanxiang_reverse #部件拆字,反查及辅码
|
||||
- wanxiang_english #英文词汇
|
||||
- wanxiang_chaifen #辅助码拆分注释、翻译注释、行政区划匹配、车牌、等等注释类显示滤镜,Lua专用:super_comment
|
||||
|
||||
|
||||
@@ -62,7 +63,7 @@ engine:
|
||||
- lua_processor@*super_tips #超级提示模块:表情、简码、翻译、化学式、等等靠你想象
|
||||
- lua_processor@*limit_repeated #用于限制最大候选长度以及最大重复输入声母编码长度,避免性能异常
|
||||
- lua_processor@*backspace_limit #防止连续 Backspace 在编码为空时删除已上屏内容
|
||||
- lua_processor@*kp_number_processor #管理小键盘的处理逻辑,有输入中数字不上屏和数字一直不上屏设置可选
|
||||
- lua_processor@*kp_number_processor #管理主键盘小键盘的数字处理逻辑,有输入中数字不上屏和数字一直不上屏设置可选
|
||||
- lua_processor@*super_segmentation #通过双击分词符号触发重新分词,并在持续输入分词符号时,能在预设方式之间循环,用于应对类似自然码:必输 必须是 为相同编码导致的必输前置的问题
|
||||
- ascii_composer #处理英文模式及中英文切换
|
||||
- recognizer #与 matcher 搭配,处理符合特定规则的输入码,如网址、反查等 tags
|
||||
@@ -93,7 +94,8 @@ engine:
|
||||
- lua_translator@*input_statistics #一个输入统计的脚本,以日、周、月、年等维度的统计
|
||||
- table_translator@custom_phrase #自定义短语 custom_phrase.txt,用于置顶自定义编码候选词
|
||||
- table_translator@chengyu #简码成语词汇表导入
|
||||
- table_translator@wanxiang_mixedcode #中英等混合词汇表导入
|
||||
- table_translator@wanxiang_english #英文词汇表导入
|
||||
- table_translator@wanxiang_mixedcode #混合编码词汇表导入
|
||||
- table_translator@wanxiang_reverse #挂接部件组字和笔画反查
|
||||
- script_translator@user_dict_set #自造词之使用词汇入口
|
||||
- script_translator@add_user_dict #自造词之制造词汇入口
|
||||
@@ -101,7 +103,8 @@ engine:
|
||||
- reverse_lookup_filter@radical_reverse_lookup #部件拆字滤镜,放在super_comment前面,进一步被超级注释处理以获得拼音编码的提示
|
||||
- lua_filter@*auto_phrase #comment前,无感造词,关闭调频的时候将汉字写入次翻译器,当没有英文候选的时候追加\上屏可完成英文造词
|
||||
- lua_filter@*super_lookup #comment前,字词输入中反查辅助筛选
|
||||
- lua_filter@*super_filter #comment前,功能太多详见Lua文件
|
||||
- lua_filter@*super_filter #comment前,相关功能见Lua文件
|
||||
- lua_filter@*super_english #comment前,负责英文方案及中英混输中英文单词格式化,语句流,自动加空格等策略
|
||||
- lua_filter@*super_comment_preedit #OpenCC前,超级注释模块、超级preedit,支持错词提示、辅助码显示,部件组字读音注释,有声调、无声调全拼编码的转换,支持个性化配置和关闭相应的功能,详情搜索super_comment_preedit进行详细配置
|
||||
- simplifier@emoji #Emoji滤镜
|
||||
- simplifier@s2t #简繁切换通繁
|
||||
@@ -392,12 +395,21 @@ chengyu:
|
||||
initial_quality: 1.2 #本表词和系统词重码居后
|
||||
|
||||
# 中文、英文、数字、符号等混合词汇
|
||||
wanxiang_mixedcode:
|
||||
dictionary: wanxiang_mixedcode
|
||||
wanxiang_english:
|
||||
dictionary: wanxiang_english
|
||||
user_dict: en
|
||||
enable_completion: true
|
||||
enable_sentence: true
|
||||
initial_quality: 2
|
||||
comment_format: #这里很重要如果残留带声调字母,剩余编码提示计算出错引发程序崩溃
|
||||
- xform/^~.+$//
|
||||
wanxiang_mixedcode:
|
||||
dictionary: wanxiang_mixedcode
|
||||
enable_completion: true
|
||||
enable_sentence: false
|
||||
initial_quality: 2.1
|
||||
comment_format: #这里很重要如果残留带声调字母,剩余编码提示计算出错引发程序崩溃
|
||||
- xform/^~.+$//
|
||||
|
||||
# Emoji
|
||||
emoji:
|
||||
|
||||
103658
dicts/en.dict.yaml
103658
dicts/en.dict.yaml
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -46542,6 +46542,7 @@ sort: by_weight
|
||||
俄国外交官 é guó wài jiāo guān 1
|
||||
额贺福志郎 é hè fú zhì láng 1
|
||||
腭后天畸形 è hòu tiān jī xíng 1
|
||||
鄂侯驭方鼎 è hóu yù fāng dǐng 1
|
||||
恶环壬四烯 è huán rén sì xī 1
|
||||
𫫇环壬四烯 ě huán rén sì xī 1
|
||||
鄂霍次克海 è huò cì kè hǎi 1
|
||||
@@ -168655,6 +168656,7 @@ sort: by_weight
|
||||
省高级法院 shěng gāo jí fǎ yuàn 1
|
||||
省高速公路 shěng gāo sù gōng lù 1
|
||||
省高校新区 shěng gāo xiào xīn qū 1
|
||||
笙歌方一歇 shēng gē fāng yī xiē 1
|
||||
生个孩子吧 shēng gè hái zi ba 1
|
||||
省公安厅的 shěng gōng ān tīng de 1
|
||||
省公安学校 shěng gōng ān xué xiào 1
|
||||
@@ -171119,7 +171121,6 @@ sort: by_weight
|
||||
市第一小学 shì dì yī xiǎo xué 1
|
||||
市第一医院 shì dì yī yī yuàn 1
|
||||
市第一中学 shì dì yī zhōng xué 1
|
||||
是地藏菩萨 shì dì zàng pú sà 1
|
||||
嗜碘阿米巴 shì diǎn ā mǐ bā 1
|
||||
十点差三分 shí diǎn chà sān fēn 1
|
||||
市电力公司 shì diàn lì gōng sī 1
|
||||
@@ -236506,6 +236507,7 @@ sort: by_weight
|
||||
应注意观察 yīng zhù yì guān chá 1
|
||||
应注意哪些 yīng zhù yì nǎ xiē 1
|
||||
应注意事项 yīng zhù yì shì xiàng 1
|
||||
莺啭柳洲亭 yīng zhuàn liǔ zhōu tíng 1
|
||||
应准予离婚 yīng zhǔn yǔ lí hūn 1
|
||||
影子的传说 yǐng zi de chuán shuō 1
|
||||
影子的世界 yǐng zi de shì jiè 1
|
||||
|
||||
@@ -286254,6 +286254,7 @@ sort: by_weight
|
||||
三足鼎中知味久 sān zú dǐng zhōng zhī wèi jiǔ 1
|
||||
三足之乌足恐断 sān zú zhī wū zú kǒng duàn 1
|
||||
散作千林火迫春 sàn zuò qiān lín huǒ pò chūn 1
|
||||
散作千溪遍万家 sàn zuò qiān xī biàn wàn jiā 1
|
||||
散作人间万窍风 sàn zuò rén jiān wàn qiào fēng 1
|
||||
散作霜天落叶风 sàn zuò shuāng tiān luò yè fēng 1
|
||||
桑蚕作茧自缠裹 sāng cán zuò jiǎn zì chán guǒ 1
|
||||
|
||||
@@ -294,7 +294,7 @@ sort: by_weight
|
||||
暗蜂属 àn fēng shǔ 77
|
||||
暗茴鱼 àn huí yú 80
|
||||
安蕨属 ān jué shǔ 91
|
||||
安康鱼 ān kāng yú 170
|
||||
𩽾𩾌鱼 ān kāng yú 170
|
||||
安兰属 ān lán shǔ 85
|
||||
鞍龙科 ān lóng kē 68
|
||||
鞍龙属 ān lóng shǔ 66
|
||||
|
||||
@@ -42,6 +42,8 @@ local function replace_schema(file_path, target_schema)
|
||||
content = content:gsub("([%s]*__include:%s*wanxiang_algebra:/reverse/)%S+", "%1" .. target_schema)
|
||||
elseif file_path:find("wanxiang_mixedcode") then
|
||||
content = content:gsub("([%s]*__patch:%s*wanxiang_algebra:/mixed/)%S+", "%1" .. target_schema)
|
||||
elseif file_path:find("wanxiang_english") then
|
||||
content = content:gsub("([%s]*__patch:%s*wanxiang_algebra:/english/)%S+", "%1" .. target_schema)
|
||||
elseif file_path:find("wanxiang%.custom") or file_path:find("wanxiang_pro%.custom") then
|
||||
content = content:gsub("([%s%-]*wanxiang_algebra:/pro/)%S+", "%1" .. target_schema, 1)
|
||||
content = content:gsub("([%s%-]*wanxiang_algebra:/base/)%S+", "%1" .. target_schema, 1)
|
||||
|
||||
387
lua/super_english.lua
Normal file
387
lua/super_english.lua
Normal file
@@ -0,0 +1,387 @@
|
||||
-- lua/super_english.lua
|
||||
-- https://github.com/amzxyz/rime_wanxiang
|
||||
-- @description: 英文全能处理器 (Fix: 动态分隔符兼容)
|
||||
-- @author: amzxyz
|
||||
--
|
||||
-- 核心功能清单:
|
||||
-- 1. [Format] 语句级英文大写格式化,逐词大小写对应 (look HELLO -> look HELLO)
|
||||
-- 2. [Spacing] 智能语句空格切分,智能单词上屏加空格 (Smart Spacing) 与无损分词还原
|
||||
-- 3. [Memory] 全量历史缓存,完美解决回删乱码问题
|
||||
-- 4. [Construct] 原生优先构造策略 (短词无分词则重置为原生输入)
|
||||
-- 5. [Order] 单字母(a/A) 智能插队排序,补齐单字母候选
|
||||
local F = {}
|
||||
|
||||
-- 引入常用函数
|
||||
local byte = string.byte
|
||||
local find = string.find
|
||||
local gsub = string.gsub
|
||||
local upper = string.upper
|
||||
local lower = string.lower
|
||||
local sub = string.sub
|
||||
local match = string.match
|
||||
local format = string.format
|
||||
|
||||
--====================================================
|
||||
-- 1. 基础工具函数
|
||||
--====================================================
|
||||
|
||||
-- Reduce a string to its "fingerprint": ASCII letters only, lowercased.
-- Everything that is not a-z / A-Z (digits, spaces, delimiters, multi-byte
-- characters) is dropped.
-- @param s string
-- @return string lowercase letters of `s`, in their original order
local function pure(s)
    local letters_only = gsub(s, "[^a-zA-Z]", "")
    return lower(letters_only)
end
|
||||
|
||||
-- Tell whether a string is non-empty and made of 7-bit ASCII bytes only.
-- Note that letters are NOT required: digits or punctuation alone qualify.
-- @param s string
-- @return boolean true when `s` is non-empty and contains no byte above 127
local function is_ascii_phrase_fast(s)
    if s == "" then return false end
    -- Any byte in 0x80..0xFF means the text is not plain ASCII.
    return find(s, "[\128-\255]") == nil
end
|
||||
|
||||
-- Look for the first ASCII letter in a string.
-- @param s string
-- @return the start and end index of the first letter (truthy), or nil when
--         the string contains no ASCII letter
local function has_letters(s)
    return find(s, "[A-Za-z]", 1)
end
|
||||
|
||||
-- Subsequence match: scan `text` from `start_pos` and consume the characters
-- of `target_fp` in order, case-insensitively on the text side. Characters of
-- `text` that do not match the next wanted character are skipped.
-- @param text      string  text to scan
-- @param start_pos integer 1-based position where scanning begins
-- @param target_fp string  lowercase fingerprint to look for
-- @return integer|nil position of the first matched character
-- @return integer|nil position of the last matched character
--         (both nil when the fingerprint is empty or cannot be completed)
local function find_target_in_text(text, start_pos, target_fp)
    local wanted = #target_fp
    if wanted == 0 then return nil, nil end

    local matched = 0
    local first_hit = nil

    for pos = start_pos, #text do
        local expected = sub(target_fp, matched + 1, matched + 1)
        if lower(sub(text, pos, pos)) == expected then
            matched = matched + 1
            if matched == 1 then first_hit = pos end
            if matched == wanted then
                return first_hit, pos
            end
        end
    end

    return nil, nil
end
|
||||
|
||||
--====================================================
|
||||
-- 2. 核心逻辑:格式化与还原
|
||||
--====================================================
|
||||
|
||||
-- Anchor-based re-spacing of a sentence candidate.
--
-- The candidate's preedit is split on the schema delimiters into segments
-- (e.g. "hi'vcs" -> {"hi", "vcs"}). Each segment's letter fingerprint is then
-- located in the candidate text, and the text is cut at the position where
-- each segment starts; the pieces are re-joined with single spaces.
--
-- @param cand          candidate to process (reads .preedit, .text, .type,
--                      .start, ._end, .comment)
-- @param split_pattern Lua pattern matching one run of non-delimiter chars
-- @param check_pattern Lua pattern matching a single delimiter char
-- @return the original `cand` when nothing needs to change (no delimiter in
--         the preedit, no usable segment, a segment that cannot be located,
--         or a result identical to the current text); otherwise a new
--         Candidate carrying the re-spaced text and the original preedit
local function restore_sentence_spacing(cand, split_pattern, check_pattern)
    local guide = cand.preedit or ""

    -- 1. Only step in when the preedit actually contains a delimiter.
    if not find(guide, check_pattern) then return cand end

    local text = cand.text

    -- 2. Collect the letter fingerprint of every preedit segment.
    local targets = {}
    for seg in string.gmatch(guide, split_pattern) do
        local fp = pure(seg)
        if #fp > 0 then targets[#targets + 1] = fp end
    end
    if #targets == 0 then return cand end

    -- 3. Find the start anchor of every segment inside the text, in order.
    local starts = {}
    local scan_from = 1
    for _, target in ipairs(targets) do
        local s, e = find_target_in_text(text, scan_from, target)
        if not s then
            -- Preedit and text do not line up: leave the candidate alone.
            return cand
        end
        starts[#starts + 1] = s
        scan_from = e + 1
    end

    -- 4. Cut the text at the anchors.
    local parts = {}
    if starts[1] > 1 then
        -- Keep whatever precedes the first anchor as its own piece.
        parts[#parts + 1] = sub(text, 1, starts[1] - 1)
    end
    for i = 1, #starts do
        local next_start = starts[i + 1]
        -- A piece runs up to the next anchor; the last one runs to the end
        -- of the text so trailing characters are never lost.
        local chunk_end = next_start and (next_start - 1) or #text
        parts[#parts + 1] = sub(text, starts[i], chunk_end)
    end

    -- 5. Join and collapse runs of whitespace into a single space.
    local new_text = gsub(table.concat(parts, " "), "%s%s+", " ")

    -- Nothing to do: hand back the original candidate instead of a copy, the
    -- same way apply_formatting does when it changes nothing.
    if new_text == "" or new_text == text then return cand end

    local nc = Candidate(cand.type, cand.start, cand._end, new_text, cand.comment)
    nc.preedit = cand.preedit
    return nc
end
|
||||
|
||||
-- UTF-8 encoding (bytes 0xC2 0xA0) of U+00A0 NO-BREAK SPACE.
local NBSP = "\194\160"
|
||||
|
||||
-- Apply per-word capitalisation to `text`, driven by the case of the typed
-- code.
--
-- The text is split on whitespace. Each word that contains letters consumes
-- as many letters of the typed code as it has letters itself (or whatever is
-- left). The consumed slice decides the formatting:
--   * slice starts with two uppercase letters and the word is letters only
--       -> whole word uppercased
--   * slice starts with one uppercase letter
--       -> first character of the word uppercased (when it is a letter)
-- Words containing non-ASCII bytes consume their share of the code but are
-- never reformatted.
--
-- Offsets are taken in the letters-only projection of `input_code`. Word
-- lengths are counted in letters, so delimiters or digits typed in the code
-- must not shift the window ("hello'World" has to line "World" up with the
-- second word).
--
-- @param text       string candidate text
-- @param input_code string|nil raw typed code
-- @return string `text` unchanged when `input_code` is nil or empty;
--         otherwise the formatted words joined by single spaces
local function apply_segment_formatting(text, input_code)
    if not input_code or input_code == "" then return text end

    local code_letters = gsub(input_code, "[^a-zA-Z]", "")
    local code_len = #code_letters
    local parts = {}
    local p_code = 1 -- next unconsumed position in code_letters

    for word in string.gmatch(text, "%S+") do
        local word_letters = gsub(word, "[^a-zA-Z]", "")
        local w_len = #word_letters
        local remain = code_len - p_code + 1

        if w_len > 0 and remain > 0 then
            local take = (w_len < remain) and w_len or remain

            if not find(word, "[\128-\255]") then
                local segment = sub(code_letters, p_code, p_code + take - 1)
                local is_pure_alpha = not find(word, "[^a-zA-Z]")

                if find(segment, "^%u%u") and is_pure_alpha then
                    word = upper(word)
                elseif find(segment, "^%u") then
                    word = gsub(word, "^%a", upper)
                end
            end

            p_code = p_code + take
        end

        parts[#parts + 1] = word
    end

    return table.concat(parts, " ")
end
|
||||
|
||||
-- Normalise and format one candidate for English output.
--
-- Steps, in order:
--   1. replace every no-break space in the text with a plain space;
--   2. if the text is pure ASCII and contains a letter, apply per-word
--      capitalisation from code_ctx.raw_input, then apply the spacing mode:
--        "smart"  -> leading space only when code_ctx.prev_is_eng is truthy
--        "before" -> leading space
--        "after"  -> trailing space
--      (a space is only added when the text does not already start / end
--      with whitespace).
--
-- @param cand     candidate (reads .text, .type, .start, ._end, .comment,
--                 .preedit)
-- @param code_ctx table with raw_input, spacing_mode, prev_is_eng
-- @return the original `cand` when no step modified the text, otherwise a
--         new Candidate with the formatted text and the original preedit
local function apply_formatting(cand, code_ctx)
    local original = cand.text
    if not original or original == "" then return cand end

    local result = gsub(original, NBSP, " ")
    local dirty = (result ~= original)

    if is_ascii_phrase_fast(result) and has_letters(result) then
        local raw = code_ctx.raw_input
        if raw then
            local cased = apply_segment_formatting(result, raw)
            if cased ~= result then
                result = cased
                dirty = true
            end
        end

        local mode = code_ctx.spacing_mode
        if mode and mode ~= "off" then
            local wants_leading = (mode == "before")
                or (mode == "smart" and code_ctx.prev_is_eng)

            if wants_leading then
                if not find(result, "^%s") then
                    result = " " .. result
                    dirty = true
                end
            elseif mode == "after" then
                if not find(result, "%s$") then
                    result = result .. " "
                    dirty = true
                end
            end
        end
    end

    if not dirty then return cand end

    local nc = Candidate(cand.type, cand.start, cand._end, result, cand.comment)
    nc.preedit = cand.preedit
    return nc
end
|
||||
|
||||
--====================================================
|
||||
-- 3. 状态管理 (Filter)
|
||||
--====================================================
|
||||
|
||||
-- Filter initialisation.
--
-- Sets up on `env`:
--   memory               table, typed code -> { text, preedit } of the
--                        candidate remembered for that code
--   english_spacing_mode value of schema key "english_spacing" ("off" when
--                        the key is absent)
--   split_pattern        pattern matching one run of non-delimiter chars
--   delim_check_pattern  pattern matching a single delimiter char
--   prev_commit_is_eng   whether the last committed text counted as English
--   commit_notifier      connection to the context's commit notifier
function F.init(env)
    env.memory = {}
    local cfg = env.engine.schema.config

    env.english_spacing_mode = "off"
    -- Default delimiters: space and apostrophe.
    local delimiter_str = " '"

    if cfg then
        local mode = cfg:get_string("english_spacing")
        if mode then env.english_spacing_mode = mode end

        -- An empty delimiter string would produce the malformed patterns
        -- "[^]+" and "[]", which raise on every candidate; keep the default
        -- in that case.
        local configured = cfg:get_string('speller/delimiter')
        if configured and configured ~= "" then
            delimiter_str = configured
        end
    end

    -- Escape pattern-magic characters so the delimiters can be placed
    -- inside a character class.
    local escaped_delims = gsub(delimiter_str, "([%%%^%$%(%)%.%[%]%*%+%-%?])", "%%%1")
    env.split_pattern = "[^" .. escaped_delims .. "]+"
    env.delim_check_pattern = "[" .. escaped_delims .. "]"

    env.prev_commit_is_eng = false
    if env.engine.context then
        env.commit_notifier = env.engine.context.commit_notifier:connect(function(ctx)
            local commit_text = ctx:get_commit_text()
            local is_eng = is_ascii_phrase_fast(commit_text)
            if not is_eng then
                -- Also accept a lone , . ! ? once trailing whitespace is removed.
                local clean = gsub(commit_text, "%s+$", "")
                if clean == "," or clean == "." or clean == "!" or clean == "?" then
                    is_eng = true
                end
            end
            env.prev_commit_is_eng = is_eng
            -- Clear the "english_spacing" break-signal property after each commit.
            ctx:set_property("english_spacing", "")
        end)
    end
end
|
||||
|
||||
-- Filter teardown: disconnect the commit notifier connection (if one was
-- made) and drop the per-session memory table.
function F.fini(env)
    local connection = env.commit_notifier
    if connection then
        connection:disconnect()
        env.commit_notifier = nil
    end
    env.memory = nil
end
|
||||
|
||||
--====================================================
|
||||
-- 4. 主逻辑 (Filter)
|
||||
--====================================================
|
||||
|
||||
-- Main filter body.
--
-- For every incoming candidate: restore sentence spacing, apply English
-- formatting, and yield the result. Along the way:
--   * when the typed code is a single ASCII letter, the lowercase and
--     uppercase letter are yielded as extra candidates, right before the
--     first pure-ASCII candidate (or after the stream if none appeared);
--   * the first useful candidate for the current code is stored in
--     env.memory[code];
--   * if no useful candidate was produced and the code contains a letter, a
--     fallback candidate (comment "~") is built from the longest remembered
--     prefix of the code, or from the raw code when no prefix is remembered.
--
-- @param input translation stream (iterated with input:iter())
-- @param env   filter environment prepared by F.init
function F.func(input, env)
    local ctx = env.engine.context
    local code = ctx.input
    local produced_useful = false -- a non-garbage candidate went out
    local remembered = false      -- env.memory[code] written in this pass

    -- The "english_spacing" property set to "true" is a break signal: it
    -- makes smart spacing behave as if the previous commit was not English.
    local prev_is_eng = env.prev_commit_is_eng
    if ctx:get_property("english_spacing") == "true" then
        prev_is_eng = false
    end

    local code_ctx = {
        raw_input = code,
        spacing_mode = env.english_spacing_mode,
        prev_is_eng = prev_is_eng
    }

    -- Single-letter code: prepare "a" / "A" style candidates.
    local letter_lower, letter_upper = nil, nil
    if #code == 1 then
        local b = byte(code)
        if (b >= 65 and b <= 90) or (b >= 97 and b <= 122) then
            letter_lower = Candidate("completion", 0, 1, lower(code), "")
            letter_upper = Candidate("completion", 0, 1, upper(code), "")
        end
    end
    local letters_pending = (letter_lower ~= nil)

    -- Yield the two single-letter candidates, remembering the lowercase one
    -- if nothing has been remembered for this code yet.
    local function emit_letters()
        if not remembered then
            env.memory[code] = { text = letter_lower.text, preedit = letter_lower.text }
            remembered = true
        end
        yield(letter_lower)
        yield(letter_upper)
        letters_pending = false
        produced_useful = true
    end

    for cand in input:iter() do
        local spaced = restore_sentence_spacing(cand, env.split_pattern, env.delim_check_pattern)
        local shown = apply_formatting(spaced, code_ctx)

        if letters_pending and is_ascii_phrase_fast(shown.text) then
            emit_letters()
        end

        -- "Garbage" = raw echo candidates, or text identical to the code.
        if cand.type ~= "raw" and shown.text ~= code then
            produced_useful = true
            if not remembered and cand.comment ~= "~" then
                env.memory[code] = {
                    text = shown.text,
                    preedit = shown.preedit or shown.text
                }
                remembered = true
            end
        end

        yield(shown)
    end

    if letters_pending then
        emit_letters()
    end

    -- [Phase 3] Build a fallback candidate when nothing useful was produced.
    if produced_useful then return end
    if not has_letters(code) then return end

    -- Longest proper prefix of the code that has a remembered candidate.
    local anchor = nil
    local tail = ""
    for cut = #code - 1, 1, -1 do
        local hit = env.memory[sub(code, 1, cut)]
        if hit then
            anchor = hit
            tail = sub(code, cut + 1)
            break
        end
    end

    if not (anchor and tail ~= "") then
        local echo = Candidate("completion", 0, #code, code, "~")
        echo.preedit = code
        yield(echo)
        return
    end

    local base_preedit = anchor.preedit or anchor.text
    local out_text, out_preedit

    if find(anchor.text, " ") then
        -- Remembered text is already a spaced phrase: append directly.
        out_text = anchor.text .. tail
        out_preedit = base_preedit .. tail
    else
        local last_word = match(anchor.text, "(%S+)%s*$") or ""
        if #last_word > 3 then
            -- Longer word: treat the extra input as the start of a new word.
            local spacer = (sub(anchor.text, -1) == " ") and "" or " "
            out_text = anchor.text .. spacer .. tail
            out_preedit = base_preedit .. spacer .. tail
        else
            -- Short word: fall back to the raw typed code.
            out_text = code
            out_preedit = code
        end
    end

    out_text = apply_segment_formatting(out_text, code)

    local built = Candidate("completion", 0, #code, out_text, "~")
    built.preedit = out_preedit
    built.quality = 9999999
    yield(built)
end
|
||||
|
||||
return F
|
||||
@@ -1,15 +1,11 @@
|
||||
-- @amzxyz https://github.com/amzxyz/rime_wanxiang
|
||||
-- 功能 A:候选文本中的转义序列格式化(始终开启)
|
||||
-- \n \t \r \\ \s(空格) \d(-)
|
||||
-- 功能 B:英文自动大写(始终开启)
|
||||
-- - 首字母大写:输入首字母大写 → 候选首字母大写(Hello)
|
||||
-- - 全部大写:输入前 2+ 个大写 → 候选全大写(HEllo → HELLO)
|
||||
-- - 仅对 ASCII 单词生效;若候选含空格、-、@、#、· 等也认为是英文
|
||||
-- 功能 C:候选重排(仅编码长度 2..6 时)
|
||||
-- 功能 B:候选重排(仅编码长度 2..6 时)
|
||||
-- - 第一候选不动
|
||||
-- - 其余按组输出:①不含字母(table/user_table) → ②其他
|
||||
-- - 若第二候选为 table/user_table,则不排序,直接透传
|
||||
-- 功能 D:成对符号包裹(触发:最后分段完整消耗且出现 prefix\suffix;suffix 命中映射时吞掉 \suffix)
|
||||
-- 功能 C:成对符号包裹(触发:最后分段完整消耗且出现 prefix\suffix;suffix 命中映射时吞掉 \suffix)
|
||||
-- 缓存/锁定:
|
||||
-- - 未锁定时记录第一候选为缓存
|
||||
-- - 出现 prefix\suffix 且 prefix 非空 ⇒ 锁定
|
||||
@@ -18,16 +14,15 @@
|
||||
-- 镜像:
|
||||
-- - schema: paired_symbols/mirror (bool,默认 true)
|
||||
-- - 包裹后可抑制"包裹前文本/包裹后文本"再次出现在后续候选里
|
||||
-- 功能 E:三态语言模式(通过 options 控制,仅在输出层过滤,不改变内部逻辑)
|
||||
-- 功能 D:三态语言模式(通过 options 控制,仅在输出层过滤,不改变内部逻辑)
|
||||
-- - ctx:get_option("en_only") == true → 仅英文:只保留英文候选
|
||||
-- - ctx:get_option("zh_only") == true → 仅中文:丢弃英文候选
|
||||
-- - 两者都 false → 混合模式:中英都输出
|
||||
-- 功能F 字符集过滤,默认8105+𰻝𰻝,可以在方案中定义黑白名单来实现用户自己的范围微调charsetlist: []和charsetblacklist: [𰻝, 𰻞]
|
||||
-- 功能G 由于在混输场景中输入comment commit等等之类的英文时候,由于直接辅助码的派生能力,会将三个好不想干的单字组合在一起,这会造成不好的体验
|
||||
-- 功能E 字符集过滤,默认8105+𰻝𰻝,可以在方案中定义黑白名单来实现用户自己的范围微调charsetlist: []和charsetblacklist: [𰻝, 𰻞]
|
||||
-- 功能F 由于在混输场景中输入comment commit等等之类的英文时候,由于直接辅助码的派生能力,会将三个好不想干的单字组合在一起,这会造成不好的体验
|
||||
-- 因此在首选已经是英文的时候,且type=completion且大于等于4个字符,这个时候后面如果有type=sentence的派生词则直接干掉,这个还要依赖,表翻译器
|
||||
-- 权重设置与主翻译器不可相差太大
|
||||
-- 功能H:英文自动空格(english_spacing)
|
||||
-- 支持模式:off(默认), before(前), after(后), smart(连续英文时在前加空格)
|
||||
|
||||
local wanxiang = require("wanxiang")
|
||||
local M = {}
|
||||
|
||||
@@ -77,48 +72,6 @@ local function has_english_token_fast(s)
|
||||
return false
|
||||
end
|
||||
|
||||
local function is_ascii_word_fast(s)
|
||||
if s == "" then return false end
|
||||
for i = 1, #s do
|
||||
local b = byte(s, i)
|
||||
if not ((b >= 65 and b <= 90) or (b >= 97 and b <= 122)) then return false end
|
||||
end
|
||||
return true
|
||||
end
|
||||
local function is_ascii_phrase_fast(s)
|
||||
if s == "" then return false end
|
||||
local has_alpha = false
|
||||
for i = 1, #s do
|
||||
local b = byte(s, i)
|
||||
if b > 127 then
|
||||
return false -- 出现非 ASCII,直接不是英文短语
|
||||
end
|
||||
if (b >= 65 and b <= 90) or (b >= 97 and b <= 122) then
|
||||
has_alpha = true -- 有至少一个字母
|
||||
end
|
||||
end
|
||||
return has_alpha
|
||||
end
|
||||
local function ascii_equal_ignore_case_to_pure(text, pure_code_lc)
|
||||
-- 提取 text 里的所有字母,转成小写
|
||||
local buf = {}
|
||||
for i = 1, #text do
|
||||
local b = byte(text, i)
|
||||
if b >= 65 and b <= 90 then b = b + 32 end -- 大写转小写
|
||||
if b >= 97 and b <= 122 then
|
||||
buf[#buf+1] = string.char(b)
|
||||
elseif b > 127 then
|
||||
-- 出现非 ASCII,直接视为不匹配,防止中文乱入
|
||||
return false
|
||||
end
|
||||
end
|
||||
local letters = table.concat(buf)
|
||||
if #letters ~= #pure_code_lc then
|
||||
return false
|
||||
end
|
||||
return letters == pure_code_lc and (false == false and true or false) or false
|
||||
end
|
||||
|
||||
-- ========= 英文候选判定 =========
|
||||
-- 使用现有的 has_english_token_fast 叠加 is_table_type:
|
||||
-- - 若不属于 table/user_table/fixed:只要含英文 token 即视为英文候选
|
||||
@@ -144,22 +97,6 @@ local function is_english_candidate(cand)
|
||||
return true
|
||||
end
|
||||
|
||||
-- ========= 空白规范化 ==========
|
||||
local NBSP = string.char(0xC2, 0xA0) -- U+00A0 不换行空格
|
||||
local FWSP = string.char(0xE3, 0x80, 0x80) -- U+3000 全角空格
|
||||
local ZWSP = string.char(0xE2, 0x80, 0x8B) -- U+200B 零宽空格
|
||||
local BOM = string.char(0xEF, 0xBB, 0xBF) -- U+FEFF BOM
|
||||
local ZWNJ = string.char(0xE2, 0x80, 0x8C) -- U+200C 零宽不连字
|
||||
local ZWJ = string.char(0xE2, 0x80, 0x8D) -- U+200D 零宽连字
|
||||
|
||||
local function normalize_spaces(s)
|
||||
if not s or s == "" then return s end
|
||||
-- opencc 中译英转换英文间隔空格为正常空格
|
||||
s = s:gsub(NBSP, " ")
|
||||
-- :gsub(FWSP, " ")
|
||||
return s
|
||||
end
|
||||
|
||||
-- ========= 文本格式化(转义 + 自动大写)=========
|
||||
local escape_map = {
|
||||
["\\n"] = "\n", ["\\t"] = "\t", ["\\r"] = "\r",
|
||||
@@ -173,17 +110,10 @@ local function apply_escape_fast(text)
|
||||
return new_text, new_text ~= text
|
||||
end
|
||||
|
||||
local function format_and_autocap(cand, code_ctx)
|
||||
local function format_and_autocap(cand)
|
||||
local text = cand.text
|
||||
if not text or text == "" then return cand end
|
||||
|
||||
local changed = false
|
||||
-- 空白规范化
|
||||
local norm = normalize_spaces(text)
|
||||
if norm ~= text then
|
||||
text = norm
|
||||
changed = true
|
||||
end
|
||||
-- 转义替换 (\n, \t, \s 等)
|
||||
-- 必须先处理转义,因为转义可能会改变字符串开头 (如 \sApple -> Apple)
|
||||
if find(text, "\\", 1, true) then
|
||||
@@ -193,48 +123,6 @@ local function format_and_autocap(cand, code_ctx)
|
||||
changed = true
|
||||
end
|
||||
end
|
||||
-- 状态检测:转义完成后,检测当前文本是否为 ASCII 短语
|
||||
local b1 = byte(text, 1)
|
||||
local is_ascii_phrase = (b1 and b1 <= 127 and is_ascii_phrase_fast(text))
|
||||
-- 英文自动大写
|
||||
if code_ctx.enable_cap and is_ascii_phrase then
|
||||
local pure_word = is_ascii_word_fast(text)
|
||||
|
||||
if cand.type == "completion" or ascii_equal_ignore_case_to_pure(text, code_ctx.pure_code_lc) then
|
||||
local new_text = nil
|
||||
|
||||
if code_ctx.all_upper and pure_word then
|
||||
-- 全大写逻辑 (HELLO)
|
||||
new_text = upper(text)
|
||||
else
|
||||
-- 首字母大写逻辑 (Hello)
|
||||
new_text = text:gsub("^%a", string.upper)
|
||||
end
|
||||
|
||||
if new_text and new_text ~= text then
|
||||
text = new_text
|
||||
changed = true
|
||||
end
|
||||
end
|
||||
end
|
||||
-- 英文自动空格
|
||||
if is_ascii_phrase and code_ctx.spacing_mode and code_ctx.spacing_mode ~= "off" then
|
||||
local mode = code_ctx.spacing_mode
|
||||
|
||||
-- smart模式: 若“上次上屏是英文”且“当前也是英文”,则在前面补空格
|
||||
if mode == "smart" then
|
||||
if code_ctx.prev_is_eng then
|
||||
text = " " .. text
|
||||
changed = true
|
||||
end
|
||||
elseif mode == "before" then
|
||||
text = " " .. text
|
||||
changed = true
|
||||
elseif mode == "after" then
|
||||
text = text .. " "
|
||||
changed = true
|
||||
end
|
||||
end
|
||||
-- 输出结果
|
||||
if not changed then return cand end
|
||||
|
||||
@@ -552,88 +440,7 @@ local function init_charset_filter(env, cfg)
|
||||
-- charsetblacklist: 黑名单
|
||||
load_charset_list("charsetblacklist", env.charset_block)
|
||||
end
|
||||
-- =======================================================
|
||||
-- 基于英文模式Preedit的智能分段空格
|
||||
-- =======================================================
|
||||
local function restore_sentence_spacing(cand)
|
||||
local guide = cand.preedit or ""
|
||||
-- 如果 Preedit 没空格,说明没分段,直接返回
|
||||
if not string.find(guide, " ") then return cand end
|
||||
|
||||
local text = cand.text
|
||||
local text_len = #text
|
||||
local parts = {}
|
||||
local p = 1 -- 全局扫描指针
|
||||
-- 辅助:取纯字母小写指纹
|
||||
local function pure(s) return string.gsub(s, "[^a-zA-Z]", ""):lower() end
|
||||
local function find_target_in_text(start_pos, target_fp)
|
||||
-- 剪枝:如果剩下的长度比 target 还短,肯定没了
|
||||
if (text_len - start_pos + 1) < #target_fp then return nil, nil end
|
||||
-- 从 start_pos 开始,向后逐字尝试
|
||||
for i = start_pos, text_len do
|
||||
-- 首字母必须匹配,才值得进去做全词扫描
|
||||
-- 比如 target="apple",只有遇到 'a'/'A' 才进去看
|
||||
local char_i = string.sub(text, i, i)
|
||||
local first_char_fp = string.lower(char_i)
|
||||
if first_char_fp == string.sub(target_fp, 1, 1) then
|
||||
-- 潜入匹配:尝试从位置 i 开始凑齐 target
|
||||
local scan_p = i
|
||||
local letters_acc = ""
|
||||
while scan_p <= text_len do
|
||||
local char = string.sub(text, scan_p, scan_p)
|
||||
-- 只收集字母
|
||||
if string.find(char, "[a-zA-Z]") then
|
||||
letters_acc = letters_acc .. string.lower(char)
|
||||
end
|
||||
if letters_acc == target_fp then
|
||||
-- 找到了!返回 (起点, 终点)
|
||||
return i, scan_p
|
||||
end
|
||||
-- 剪枝:如果凑出来的字母已经比目标不一样或更长,说明不是这个
|
||||
-- 比如 target="app", 凑出了 "apx" -> 失败,跳出 while,继续外层 for
|
||||
if #letters_acc > #target_fp then break end
|
||||
if string.sub(letters_acc, 1, #letters_acc) ~= string.sub(target_fp, 1, #letters_acc) then break end
|
||||
scan_p = scan_p + 1
|
||||
end
|
||||
end
|
||||
end
|
||||
return nil, nil
|
||||
end
|
||||
-- 遍历 Preedit 的每一段
|
||||
for seg in string.gmatch(guide, "%S+") do
|
||||
local target = pure(seg)
|
||||
if #target > 0 then
|
||||
-- 在当前指针 p 往后无限寻找
|
||||
local match_start, match_end = find_target_in_text(p, target)
|
||||
if match_start then
|
||||
-- 隔离病灶(中间跳过不匹配的)
|
||||
if match_start > p then
|
||||
local lesion = string.sub(text, p, match_start - 1)
|
||||
table.insert(parts, lesion)
|
||||
end
|
||||
-- 存入正确单词
|
||||
local valid_word = string.sub(text, match_start, match_end)
|
||||
table.insert(parts, valid_word)
|
||||
-- 更新指针
|
||||
p = match_end + 1
|
||||
else
|
||||
-- 没找到:说明这个 preedit 词完全消失了(比如 preedit 有 5 个词,candidate 只有 4 个)
|
||||
-- 策略:忽略这个 guide,继续用下一个 guide 找
|
||||
end
|
||||
end
|
||||
end
|
||||
-- 尾部处理
|
||||
if p <= text_len then
|
||||
local tail = string.sub(text, p)
|
||||
if #parts > 0 then table.insert(parts, tail)
|
||||
else table.insert(parts, tail) end
|
||||
end
|
||||
-- 重组
|
||||
local new_text = table.concat(parts, " ")
|
||||
local nc = Candidate(cand.type, cand.start, cand._end, new_text, cand.comment)
|
||||
nc.preedit = cand.preedit
|
||||
return nc
|
||||
end
|
||||
-- ========= 生命周期 =========
|
||||
function M.init(env)
|
||||
local cfg = env.engine and env.engine.schema and env.engine.schema.config or nil
|
||||
@@ -666,38 +473,7 @@ function M.init(env)
|
||||
local okb, bv = pcall(function() return cfg:get_bool("paired_symbols/mirror") end)
|
||||
if okb and bv ~= nil then env.suppress_mirror = bv end
|
||||
end
|
||||
-- 英文自动空格配置
|
||||
-- off: 关闭; before: 前加; after: 后加; smart: 连续英文时前加
|
||||
env.english_spacing_mode = "off"
|
||||
if cfg then
|
||||
local oks, sv = pcall(function() return cfg:get_string("english_spacing") end)
|
||||
if oks and sv and (sv == "before" or sv == "after" or sv == "smart") then
|
||||
env.english_spacing_mode = sv
|
||||
end
|
||||
end
|
||||
-- 上一次上屏是否为英文(用于 smart 模式)
|
||||
env.prev_commit_is_eng = false
|
||||
-- 注册 commit 通知器来追踪上屏历史
|
||||
if env.engine and env.engine.context then
|
||||
env.commit_notifier = env.engine.context.commit_notifier:connect(function(ctx)
|
||||
local commit_text = ctx:get_commit_text()
|
||||
-- 判断是不是常规的英文单词
|
||||
local is_eng = is_ascii_phrase_fast(commit_text)
|
||||
-- 如果不是单词,再检查是不是单独的英文标点
|
||||
if not is_eng then
|
||||
-- 去掉末尾可能的空格,防止影响判断
|
||||
local clean = commit_text:gsub("%s+$", "")
|
||||
-- 如果是 逗号、句号、感叹号、问号,也强行算作英文
|
||||
if clean == "," or clean == "." or clean == "!" or clean == "?" then
|
||||
is_eng = true
|
||||
end
|
||||
end
|
||||
-- 更新状态
|
||||
env.prev_commit_is_eng = is_eng
|
||||
-- 屏后,立即清除打断信号
|
||||
ctx:set_property("english_spacing", "")
|
||||
end)
|
||||
end
|
||||
|
||||
env.cache = nil -- 首候选缓存(已格式化)
|
||||
env.locked = false -- 是否进入锁定态(检测到 prefix\suffix)
|
||||
|
||||
@@ -711,11 +487,7 @@ function M.init(env)
|
||||
init_charset_filter(env, cfg)
|
||||
end
|
||||
|
||||
function M.fini(env)
|
||||
if env.commit_notifier then
|
||||
env.commit_notifier:disconnect()
|
||||
env.commit_notifier = nil
|
||||
end
|
||||
function M.fini(env)
|
||||
end
|
||||
-- ========= 统一产出通道 =========
|
||||
-- ctxs:
|
||||
@@ -731,44 +503,42 @@ local function emit_with_pipeline(cand, ctxs)
|
||||
|
||||
local env = ctxs.env
|
||||
|
||||
-- 1. 字符集过滤:只有在 charset_strict = true 时才启用
|
||||
-- ① 字符集过滤:只有在 charset_strict = true 时才启用
|
||||
if ctxs.charset_strict and cand.text and cand.text ~= "" then
|
||||
if not in_charset(env, cand.text) then
|
||||
return
|
||||
end
|
||||
end
|
||||
-- 2. 准备变量
|
||||
|
||||
-- ② 三态语言模式
|
||||
local is_en = ctxs.is_english and ctxs.is_english(cand) or false
|
||||
local BAGUA_SYMBOL = "\226\152\175"
|
||||
local is_bagua_sentence = (fast_type(cand) == "sentence") and (cand.comment and string.find(cand.comment, BAGUA_SYMBOL))
|
||||
|
||||
-- 3. 三态语言过滤
|
||||
if ctxs.zh_only and is_en then return end
|
||||
if ctxs.en_only and (not is_en) then return end
|
||||
|
||||
-- 4. 八卦图处理
|
||||
if ctxs.en_only then
|
||||
-- 英文模式:如果是八卦图,用 Preedit 还原空格
|
||||
if is_bagua_sentence then
|
||||
cand = restore_sentence_spacing(cand)
|
||||
if (not ctxs.en_only) and is_en then
|
||||
if cand.comment and string.find(cand.comment, "\226\152\175") then
|
||||
return -- 包含☯的英文句子直接丢弃,不输出
|
||||
end
|
||||
elseif (not ctxs.zh_only) then
|
||||
-- 混合模式:隐藏
|
||||
if is_bagua_sentence then return end
|
||||
end
|
||||
if ctxs.en_only and (not is_en) then
|
||||
return
|
||||
end
|
||||
|
||||
-- 5. 抑制句子
|
||||
if (not ctxs.en_only) and ctxs.drop_sentence_after_completion then
|
||||
if fast_type(cand) == "sentence" then return end
|
||||
if ctxs.zh_only and is_en then
|
||||
return
|
||||
end
|
||||
|
||||
-- 6 镜像抑制
|
||||
-- **③ 若需抑制句子候选:删掉所有 type 为 sentence 的候选(除了首候选本身不会被标记)**
|
||||
if ctxs.drop_sentence_after_completion then
|
||||
if fast_type(cand) == "sentence" then
|
||||
return
|
||||
end
|
||||
end
|
||||
|
||||
-- ④ 镜像抑制
|
||||
if ctxs.suppress_mirror and ctxs.suppress_set and ctxs.suppress_set[cand.text] then
|
||||
return
|
||||
end
|
||||
|
||||
-- 7 格式化 + 大写 + span 对齐
|
||||
cand = format_and_autocap(cand, ctxs.code_ctx)
|
||||
-- ⑤ 格式化 + 大写 + span 对齐
|
||||
cand = format_and_autocap(cand)
|
||||
cand = ctxs.unify_tail_span(cand)
|
||||
yield(cand)
|
||||
end
|
||||
@@ -873,24 +643,10 @@ function M.func(input, env)
|
||||
local sort_window = tonumber(env.settings.sort_window) or 30
|
||||
local pure_code = gsub(code, "[%s%p]", "")
|
||||
local pure_code_lc = pure_code:lower()
|
||||
local all_upper = code:find("^%u%u") ~= nil
|
||||
local first_upper = (not all_upper) and (code:find("^%u") ~= nil)
|
||||
local enable_cap = (code_len > 1 and not code:find("^[%l%p]"))
|
||||
local break_signal = (ctx:get_property("english_spacing") == "true") --接受空状态下的空格或者回车信号
|
||||
-- 计算最终的 prev_is_eng (上文是否为英文)
|
||||
local effective_prev_is_eng = env.prev_commit_is_eng
|
||||
if break_signal then
|
||||
-- 收到打断信号,强制认为上文不是英文(从而不自动加空格)
|
||||
effective_prev_is_eng = false
|
||||
end
|
||||
|
||||
local code_ctx = {
|
||||
pure_code = pure_code,
|
||||
pure_code_lc = pure_code_lc,
|
||||
all_upper = all_upper,
|
||||
first_upper = first_upper,
|
||||
enable_cap = enable_cap,
|
||||
spacing_mode = env.english_spacing_mode, -- 传递空格模式配置
|
||||
prev_is_eng = effective_prev_is_eng, -- 传递上一次上屏是否英文
|
||||
}
|
||||
|
||||
local en_only, zh_only = false, false
|
||||
|
||||
@@ -11,9 +11,9 @@ schema:
|
||||
请勾选【万象拼音】以启用,万象拼音标准版本,带声调的词库,支持语法模型,全拼、简拼、整句、声调辅助筛选,拥有超越大厂的输入体验!
|
||||
【文本框输入:/pinyin全拼,/zrm自然码,/flypy小鹤,/mspy,/sogou,/pyjj等,详见README.md】
|
||||
dependencies:
|
||||
- wanxiang_mixedcode #中英文混合词汇
|
||||
- wanxiang_reverse # 部件拆字,反查及辅码
|
||||
|
||||
- wanxiang_mixedcode #混合编码
|
||||
- wanxiang_reverse #部件拆字,反查及辅码
|
||||
- wanxiang_english #英文
|
||||
# 开关
|
||||
# reset: 默认状态。注释掉后,切换窗口时不会重置到默认状态。
|
||||
# states: 方案选单显示的名称。可以注释掉,仍可以通过快捷键切换。
|
||||
@@ -89,7 +89,8 @@ engine:
|
||||
- lua_translator@*input_statistics #一个输入统计的脚本,以日、周、月、年等维度的统计
|
||||
- table_translator@custom_phrase #自定义短语 custom_phrase.txt,用于置顶自定义编码候选词
|
||||
- table_translator@chengyu #简码成语词汇表导入
|
||||
- table_translator@wanxiang_mixedcode #中英等混合词汇表导入
|
||||
- table_translator@wanxiang_english #英文词汇表导入
|
||||
- table_translator@wanxiang_mixedcode #混合编码词汇表导入
|
||||
- table_translator@wanxiang_reverse #挂接部件组字和笔画反查
|
||||
- script_translator@add_user_dict #按需自造词
|
||||
- script_translator@user_dict_set #使用自造词
|
||||
@@ -97,7 +98,8 @@ engine:
|
||||
- reverse_lookup_filter@radical_reverse_lookup #部件拆字滤镜,放在super_comment前面,进一步被超级注释处理以获得拼音编码的提示
|
||||
- lua_filter@*auto_phrase #comment前,无感造词,关闭调频的时候将汉字写入次翻译器,当没有英文候选的时候追加\上屏可完成英文造词
|
||||
- lua_filter@*super_lookup #comment前,字词输入中反查辅助筛选
|
||||
- lua_filter@*super_filter #comment前,功能太多详见Lua文件
|
||||
- lua_filter@*super_filter #comment前,相关功能见Lua文件
|
||||
- lua_filter@*super_english #comment前,负责英文方案及中英混输中英文单词格式化,语句流,自动加空格等策略
|
||||
- lua_filter@*super_comment_preedit #OpenCC前,超级注释模块、超级preedit,支持错词提示、辅助码显示,部件组字读音注释,有声调、无声调全拼编码的转换,支持个性化配置和关闭相应的功能,详情搜索super_comment_preedit进行详细配置
|
||||
- simplifier@emoji #Emoji滤镜
|
||||
- simplifier@s2t #简繁切换通繁
|
||||
@@ -391,12 +393,21 @@ chengyu:
|
||||
initial_quality: 1.3
|
||||
|
||||
# 中文、英文、数字、符号等混合词汇
|
||||
wanxiang_mixedcode:
|
||||
dictionary: wanxiang_mixedcode
|
||||
wanxiang_english:
|
||||
dictionary: wanxiang_english
|
||||
user_dict: en
|
||||
enable_completion: true
|
||||
enable_sentence: true
|
||||
initial_quality: 2
|
||||
comment_format: #这里很重要:如果残留带声调字母,剩余编码提示的计算会出错,进而引发程序崩溃
|
||||
- xform/^~.+$//
|
||||
wanxiang_mixedcode:
|
||||
dictionary: wanxiang_mixedcode
|
||||
enable_completion: true
|
||||
enable_sentence: false
|
||||
initial_quality: 2.1
|
||||
comment_format: #这里很重要:如果残留带声调字母,剩余编码提示的计算会出错,进而引发程序崩溃
|
||||
- xform/.*//
|
||||
|
||||
# Emoji
|
||||
emoji:
|
||||
|
||||
@@ -2789,7 +2789,259 @@ mixed:
|
||||
- derive/^([A-Z]{1})/\L$1/
|
||||
- derive/^([a-z]{2})/\U$1/
|
||||
- erase/.*[\-+'_#@\.·0-9].*$/
|
||||
english:
|
||||
# 通用的派生规则
|
||||
通用规则:
|
||||
- derive|^(.{1,3})$|$1/|
|
||||
# 数字派生
|
||||
- derive/1([4-7|9])/$1teen/
|
||||
- derive/11/eleven/
|
||||
- derive/12/twelve/
|
||||
- derive/13/thirteen/
|
||||
- derive/15/fifteen/
|
||||
- derive/18/eighteen/
|
||||
- derive/0/o/
|
||||
- derive/0/O/
|
||||
- derive/0/zero/
|
||||
- derive/1/one/
|
||||
- derive/10/ten/
|
||||
- derive/2/to/
|
||||
- derive/2/two/
|
||||
- derive/3/three/
|
||||
- derive/4/for/
|
||||
- derive/4/four/
|
||||
- derive/5/five/
|
||||
- derive/6/six/
|
||||
- derive/7/seven/
|
||||
- derive/8/eight/
|
||||
- derive/9/nine/
|
||||
# 符号派生
|
||||
- derive/\+/plus/
|
||||
- derive/\./dot/
|
||||
- derive/@/at/
|
||||
- derive/-/hyphen/
|
||||
- derive/#/hash/
|
||||
- derive/#/number/
|
||||
- derive/#/sharp/
|
||||
- derive/♯/sharp/
|
||||
- derive / slash
|
||||
- derive/&/and/
|
||||
- derive/%/percent/
|
||||
# 派生无单个特殊字符的拼写
|
||||
- derive/[.]//
|
||||
- derive/[+]//
|
||||
- derive/[@]//
|
||||
- derive/[-]//
|
||||
- derive/[_]//
|
||||
# 派生无任何非字母数字字符的拼写
|
||||
- derive/[^a-zA-Z0-9]//
|
||||
# 禁用非英文、数字开头的编码
|
||||
- erase/^[^a-zA-Z0-9].+$/
|
||||
# 全小写
|
||||
- derive/^.+$/\L$0/
|
||||
# 全大写
|
||||
- derive/^.+$/\U$0/
|
||||
# 首字母大写
|
||||
- derive/^./\U$0/
|
||||
# 前 2~10 个字母大写
|
||||
- derive/^([a-z]{2})/\U$1/
|
||||
- derive/^([a-z]{3})/\U$1/
|
||||
- derive/^([a-z]{4})/\U$1/
|
||||
- derive/^([a-z]{5})/\U$1/
|
||||
- derive/^([a-z]{6})/\U$1/
|
||||
- derive/^([a-z]{7})/\U$1/
|
||||
- derive/^([a-z]{8})/\U$1/
|
||||
- derive/^([a-z]{9})/\U$1/
|
||||
- derive/^([a-z]{10})/\U$1/
|
||||
|
||||
全拼:
|
||||
__append:
|
||||
- derive/(?<!\d)1([1-9])(?!\d)/shi$1/
|
||||
- derive/([1-9])0000(?!0)/$1wan/
|
||||
- derive/([1-9])000(?!0)/$1qian/
|
||||
- derive/([1-9])00(?!0)/$1bai/
|
||||
- derive/([2-9])0(?!0)/$1shi/
|
||||
- derive/(?<!\d)([2-9])([1-9])(?!\d)/$1shi$2/
|
||||
- derive/\./dian/
|
||||
- derive/10/shi/
|
||||
- derive/0/ling/
|
||||
- derive/1/yi/
|
||||
- derive/2/er/
|
||||
- derive/2/liang/
|
||||
- derive/3/san/
|
||||
- derive/4/si/
|
||||
- derive/5/wu/
|
||||
- derive/6/liu/
|
||||
- derive/7/qi/
|
||||
- derive/8/ba/
|
||||
- derive/9/jiu/
|
||||
- derive/\+/jia/
|
||||
- derive/#/jing/
|
||||
自然码:
|
||||
__append:
|
||||
- derive/(?<!\d)1([1-9])(?!\d)/ui$1/
|
||||
- derive/([1-9])0000(?!0)/$1wj/
|
||||
- derive/([1-9])000(?!0)/$1qm/
|
||||
- derive/([1-9])00(?!0)/$1bl/
|
||||
- derive/([2-9])0(?!0)/$1ui/
|
||||
- derive/(?<!\d)([2-9])([1-9])(?!\d)/$1ui$2/
|
||||
- derive/\./dm/
|
||||
- derive/10/ui/
|
||||
- derive/0/ly/
|
||||
- derive/1/yi/
|
||||
- derive/2/er/
|
||||
- derive/2/ld/
|
||||
- derive/3/sj/
|
||||
- derive/4/si/
|
||||
- derive/5/wu/
|
||||
- derive/6/lq/
|
||||
- derive/7/qi/
|
||||
- derive/8/ba/
|
||||
- derive/9/jq/
|
||||
- derive/\+/jw/
|
||||
- derive/#/jy/
|
||||
小鹤双拼:
|
||||
__append:
|
||||
- derive/(?<!\d)1([1-9])(?!\d)/ui$1/
|
||||
- derive/([1-9])0000(?!0)/$1wj/
|
||||
- derive/([1-9])000(?!0)/$1qm/
|
||||
- derive/([1-9])00(?!0)/$1bd/
|
||||
- derive/([2-9])0(?!0)/$1ui/
|
||||
- derive/(?<!\d)([2-9])([1-9])(?!\d)/$1ui$2/
|
||||
- derive/\./dm/
|
||||
- derive/10/ui/
|
||||
- derive/0/lk/
|
||||
- derive/1/yi/
|
||||
- derive/2/er/
|
||||
- derive/2/ll/
|
||||
- derive/3/sj/
|
||||
- derive/4/si/
|
||||
- derive/5/wu/
|
||||
- derive/6/lq/
|
||||
- derive/7/qi/
|
||||
- derive/8/ba/
|
||||
- derive/9/jq/
|
||||
- derive/\+/jx/
|
||||
- derive/#/jk/
|
||||
微软双拼:
|
||||
__append:
|
||||
- derive/(?<!\d)1([1-9])(?!\d)/ui$1/
|
||||
- derive/([1-9])0000(?!0)/$1wj/
|
||||
- derive/([1-9])000(?!0)/$1qm/
|
||||
- derive/([1-9])00(?!0)/$1bl/
|
||||
- derive/([2-9])0(?!0)/$1ui/
|
||||
- derive/(?<!\d)([2-9])([1-9])(?!\d)/$1ui$2/
|
||||
- derive/\./dm/
|
||||
- derive/10/ui/
|
||||
- derive/0/l;/
|
||||
- derive/1/yi/
|
||||
- derive/2/er/
|
||||
- derive/2/or/
|
||||
- derive/2/ld/
|
||||
- derive/3/sj/
|
||||
- derive/4/si/
|
||||
- derive/5/wu/
|
||||
- derive/6/lq/
|
||||
- derive/7/qi/
|
||||
- derive/8/ba/
|
||||
- derive/9/jq/
|
||||
- derive/\+/jw/
|
||||
- derive/#/j;/
|
||||
搜狗双拼:
|
||||
__append:
|
||||
- derive/(?<!\d)1([1-9])(?!\d)/ui$1/
|
||||
- derive/([1-9])0000(?!0)/$1wj/
|
||||
- derive/([1-9])000(?!0)/$1qm/
|
||||
- derive/([1-9])00(?!0)/$1bl/
|
||||
- derive/([2-9])0(?!0)/$1ui/
|
||||
- derive/(?<!\d)([2-9])([1-9])(?!\d)/$1ui$2/
|
||||
- derive/\./dm/
|
||||
- derive/10/ui/
|
||||
- derive/0/l;/
|
||||
- derive/1/yi/
|
||||
- derive/2/er/
|
||||
- derive/2/or/
|
||||
- derive/2/ld/
|
||||
- derive/3/sj/
|
||||
- derive/4/si/
|
||||
- derive/5/wu/
|
||||
- derive/6/lq/
|
||||
- derive/7/qi/
|
||||
- derive/8/ba/
|
||||
- derive/9/jq/
|
||||
- derive/\+/jw/
|
||||
- derive/#/jy/
|
||||
智能ABC:
|
||||
__append:
|
||||
- derive/(?<!\d)1([1-9])(?!\d)/vi$1/
|
||||
- derive/([1-9])0000(?!0)/$1wj/
|
||||
- derive/([1-9])000(?!0)/$1qw/
|
||||
- derive/([1-9])00(?!0)/$1bl/
|
||||
- derive/([2-9])0(?!0)/$1vi/
|
||||
- derive/(?<!\d)([2-9])([1-9])(?!\d)/$1vi$2/
|
||||
- derive/\./dw/
|
||||
- derive/10/vi/
|
||||
- derive/0/ly/
|
||||
- derive/1/yi/
|
||||
- derive/2/er/
|
||||
- derive/2/or/
|
||||
- derive/2/lt/
|
||||
- derive/3/sj/
|
||||
- derive/4/si/
|
||||
- derive/5/wu/
|
||||
- derive/6/lr/
|
||||
- derive/7/qi/
|
||||
- derive/8/ba/
|
||||
- derive/9/jr/
|
||||
- derive/\+/jd/
|
||||
- derive/#/jy/
|
||||
紫光双拼:
|
||||
__append:
|
||||
- derive/(?<!\d)1([1-9])(?!\d)/ii$1/
|
||||
- derive/([1-9])0000(?!0)/$1wr/
|
||||
- derive/([1-9])000(?!0)/$1qf/
|
||||
- derive/([1-9])00(?!0)/$1bp/
|
||||
- derive/([2-9])0(?!0)/$1ii/
|
||||
- derive/(?<!\d)([2-9])([1-9])(?!\d)/$1ii$2/
|
||||
- derive/\./df/
|
||||
- derive/10/ii/
|
||||
- derive/0/l;/
|
||||
- derive/1/yi/
|
||||
- derive/2/er/
|
||||
- derive/2/oj/
|
||||
- derive/2/lg/
|
||||
- derive/3/sr/
|
||||
- derive/4/si/
|
||||
- derive/5/wu/
|
||||
- derive/6/lj/
|
||||
- derive/7/qi/
|
||||
- derive/8/ba/
|
||||
- derive/9/jj/
|
||||
- derive/\+/jx/
|
||||
- derive/#/j;/
|
||||
拼音加加:
|
||||
__append:
|
||||
- derive/(?<!\d)1([1-9])(?!\d)/ii$1/
|
||||
- derive/([1-9])0000(?!0)/$1wf/
|
||||
- derive/([1-9])000(?!0)/$1qj/
|
||||
- derive/([1-9])00(?!0)/$1bs/
|
||||
- derive/([2-9])0(?!0)/$1ii/
|
||||
- derive/(?<!\d)([2-9])([1-9])(?!\d)/$1ii$2/
|
||||
- derive/\./dj/
|
||||
- derive/10/ii/
|
||||
- derive/0/lq/
|
||||
- derive/1/yi/
|
||||
- derive/2/eq/
|
||||
- derive/2/lh/
|
||||
- derive/3/sf/
|
||||
- derive/4/si/
|
||||
- derive/5/wu/
|
||||
- derive/6/ln/
|
||||
- derive/7/qi/
|
||||
- derive/8/ba/
|
||||
- derive/9/jn/
|
||||
- derive/\+/jb/
|
||||
- derive/#/jq/
|
||||
|
||||
模糊音:
|
||||
__append:
|
||||
|
||||
10
wanxiang_english.dict.yaml
Executable file
10
wanxiang_english.dict.yaml
Executable file
@@ -0,0 +1,10 @@
|
||||
# Rime dictionary
|
||||
# encoding: utf-8
|
||||
|
||||
# 英文输入方案的词库
|
||||
---
|
||||
name: wanxiang_english
|
||||
version: "lts"
|
||||
import_tables:
|
||||
- dicts/en # 英文主词库
|
||||
...
|
||||
63
wanxiang_english.schema.yaml
Executable file
63
wanxiang_english.schema.yaml
Executable file
@@ -0,0 +1,63 @@
|
||||
# Rime schema
|
||||
# encoding: utf-8
|
||||
schema:
|
||||
schema_id: wanxiang_english
|
||||
name: 万象英文
|
||||
version: "lts"
|
||||
author: amzxyz
|
||||
description: |
|
||||
支持整句输入英文的语句流方案,拥有更加智能的词组上屏加空格策略,支持单词组、语句任意词组中首字母大写或者全大写格式化。
|
||||
|
||||
switches:
|
||||
- name: ascii_mode
|
||||
reset: 0
|
||||
states: [整句, 字母]
|
||||
|
||||
engine:
|
||||
processors:
|
||||
- lua_processor@*backspace_limit
|
||||
- ascii_composer
|
||||
- key_binder
|
||||
- speller
|
||||
- recognizer
|
||||
- selector
|
||||
- navigator
|
||||
- express_editor
|
||||
segmentors:
|
||||
- matcher
|
||||
- ascii_segmentor
|
||||
- abc_segmentor
|
||||
- punct_segmentor
|
||||
- fallback_segmentor
|
||||
translators:
|
||||
- table_translator
|
||||
- punct_translator
|
||||
filters:
|
||||
- lua_filter@*super_english
|
||||
- uniquifier
|
||||
|
||||
speller:
|
||||
alphabet: zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA
|
||||
delimiter: " '"
|
||||
algebra:
|
||||
__include: wanxiang_algebra:/english/通用规则
|
||||
__patch: wanxiang_algebra:/english/全拼 #可选的选项有(全拼, 自然码, 小鹤双拼, 微软双拼, 搜狗双拼, 智能ABC, 紫光双拼, 拼音加加)
|
||||
|
||||
translator:
|
||||
dictionary: wanxiang_english
|
||||
user_dict: en
|
||||
enable_completion: true
|
||||
enable_sentence: true
|
||||
initial_quality: 2
|
||||
comment_format:
|
||||
- xform/^~.+$//
|
||||
key_binder:
|
||||
import_preset: default
|
||||
|
||||
recognizer:
|
||||
import_preset: default
|
||||
|
||||
#英文自动加空格,除关闭外共有3种模式可供选择
|
||||
#off: 关闭; before: 总是前加; after: 总是后加;
|
||||
#smart: 智能模式,连续输入英文时从第二个单词起在词前加空格;非输入状态下的回车或空格都会打断加空格状态
|
||||
english_spacing: smart
|
||||
@@ -7,10 +7,8 @@
|
||||
# %APPDATA%\Rime (Windows)
|
||||
---
|
||||
name: wanxiang_mixedcode
|
||||
version: "LTS"
|
||||
sort: by_weight #字典初始排序,可選original或by_weight
|
||||
use_preset_vocabulary: false
|
||||
version: "lts"
|
||||
sort: by_weight
|
||||
import_tables:
|
||||
- dicts/en #英文词库
|
||||
- dicts/cn&en #中英文混合词库
|
||||
- dicts/cn&en #混合编码词库
|
||||
...
|
||||
|
||||
@@ -221,7 +221,7 @@ wanxiang_mixedcode:
|
||||
enable_sentence: false
|
||||
initial_quality: 2
|
||||
comment_format:
|
||||
- xform/^.+$//
|
||||
- xform/.*//
|
||||
|
||||
# Emoji
|
||||
emoji:
|
||||
|
||||
Reference in New Issue
Block a user