From 3ce582e1951acb6dc381332d8e61381767d35a36 Mon Sep 17 00:00:00 2001 From: Dvel Date: Sun, 6 Aug 2023 18:37:28 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20corrector.lua=20=E9=94=99=E9=9F=B3?= =?UTF-8?q?=E9=94=99=E5=AD=97=E6=8F=90=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cn_dicts/others.dict.yaml | 55 ++++++++++++++----------- double_pinyin.schema.yaml | 3 ++ double_pinyin_abc.schema.yaml | 3 ++ double_pinyin_flypy.schema.yaml | 3 ++ double_pinyin_mspy.schema.yaml | 3 ++ double_pinyin_ziguang.schema.yaml | 3 ++ lua/corrector.lua | 68 +++++++++++++++++++++++++++++++ rime.lua | 25 ++++++++++-- rime_ice.schema.yaml | 3 ++ 9 files changed, 139 insertions(+), 27 deletions(-) create mode 100644 lua/corrector.lua diff --git a/cn_dicts/others.dict.yaml b/cn_dicts/others.dict.yaml index 1d9b278..5703a1a 100644 --- a/cn_dicts/others.dict.yaml +++ b/cn_dicts/others.dict.yaml @@ -8,12 +8,11 @@ # --- name: others -version: "2023-06-13" +version: "2023-08-06" sort: by_weight ... ##### 容错词 -# 「血xue、xie」「谁shui、shei」「露lu、lou」「熟shu、shou」「密钥yao、yue」已在 base 中实现 -# --- +# 与 corrector.lua 配合,上面的是正确的,下面是错误的 # 「馄饨」,正确是 tun,北方口语是 dun 馄饨 hun tun 馄饨 hun dun @@ -29,31 +28,13 @@ sort: by_weight 比萨饼 pi sa bing 吃比萨 chi bi sa 吃比萨 chi pi sa -超级至尊比萨 chao ji zhi zun bi sa -超级至尊比萨 chao ji zhi zun pi sa -至尊比萨 zhi zun bi sa -至尊比萨 zhi zun pi sa -大西洋珍鲑比萨 da xi yang zhen gui bi sa -大西洋珍鲑比萨 da xi yang zhen gui pi sa -海鲜比萨 hai xian bi sa -海鲜比萨 hai xian pi sa -蔬菜比萨 shu cai bi sa -蔬菜比萨 shu cai pi sa -做比萨 zuo bi sa -做比萨 zuo pi sa -可口的比萨 ke kou de bi sa -可口的比萨 ke kou de pi sa -烤比萨 kao bi sa -烤比萨 kao pi sa -美味的比萨 mei wei de bi sa -美味的比萨 mei wei de pi sa # 「扑街」,正确是 jie,口语是 gai 扑街 pu jie 扑街 pu gai -街溜子 jie liu zi -街溜子 gai liu zi 扑街仔 pu jie zai 扑街仔 pu gai zai +街溜子 jie liu zi +街溜子 gai liu zi # 「说服」,正确是 shuo,但「游说」「说客」仍然保留着 shui 的读音 说服 shuo fu 说服 shui fu @@ -80,6 +61,8 @@ sort: by_weight 一模一样 yi mo yi yang 装模作样 zhuang mu zuo yang 装模作样 zhuang mo zuo yang +人模狗样 ren mu gou yang +人模狗样 ren mo gou yang # 「阿房宫」的注音争议比较大 阿房宫 e pang gong 阿房宫 a pang gong @@ -99,6 +82,32 @@ sort: by_weight 南无阿弥陀佛 na mo a mi tuo fo 南无阿弥陀佛 nan wu e mi tuo fo 南无阿弥陀佛 nan wu a mi tuo fo +# 其他 +给予 ji yu +给予 gei yu +槟榔 bing lang +槟榔 bin lang +张柏芝 zhang bo zhi +张柏芝 zhang bai zhi +藤蔓 teng wan +藤蔓 teng man +弄堂 long tang +弄堂 nong tang +心宽体胖 xin kuan ti pan +心宽体胖 xin kuan ti pang +埋怨 man yuan +埋怨 mai yuan +虚与委蛇 xu yu wei yi +虚与委蛇 xu yu wei she +木讷 mu ne +木讷 mu na +# 错字 +曾经 ceng jing +曾今 ceng jin +按捺 an na +按耐 an nai +按捺不住 an na bu zhu +按耐不住 an nai bu zhu ##### 叠字 diff --git a/double_pinyin.schema.yaml b/double_pinyin.schema.yaml index fa668e3..ec33a82 100644 --- a/double_pinyin.schema.yaml +++ b/double_pinyin.schema.yaml @@ -77,6 +77,7 @@ engine: - lua_translator@unicode # Unicode - lua_translator@number_translator # 数字、金额大写 filters: + - lua_filter@corrector # 错音错字提示 - simplifier@emoji # Emoji - simplifier@traditionalize # 简繁切换 - lua_filter@autocap_filter # 英文自动大写 @@ -114,6 +115,8 @@ reduce_english_filter: translator: dictionary: rime_ice # 挂载词库 rime_ice.dict.yaml prism: double_pinyin # 多方案共用一个词库时,为避免冲突,需要用 prism 指定一个名字。 + spelling_hints: 8 # corrector.lua :为了让错音错字提示的 Lua 同时适配全拼双拼,将拼音显示在 comment 中 + always_show_comments: true # corrector.lua :Rime 默认在 preedit 等于 comment 时取消显示 comment,这里强制一直显示,供 corrector.lua 做判断用。 initial_quality: 1.2 # 拼音的权重应该比英文大 preedit_format: # preedit_format 影响到输入框的显示和“Shift+回车”上屏的字符 - xform/([bpmnljqxy])n/$1in/ diff --git a/double_pinyin_abc.schema.yaml b/double_pinyin_abc.schema.yaml index 223d391..48acc6d 100644 --- a/double_pinyin_abc.schema.yaml +++ b/double_pinyin_abc.schema.yaml @@ -76,6 +76,7 @@ engine: - lua_translator@unicode # Unicode - lua_translator@number_translator # 数字、金额大写 filters: + - lua_filter@corrector # 错音错字提示 - simplifier@emoji # Emoji - simplifier@traditionalize # 简繁切换 - lua_filter@autocap_filter # 英文自动大写 @@ -113,6 +114,8 @@ reduce_english_filter: translator: dictionary: rime_ice # 挂载词库 rime_ice.dict.yaml prism: double_pinyin_abc # 多方案共用一个词库时,为避免冲突,需要用 prism 指定一个名字。 + spelling_hints: 8 # corrector.lua :为了让错音错字提示的 Lua 同时适配全拼双拼,将拼音显示在 comment 中 + always_show_comments: true # corrector.lua :Rime 默认在 preedit 等于 comment 时取消显示 comment,这里强制一直显示,供 corrector.lua 做判断用。 initial_quality: 1.2 # 拼音的权重应该比英文大 preedit_format: # preedit_format 影响到输入框的显示和“Shift+回车”上屏的字符 - xform/o(\w)/0$1/ diff --git a/double_pinyin_flypy.schema.yaml b/double_pinyin_flypy.schema.yaml index 48ea327..cd0ff86 100644 --- a/double_pinyin_flypy.schema.yaml +++ b/double_pinyin_flypy.schema.yaml @@ -77,6 +77,7 @@ engine: - lua_translator@unicode # Unicode - lua_translator@number_translator # 数字、金额大写 filters: + - lua_filter@corrector # 错音错字提示 - simplifier@emoji # Emoji - simplifier@traditionalize # 简繁切换 - lua_filter@autocap_filter # 英文自动大写 @@ -114,6 +115,8 @@ reduce_english_filter: translator: dictionary: rime_ice # 挂载词库 rime_ice.dict.yaml prism: double_pinyin_flypy # 多方案共用一个词库时,为避免冲突,需要用 prism 指定一个名字。 + spelling_hints: 8 # corrector.lua :为了让错音错字提示的 Lua 同时适配全拼双拼,将拼音显示在 comment 中 + always_show_comments: true # corrector.lua :Rime 默认在 preedit 等于 comment 时取消显示 comment,这里强制一直显示,供 corrector.lua 做判断用。 initial_quality: 1.2 # 拼音的权重应该比英文大 preedit_format: # preedit_format 影响到输入框的显示和“Shift+回车”上屏的字符 - xform/([bpmfdtnljqx])n/$1iao/ diff --git a/double_pinyin_mspy.schema.yaml b/double_pinyin_mspy.schema.yaml index 141e6c5..930be8b 100644 --- a/double_pinyin_mspy.schema.yaml +++ b/double_pinyin_mspy.schema.yaml @@ -77,6 +77,7 @@ engine: - lua_translator@unicode # Unicode - lua_translator@number_translator # 数字、金额大写 filters: + - lua_filter@corrector # 错音错字提示 - simplifier@emoji # Emoji - simplifier@traditionalize # 简繁切换 - lua_filter@autocap_filter # 英文自动大写 @@ -114,6 +115,8 @@ reduce_english_filter: translator: dictionary: rime_ice # 挂载词库 rime_ice.dict.yaml prism: double_pinyin_mspy # 多方案共用一个词库时,为避免冲突,需要用 prism 指定一个名字。 + spelling_hints: 8 # corrector.lua :为了让错音错字提示的 Lua 同时适配全拼双拼,将拼音显示在 comment 中 + always_show_comments: true # corrector.lua :Rime 默认在 preedit 等于 comment 时取消显示 comment,这里强制一直显示,供 corrector.lua 做判断用。 initial_quality: 1.2 # 拼音的权重应该比英文大 preedit_format: # preedit_format 影响到输入框的显示和“Shift+回车”上屏的字符 - xform/([aoe])(\w)/0$2/ diff --git a/double_pinyin_ziguang.schema.yaml b/double_pinyin_ziguang.schema.yaml index ab07c69..53cc243 100644 --- a/double_pinyin_ziguang.schema.yaml +++ b/double_pinyin_ziguang.schema.yaml @@ -77,6 +77,7 @@ engine: - lua_translator@unicode # Unicode - lua_translator@number_translator # 数字、金额大写 filters: + - lua_filter@corrector # 错音错字提示 - simplifier@emoji # Emoji - simplifier@traditionalize # 简繁切换 - lua_filter@autocap_filter # 英文自动大写 @@ -114,6 +115,8 @@ reduce_english_filter: translator: dictionary: rime_ice # 挂载词库 rime_ice.dict.yaml prism: double_pinyin_ziguang # 多方案共用一个词库时,为避免冲突,需要用 prism 指定一个名字。 + spelling_hints: 8 # corrector.lua :为了让错音错字提示的 Lua 同时适配全拼双拼,将拼音显示在 comment 中 + always_show_comments: true # corrector.lua :Rime 默认在 preedit 等于 comment 时取消显示 comment,这里强制一直显示,供 corrector.lua 做判断用。 initial_quality: 1.2 # 拼音的权重应该比英文大 preedit_format: # preedit_format 影响到输入框的显示和“Shift+回车”上屏的字符 - xform/o(\w)/0$1/ # 零聲母先改爲0,以方便後面的轉換 diff --git a/lua/corrector.lua b/lua/corrector.lua new file mode 100644 index 0000000..92c93ac --- /dev/null +++ b/lua/corrector.lua @@ -0,0 +1,68 @@ +--[[ + 错音错字提示。 + 示例:「给予」的正确读音是 ji yu,当用户输入 gei yu 时,在候选项的 comment 显示正确读音 + 示例:「按耐」的正确写法是「按捺」,当用户输入「按耐」时,在候选项的 comment 显示正确写法 + + 为了让这个 Lua 同时适配全拼与双拼,使用 `spelling_hints` 生成的 comment(全拼拼音)作为通用的判断条件。 +--]] + +-- 容错词在 cn_dicts/others.dict.yaml +local corrections = { + -- 错音 + ["hun dun"] = { text = "馄饨", comment = "hun tun" }, + ["zhu jiao"] = { text = "主角", comment = "zhu jue" }, + ["jiao se"] = { text = "角色", comment = "jue se" }, + ["pi sa"] = { text = "比萨", comment = "bi sa" }, + ["chi pi sa"] = { text = "吃比萨", comment = "chi bi sa" }, + ["pi sa bing"] = { text = "比萨饼", comment = "bi sa bing" }, + ["pu gai"] = { text = "扑街", comment = "pu jie" }, + ["pu gai zai"] = { text = "扑街仔", comment = "pu jie zai" }, + ["gai liu zi"] = { text = "街溜子", comment = "jie liu zi" }, + ["shui fu"] = { text = "说服", comment = "shuo fu" }, + ["zuo ji"] = { text = "坐骑", comment = "zuo qi" }, + ["yi ji jue chen"] = { text = "一骑绝尘", comment = "yi qi jue chen" }, + ["yi ji hong chen fei zi xiao"] = { text = "一骑红尘妃子笑", comment = "yi qi hong chen fei zi xiao" }, + ["qian li zou dan ji"] = { text = "千里走单骑", comment = "qian li zou dan qi" }, + ["yi ji dang qian"] = { text = "一骑当千", comment = "yi qi dang qian" }, + ["dao hang"] = { text = "道行", comment = "dao heng" }, + ["mo yang"] = { text = "模样", comment = "mu yang" }, + ["you mo you yang"] = { text = "有模有样", comment = "you mu you yang" }, + ["yi mo yi yang"] = { text = "一模一样", comment = "yi mu yi yang" }, + ["zhuang mo zuo yang"] = { text = "装模作样", comment = "zhuang mu zuo yang" }, + ["ren mo gou yang"] = { text = "人模狗样", comment = "ren mu gou yang" }, + ["a mi tuo fo"] = { text = "阿弥陀佛", comment = "e mi tuo fo" }, + ["na mo a mi tuo fo"] = { text = "南无阿弥陀佛", comment = "na mo e mi tuo fo" }, + ["nan wu a mi tuo fo"] = { text = "南无阿弥陀佛", comment = "na mo e mi tuo fo" }, + ["nan wu e mi tuo fo"] = { text = "南无阿弥陀佛", comment = "na mo e mi tuo fo" }, + ["gei yu"] = { text = "给予", comment = "ji yu" }, + ["bin lang"] = { text = "槟榔", comment = "bing lang" }, + ["zhang bai zhi"] = { text = "张柏芝", comment = "zhang bo zhi" }, + ["teng man"] = { text = "藤蔓", comment = "teng wan" }, + ["nong tang"] = { text = "弄堂", comment = "long tang" }, + ["xin kuan ti pang"] = { text = "心宽体胖", comment = "xin kuan ti pan" }, + ["mai yuan"] = { text = "埋怨", comment = "man yuan" }, + ["xu yu wei she"] = { text = "虚与委蛇", comment = "xu yu wei yi" }, + ["mu na"] = { text = "木讷", comment = "mu ne" }, + -- 错字 + ["ceng jin"] = { text = "曾今", comment = "曾经" }, + ["an nai"] = { text = "按耐", comment = "按捺(na)" }, + ["an nai bu zhu"] = { text = "按耐不住", comment = "按捺(na)不住" }, + ["sheng di ya ge"] = { text = "圣地亚哥", comment = "圣迭戈" }, + ["bie jie"] = { text = "别介", comment = "别价" }, + ["beng jie"] = { text = "甭介", comment = "甭价" }, +} + +local function corrector(input) + for cand in input:iter() do + -- cand.comment 是目前输入的词汇的完整拼音 + local c = corrections[cand.comment] + if c and cand.text == c.text then + cand:get_genuine().comment = c.comment + else + cand:get_genuine().comment = "" + end + yield(cand) + end +end + +return corrector diff --git a/rime.lua b/rime.lua index d2df709..c1fc3b0 100644 --- a/rime.lua +++ b/rime.lua @@ -1,12 +1,17 @@ -- Rime Lua 扩展 https://github.com/hchunhui/librime-lua -- 文档 https://github.com/hchunhui/librime-lua/wiki/Scripting --- v 模式 symbols 优先(全拼) -v_filter = require("v_filter") + + +-- processors: -- 以词定字,可在 default.yaml key_binder 下配置快捷键,默认为左右中括号 [ ] select_character = require("select_character") + + +-- translators: + -- 日期时间,可在方案中配置触发关键字。 date_translator = require("date_translator") @@ -16,6 +21,16 @@ unicode = require("unicode") -- 数字、人民币大写,R 开头 number_translator = require("number_translator") + + +-- filters: + +-- 错音错字提示 +corrector = require("corrector") + +-- v 模式 symbols 优先(全拼) +v_filter = require("v_filter") + -- 自动大写英文词汇 autocap_filter = require("autocap_filter") @@ -23,11 +38,13 @@ autocap_filter = require("autocap_filter") reduce_english_filter = require("reduce_english_filter") + + -- 默认未启用: -- 长词优先(全拼) -- 在 engine/filters 增加 - lua_filter@long_word_filter --- 在方案里写配置项: +-- 在方案里写配置项: -- 提升 count 个词语,插入到第 idx 个位置。 -- 示例:将 2 个词插入到第 4、5 个候选项,输入 jie 得到「1接 2解 3姐 4饥饿 5极恶」 -- long_word_filter: @@ -43,7 +60,7 @@ cn_en_spacer = require("cn_en_spacer") -- 在 engine/filters 增加 - lua_filter@t9_preedit t9_preedit = require("t9_preedit") --- 根据是否在用户词典,在结尾加上一个星号 * +-- 根据是否在用户词典,在 comment 上加上一个星号 * -- 在 engine/filters 增加 - lua_filter@is_in_user_dict -- 在方案里写配置项: -- is_in_user_dict: true 为输入过的内容加星号 diff --git a/rime_ice.schema.yaml b/rime_ice.schema.yaml index ff30611..805ebf5 100644 --- a/rime_ice.schema.yaml +++ b/rime_ice.schema.yaml @@ -67,6 +67,7 @@ engine: - lua_translator@unicode # Unicode - lua_translator@number_translator # 数字、金额大写 filters: + - lua_filter@corrector # 错音错字提示 - simplifier@emoji # Emoji - simplifier@traditionalize # 简繁切换 - lua_filter@v_filter # v 模式 symbols 优先(否则是英文优先) @@ -103,6 +104,8 @@ reduce_english_filter: # 主翻译器,拼音 translator: dictionary: rime_ice # 挂载词库 rime_ice.dict.yaml + spelling_hints: 8 # corrector.lua :为了让错音错字提示的 Lua 同时适配全拼双拼,将拼音显示在 comment 中 + always_show_comments: true # corrector.lua :Rime 默认在 preedit 等于 comment 时取消显示 comment,这里强制一直显示,供 corrector.lua 做判断用。 initial_quality: 1.2 # 拼音的权重应该比英文大 preedit_format: # preedit_format 影响到输入框的显示和“Shift+回车”上屏的字符 - xform/([jqxy])v/$1u/ # 显示为 ju qu xu yu