-- github.com/amzxyz
-- input_stats.lua
-- Enhanced Rime input statistics: LevelDB storage, rolling time windows,
-- an efficiency dashboard, and counting of Chinese characters only.
-- Commit lengths are tracked separately for 1, 2, 3, 4 and >=5 characters.
-- The dashboard shows peak speed and estimated average speed side by side.

local userdb = require("lib/userdb")
-- Statistics database handle shared by every function in this module.
local db = userdb.LevelDb("lua/stats")

-- Hard-coded display information.
local schema_name = "万象拼音"
local software_name = rime_api.get_distribution_code_name()

-- -----------------------------------------------------------------------------
-- Chinese character detection
-- -----------------------------------------------------------------------------

-- Inclusive code point ranges that count as "Chinese" for the statistics.
local CHINESE_RANGES = {
    {0x4E00, 0x9FFF}, {0x3400, 0x4DBF},
    {0x20000, 0x2A6DF}, {0x2A700, 0x2B73F},
    {0x2B740, 0x2B81F}, {0x2B820, 0x2CEAF},
    {0x2CEB0, 0x2EBEF}, {0x30000, 0x3134F},
    {0x31350, 0x323AF}, {0x2EBF0, 0x2EE5F},
    {0xF900, 0xFAFF}, {0x2F800, 0x2FA1F},
    {0x2E80, 0x2EFF}, {0x2F00, 0x2FDF},
}

--- Tell whether a code point lies in one of CHINESE_RANGES.
-- @param c integer code point
-- @return boolean
local function is_chinese_code(c)
    for i = 1, #CHINESE_RANGES do
        local range = CHINESE_RANGES[i]
        if c >= range[1] and c <= range[2] then return true end
    end
    return false
end

--- Count the code points of `text` accepted by is_chinese_code.
-- @param text string expected to be UTF-8
-- @return integer count; 0 when `text` is not valid UTF-8
local function get_pure_chinese_length(text)
    -- utf8.codes raises on malformed input; utf8.len reports it without raising,
    -- so check first instead of letting an error escape into the caller.
    if not utf8.len(text) then return 0 end
    local count = 0
    for _, code in utf8.codes(text) do
        if is_chinese_code(code) then count = count + 1 end
    end
    return count
end

--- Plain (non-pattern) prefix test.
-- Lua patterns work on bytes, so a set such as "[※◉]" would match any text
-- that merely shares a leading byte with those multi-byte characters.
-- @param s string
-- @param prefix string
-- @return boolean
local function starts_with(s, prefix)
    return s:sub(1, #prefix) == prefix
end

-- -----------------------------------------------------------------------------
-- In-memory cache: characters committed during the last minute
-- -----------------------------------------------------------------------------
local speed_buffer = {}
local last_cleanup_ts = 0

--- Sum the lengths of the buffered commits newer than 60 seconds.
-- Expired entries are pruned at most once every 5 seconds.
-- @param now integer current time as returned by os.time()
-- @return integer number of characters committed in the last 60 seconds
local function get_current_kpm(now)
    local threshold = now - 60

    if now - last_cleanup_ts > 5 then
        local kept = {}
        for _, item in ipairs(speed_buffer) do
            if item.ts > threshold then kept[#kept + 1] = item end
        end
        speed_buffer = kept
        last_cleanup_ts = now
    end

    local total = 0
    for _, item in ipairs(speed_buffer) do
        if item.ts > threshold then total = total + item.len end
    end
    return total
end

-- -----------------------------------------------------------------------------
-- Database helpers
-- -----------------------------------------------------------------------------

--- Open the database unless it is already loaded.
-- @return the result of db:open(), or true when already loaded
local function ensure_db_open()
    if not db:loaded() then return db:open() end
    return true
end

--- Read a numeric counter.
-- @param key string
-- @return number stored value, or 0 when missing or not numeric
local function db_get(key)
    local raw = db:fetch(key)
    if raw == nil then return 0 end
    return tonumber(raw) or 0
end

--- Build the per-day key prefix ("d_YYYYMMDD") for a timestamp, in local time.
-- @param ts integer timestamp as returned by os.time()
-- @return string
local function day_key_for(ts)
    local t = os.date("*t", ts)
    return string.format("d_%04d%02d%02d", t.year, t.month, t.day)
end

--- Add `amount` to both the per-day counter and the "total" counter.
-- @param key_suffix string counter name, e.g. "_len"
-- @param amount number increment (defaults to 1)
-- @param day_key string per-day key prefix
local function db_incr_day_and_total(key_suffix, amount, day_key)
    amount = amount or 1
    local d_key = day_key .. key_suffix
    db:update(d_key, tostring(db_get(d_key) + amount))
    local t_key = "total" .. key_suffix
    db:update(t_key, tostring(db_get(t_key) + amount))
end

--- Raise the per-day and "total" maxima to `new_val` when it is larger.
-- @param key_suffix string counter name, e.g. "_spd"
-- @param new_val number candidate maximum
-- @param day_key string per-day key prefix
local function db_set_max_day(key_suffix, new_val, day_key)
    local d_key = day_key .. key_suffix
    if new_val > db_get(d_key) then db:update(d_key, tostring(new_val)) end
    local t_key = "total" .. key_suffix
    if new_val > db_get(t_key) then db:update(t_key, tostring(new_val)) end
end

--- Erase every stored statistic and reset the in-memory speed buffer.
-- @return boolean true when the data was cleared
local function clear_all_data()
    if not ensure_db_open() then return false end

    -- Use the bulk-clear method when the database object offers one.
    if db.empty then
        db:empty()
        speed_buffer = {}
        return true
    end

    -- Otherwise collect every key first, then erase them one by one.
    -- NOTE(review): assumes the value returned by db:query("") can be used
    -- directly as a for-in iterator -- confirm against lib/userdb.
    local ok, iter = pcall(function() return db:query("") end)
    if ok and iter then
        local keys = {}
        for key, _ in iter do keys[#keys + 1] = key end
        for _, key in ipairs(keys) do db:erase(key) end
        speed_buffer = {}
        return true
    end
    return false
end

-- -----------------------------------------------------------------------------
-- Recording
-- -----------------------------------------------------------------------------

-- Counter suffix for commits of exactly 1..4 Chinese characters.
local LENGTH_SUFFIX = {"_l1", "_l2", "_l3", "_l4"}

--- Record one commit in the per-day and total counters.
-- @param hanzi_len integer number of Chinese characters committed
-- @param code_len integer length of the input code that produced them
local function record_stats(hanzi_len, code_len)
    if not ensure_db_open() then return end

    local now = os.time()
    local day_key = day_key_for(now)

    speed_buffer[#speed_buffer + 1] = {ts = now, len = hanzi_len}
    local current_kpm = get_current_kpm(now)

    db_incr_day_and_total("_len", hanzi_len, day_key)
    db_incr_day_and_total("_cnt", 1, day_key)
    db_incr_day_and_total("_code", code_len, day_key)

    local bucket = LENGTH_SUFFIX[hanzi_len]
    if not bucket and hanzi_len > 4 then bucket = "_l_gt4" end
    if bucket then db_incr_day_and_total(bucket, 1, day_key) end

    db_set_max_day("_spd", current_kpm, day_key)
end

-- -----------------------------------------------------------------------------
-- Aggregation
-- -----------------------------------------------------------------------------

-- Counters that are summed across days ("spd" is a maximum, handled apart).
local SUM_FIELDS = {"len", "cnt", "code", "l1", "l2", "l3", "l4", "l_gt4"}

--- Aggregate the counters over a rolling window of days.
-- @param days_lookback integer number of days including today;
--        0 selects the all-time "total" counters
-- @return table with len, cnt, code, spd, l1, l2, l3, l4, l_gt4;
--         nil when the database cannot be opened
local function aggregate_stats(days_lookback)
    if not ensure_db_open() then return nil end

    local res = {spd = 0}

    if days_lookback == 0 then
        for _, field in ipairs(SUM_FIELDS) do
            res[field] = db_get("total_" .. field)
        end
        res.spd = db_get("total_spd")
        return res
    end

    for _, field in ipairs(SUM_FIELDS) do res[field] = 0 end

    -- Walk back by calendar day rather than by 86400-second steps: a local
    -- day is not always 24 hours long (DST), so fixed steps can skip or
    -- repeat a date. os.time normalises out-of-range day values, and noon
    -- stays clear of any clock change.
    local today = os.date("*t", os.time())
    for i = 0, days_lookback - 1 do
        local target_ts = os.time({
            year = today.year, month = today.month,
            day = today.day - i, hour = 12,
        })
        local day_key = day_key_for(target_ts)

        for _, field in ipairs(SUM_FIELDS) do
            res[field] = res[field] + db_get(day_key .. "_" .. field)
        end

        local daily_spd = db_get(day_key .. "_spd")
        if daily_spd > res.spd then res.spd = daily_spd end
    end
    return res
end

-- -----------------------------------------------------------------------------
-- UI rendering
-- -----------------------------------------------------------------------------

--- Render a 10-cell bar for a percentage.
-- @param percent number in 0..100
-- @return string made of filled and empty cells
local function draw_bar(percent)
    local length = 10
    local filled_len = math.floor((percent / 100) * length)
    local empty_len = length - filled_len
    return string.rep("▓", filled_len) .. string.rep("░", empty_len)
end

--- Round to the nearest integer so the value is safe for "%d".
-- On Lua 5.3+ string.format("%d", x) raises when x has a fractional part.
-- @param x number
-- @return integer
local function round_int(x)
    return math.floor(x + 0.5)
end

--- Build the dashboard text for one aggregate.
-- @param title string window name shown in the heading
-- @param data table as returned by aggregate_stats (may be nil)
-- @return string dashboard, or a "no data" line when there are no commits
local function format_summary(title, data)
    if not data or data.cnt == 0 then return "※ " .. title .. "暂无数据" end

    local avg_code = 0
    local phrase_rate = 0
    if data.len > 0 then
        avg_code = data.code / data.len
        phrase_rate = (data.len - data.l1) / data.len * 100
    end

    -- Estimated average speed. No typing duration is recorded, so this uses
    -- a rule of thumb: assume about 2 seconds per commit, i.e.
    -- characters / (commits * 2 s / 60).
    local estimated_avg_spd = 0
    if data.cnt > 0 then
        estimated_avg_spd = math.floor(data.len / ((data.cnt * 2) / 60))
        -- An estimate above the recorded peak is replaced by 60% of the peak.
        if estimated_avg_spd > data.spd then
            estimated_avg_spd = math.floor(data.spd * 0.6)
        end
        -- Floor for very small samples.
        if estimated_avg_spd == 0 and data.len > 0 then
            estimated_avg_spd = data.len
        end
    end

    local p1 = (data.l1 / data.cnt) * 100
    local p2 = (data.l2 / data.cnt) * 100
    local p3 = (data.l3 / data.cnt) * 100
    local p4 = (data.l4 / data.cnt) * 100
    local p_gt4 = (data.l_gt4 / data.cnt) * 100
    local ver = rime_api.get_distribution_version() or ""

    return string.format(
        "※ %s统计 · 效率仪表盘\n" ..
        "───────────────────\n" ..
        "📊 综合数据\n" ..
        " 总字数:%d\t上屏:%d\n" ..
        " 峰值速:%d\t均速:%d\n" ..
        "───────────────────\n" ..
        "⚡ 核心效率\n" ..
        " 平均编码:%.2f 键/字\n" ..
        " 词组连打:%.1f %%\n" ..
        "───────────────────\n" ..
        "📈 字词分布\n" ..
        " [1] %3d%% %s\n" ..
        " [2] %3d%% %s\n" ..
        " [3] %3d%% %s\n" ..
        " [4] %3d%% %s\n" ..
        " [≥5] %2d%% %s\n" ..
        "───────────────────\n" ..
        "◉ 方案:%s\n" ..
        "◉ 平台:%s %s",
        title, data.len, data.cnt,
        data.spd, estimated_avg_spd,
        avg_code, phrase_rate,
        -- Percentages are floats; round them before handing them to %d.
        round_int(p1), draw_bar(p1),
        round_int(p2), draw_bar(p2),
        round_int(p3), draw_bar(p3),
        round_int(p4), draw_bar(p4),
        round_int(p_gt4), draw_bar(p_gt4),
        schema_name, software_name, ver
    )
end

-- -----------------------------------------------------------------------------
-- Init & Fini
-- -----------------------------------------------------------------------------

--- Open the database and subscribe to commit notifications.
-- @param env component environment supplied by the host
local function init(env)
    ensure_db_open()
    if env.stat_notifier then env.stat_notifier:disconnect() end

    local context = env.engine.context
    env.stat_notifier = context.commit_notifier:connect(function(ctx)
        local commit_text = ctx:get_commit_text()
        if not commit_text or commit_text == "" then return end
        -- Skip statistic commands and the dashboard candidates themselves.
        if commit_text:sub(1, 1) == "/" then return end
        if starts_with(commit_text, "※") or starts_with(commit_text, "◉") then
            return
        end

        local hanzi_len = get_pure_chinese_length(commit_text)
        if hanzi_len == 0 then return end

        -- NOTE(review): this is the byte length of the script text; confirm
        -- that it matches the number of keys typed for this schema.
        local script_text = ctx:get_script_text() or ""
        local code_len = string.len(script_text)
        if code_len == 0 then code_len = hanzi_len * 2 end

        -- Ignore the same text reported twice within 0.05 s.
        -- NOTE(review): os.clock() is documented as CPU time, not wall-clock
        -- time -- confirm that quick repeats of the same text are not lost.
        local now_clock = os.clock()
        if env.last_commit_time and (now_clock - env.last_commit_time < 0.05) then
            if env.last_commit_text == commit_text then return end
        end
        env.last_commit_time = now_clock
        env.last_commit_text = commit_text

        record_stats(hanzi_len, code_len)
    end)
end

--- Unsubscribe from commit notifications and close the database.
-- @param env component environment supplied by the host
local function fini(env)
    if env.stat_notifier then
        env.stat_notifier:disconnect()
        env.stat_notifier = nil
    end
    if db and db:loaded() then
        db:close()
    end
end

-- Dashboard commands: heading plus rolling window in days (0 = all time).
local COMMANDS = {
    ["/rtj"] = {title = "今日", days = 1},
    ["/ztj"] = {title = "七日", days = 7},
    ["/ytj"] = {title = "卅日", days = 30},
    ["/ntj"] = {title = "本年", days = 365},
    ["/ttj"] = {title = "生涯", days = 0},
}

--- Translator entry point: answers the "/..." statistic commands.
-- Yields one candidate for a recognised command and nothing otherwise.
-- @param input string current input
-- @param seg segment being translated
-- @param env component environment supplied by the host
local function translator(input, seg, env)
    if input:sub(1, 1) ~= "/" then return end

    if input == "/tjql" then
        if clear_all_data() then
            yield(Candidate("stat", seg.start, seg._end, "※ 统计数据已全部清空。", "🗑️"))
        else
            yield(Candidate("stat", seg.start, seg._end, "※ 数据清空失败,请检查权限。", "❌"))
        end
        return
    end

    local command = COMMANDS[input]
    if not command then return end

    local data = aggregate_stats(command.days)
    if data then
        local summary = format_summary(command.title, data)
        yield(Candidate("stat", seg.start, seg._end, summary, "📊"))
    end
end

return { init = init, func = translator, fini = fini }