feat: 统计新增数据库储存方式,全新的格式化样式,多数据维度

This commit is contained in:
amzxyz
2025-12-11 00:58:51 +08:00
parent 30cdfdecba
commit 5b8e37855d
2 changed files with 285 additions and 229 deletions

View File

@@ -216907,6 +216907,7 @@ sort: by_weight
不是福 bú shì fú 124
不是该 bú shì gāi 248
不适感 bú shì gǎn 284
不是干 bú shì gàn 252
不是港 bú shì gǎng 89
不是个 bú shì gè 346
不适格 bú shì gé 344

View File

@@ -1,262 +1,317 @@
-- github.com/amzxyz
-- 一个用于统计输入字数和其他时间维度的统计。先搭起一个框架,有志之士看看如何优化,统计什么数据,什么维度,构建一个有效的统计信息
-- 硬编码输入方案信息
-- input_stats.lua
-- Rime 统计增强版 (LevelDB / 滚动时间窗口 / 效率仪表盘 / 汉字提纯)
-- 维度升级:1, 2, 3, 4, ≥5 字独立统计
-- UI优化:综合数据田字格布局,峰值与均速分开显示
local userdb = require("lib/userdb")
-- 初始化数据库
local db = userdb.LevelDb("lua/stats")
-- 硬编码信息
local schema_name = "万象拼音"
local software_name = rime_api.get_distribution_code_name()
local software_version = rime_api.get_distribution_version()
-- 初始化统计表(若未加载)
input_stats = input_stats or {
daily = {count = 0, length = 0, fastest = 0, ts = 0},
weekly = {count = 0, length = 0, fastest = 0, ts = 0},
monthly = {count = 0, length = 0, fastest = 0, ts = 0},
yearly = {count = 0, length = 0, fastest = 0, ts = 0},
lengths = {},
daily_max = 0,
recent = {}
}
-- 时间戳工具函数
local function start_of_day(t)
return os.time{year=t.year, month=t.month, day=t.day, hour=0}
end
local function start_of_week(t)
local d = t.wday == 1 and 6 or (t.wday - 2)
return os.time{year=t.year, month=t.month, day=t.day - d, hour=0}
end
local function start_of_month(t)
return os.time{year=t.year, month=t.month, day=1, hour=0}
end
local function start_of_year(t)
return os.time{year=t.year, month=1, day=1, hour=0}
-- -----------------------------------------------------------------------------
-- 汉字识别核心逻辑
-- -----------------------------------------------------------------------------
-- Report whether a Unicode code point counts as a Chinese character for the
-- statistics.
-- c: numeric code point.
-- Returns true when c falls in any of the inclusive ranges below, else false.
local function is_chinese_code(c)
    if c >= 0x4E00 and c <= 0x9FFF then return true end     -- CJK Unified Ideographs
    if c >= 0x3400 and c <= 0x4DBF then return true end     -- Extension A
    if c >= 0x20000 and c <= 0x2A6DF then return true end   -- Extension B
    if c >= 0x2A700 and c <= 0x2B73F then return true end   -- Extension C
    if c >= 0x2B740 and c <= 0x2B81F then return true end   -- Extension D
    if c >= 0x2B820 and c <= 0x2CEAF then return true end   -- Extension E
    if c >= 0x2CEB0 and c <= 0x2EBEF then return true end   -- Extension F
    if c >= 0x30000 and c <= 0x3134F then return true end   -- Extension G
    if c >= 0x31350 and c <= 0x323AF then return true end   -- Extension H
    if c >= 0x2EBF0 and c <= 0x2EE5F then return true end   -- Extension I
    if c >= 0xF900 and c <= 0xFAFF then return true end     -- CJK Compatibility Ideographs
    if c >= 0x2F800 and c <= 0x2FA1F then return true end   -- Compatibility Ideographs Supplement
    if c >= 0x2E80 and c <= 0x2EFF then return true end     -- CJK Radicals Supplement
    if c >= 0x2F00 and c <= 0x2FDF then return true end     -- Kangxi Radicals
    return false
end
-- 判断是否是统计命令
local function is_summary_command(text)
return text == "/rtj" or text == "/ztj" or text == "/ytj" or text == "/ntj" or text == "/tj"
-- Count how many code points of a UTF-8 string are Chinese characters
-- according to is_chinese_code; every other code point is ignored.
-- text: UTF-8 encoded string.
-- Returns the number of matching code points (0 for an empty string).
local function get_pure_chinese_length(text)
    local hanzi_total = 0
    for _, cp in utf8.codes(text) do
        hanzi_total = hanzi_total + (is_chinese_code(cp) and 1 or 0)
    end
    return hanzi_total
end
-- 更新统计数据
local function update_stats(input_length)
local now = os.date("*t")
local now_ts = os.time(now)
-- -----------------------------------------------------------------------------
-- 内存缓存:实时分速
-- -----------------------------------------------------------------------------
local speed_buffer = {}
local last_cleanup_ts = 0
local day_ts = start_of_day(now)
local week_ts = start_of_week(now)
local month_ts = start_of_month(now)
local year_ts = start_of_year(now)
if input_stats.daily.ts ~= day_ts then
input_stats.daily = {count = 0, length = 0, fastest = 0, ts = day_ts}
input_stats.daily_max = 0
input_stats.recent = {}
local function get_current_kpm(now)
if now - last_cleanup_ts > 5 then
local new_buf = {}
local threshold = now - 60
for _, item in ipairs(speed_buffer) do
if item.ts > threshold then table.insert(new_buf, item) end
end
speed_buffer = new_buf
last_cleanup_ts = now
end
if input_stats.weekly.ts ~= week_ts then
input_stats.weekly = {count = 0, length = 0, fastest = 0, ts = week_ts}
end
if input_stats.monthly.ts ~= month_ts then
input_stats.monthly = {count = 0, length = 0, fastest = 0, ts = month_ts}
end
if input_stats.yearly.ts ~= year_ts then
input_stats.yearly = {count = 0, length = 0, fastest = 0, ts = year_ts}
end
-- 更新记录
local update = function(stat)
stat.count = stat.count + 1
stat.length = stat.length + input_length
end
update(input_stats.daily)
update(input_stats.weekly)
update(input_stats.monthly)
update(input_stats.yearly)
if input_length > input_stats.daily_max then
input_stats.daily_max = input_length
end
input_stats.lengths[input_length] = (input_stats.lengths[input_length] or 0) + 1
-- 最近一分钟统计
local ts = os.time()
table.insert(input_stats.recent, {ts = ts, len = input_length})
local threshold = ts - 60
local total = 0
local new_recent = {}
for _, item in ipairs(input_stats.recent) do
if item.ts >= threshold then
total = total + item.len
table.insert(new_recent, item)
end
local threshold = now - 60
for _, item in ipairs(speed_buffer) do
if item.ts > threshold then total = total + item.len end
end
input_stats.recent = new_recent
if total > input_stats.daily.fastest then input_stats.daily.fastest = total end
if total > input_stats.weekly.fastest then input_stats.weekly.fastest = total end
if total > input_stats.monthly.fastest then input_stats.monthly.fastest = total end
if total > input_stats.yearly.fastest then input_stats.yearly.fastest = total end
return total
end
-- 表序列化工具(请自行根据实际添加到环境中)
table.serialize = function(tbl)
local lines = {"{"}
for k, v in pairs(tbl) do
local key = (type(k) == "string") and ("[\"" .. k .. "\"]") or ("[" .. k .. "]")
local val
if type(v) == "table" then
val = table.serialize(v)
elseif type(v) == "string" then
val = '"' .. v .. '"'
else
val = tostring(v)
end
table.insert(lines, string.format(" %s = %s,", key, val))
-- -----------------------------------------------------------------------------
-- 数据库操作
-- -----------------------------------------------------------------------------
-- Make sure the statistics database is usable.
-- Returns true when the database is already loaded; otherwise returns
-- whatever db:open() reports.
local function ensure_db_open()
    if db:loaded() then
        return true
    end
    return db:open()
end
-- Read a numeric counter from the database.
-- key: database key.
-- Returns the stored value converted with tonumber, or 0 when the key is
-- missing or its value is not numeric.
local function db_get(key)
    local parsed = tonumber(db:fetch(key))
    if parsed == nil then
        return 0
    end
    return parsed
end
-- Add an amount to both the per-day counter and the lifetime counter.
-- key_suffix: counter name suffix, e.g. "_len".
-- amount:     increment; defaults to 1 when nil/false.
-- day_key:    prefix identifying the day bucket.
-- The day counter (day_key .. key_suffix) is written first, then the
-- lifetime counter ("total" .. key_suffix).
local function db_incr_day_and_total(key_suffix, amount, day_key)
    local delta = amount or 1
    local prefixes = { day_key, "total" }
    for i = 1, 2 do
        local counter_key = prefixes[i] .. key_suffix
        db:update(counter_key, tostring(db_get(counter_key) + delta))
    end
end
-- Raise the per-day and lifetime maxima to new_val when it exceeds them.
-- key_suffix: counter name suffix, e.g. "_spd".
-- new_val:    candidate maximum.
-- day_key:    prefix identifying the day bucket.
-- Each stored value is only overwritten when new_val is strictly greater.
local function db_set_max_day(key_suffix, new_val, day_key)
    local prefixes = { day_key, "total" }
    for i = 1, 2 do
        local max_key = prefixes[i] .. key_suffix
        if new_val > db_get(max_key) then
            db:update(max_key, tostring(new_val))
        end
    end
end
local function clear_all_data()
if not ensure_db_open() then return false end
if db.empty then
db:empty()
speed_buffer = {}
return true
end
table.insert(lines, "}")
return table.concat(lines, "\n")
end
-- 保存至文件
local function save_stats()
local path = rime_api.get_user_data_dir() .. "/lua/input_stats.lua"
local file = io.open(path, "w")
if not file then return end
file:write("input_stats = " .. table.serialize(input_stats) .. "\n")
file:close()
end
-- 显示函数(以日统计为例)
local function format_daily_summary()
local s = input_stats.daily
if s.count == 0 then return "※ 今天没有任何记录。" end
return string.format(
"※ 今天的统计:\n%s\n◉ 今天\n共上屏[%d]次\n共输入[%d]字\n最快一分钟输入了[%d]字\n%s\n◉ 方案:%s\n◉ 平台:%s %s\n%s",
string.rep("", 14), s.count, s.length, s.fastest,
string.rep("", 14), schema_name, software_name, software_version,
string.rep("", 14))
end
-- 显示函数(周统计)
local function format_weekly_summary()
local s = input_stats.weekly
if s.count == 0 then return "※ 本周没有任何记录。" end
return string.format(
"※ 本周的统计:\n%s\n◉ 本周共上屏[%d]次\n共输入[%d]字\n最快一分钟输入了[%d]字\n周内单日最多一次输入[%d]字\n%s\n◉ 方案:%s\n◉ 平台:%s %s\n%s",
string.rep("", 14), s.count, s.length, s.fastest, input_stats.daily_max,
string.rep("", 14), schema_name, software_name, software_version,
string.rep("", 14))
end
-- 显示函数(月统计)
local function format_monthly_summary()
local s = input_stats.monthly
if s.count == 0 then return "※ 本月没有任何记录。" end
return string.format(
"※ 本月的统计:\n%s\n◉ 本月共上屏[%d]次\n共输入[%d]字\n最快一分钟输入了[%d]字\n%s\n◉ 方案:%s\n◉ 平台:%s %s\n%s",
string.rep("", 14), s.count, s.length, s.fastest,
string.rep("", 14), schema_name, software_name, software_version,
string.rep("", 14))
end
-- 显示函数(年统计)
local function format_yearly_summary()
local s = input_stats.yearly
if s.count == 0 then return "※ 本年没有任何记录。" end
local length_counts = {}
for length, count in pairs(input_stats.lengths) do
table.insert(length_counts, {length = length, count = count})
local ok, iter = pcall(function() return db:query("") end)
if ok and iter then
local keys = {}
for key, _ in iter do table.insert(keys, key) end
for _, key in ipairs(keys) do db:erase(key) end
speed_buffer = {}
return true
end
table.sort(length_counts, function(a, b) return a.count > b.count end)
local fav = length_counts[1] and length_counts[1].length or 0
return string.format(
"※ 本年的统计:\n%s\n◉ 本年共上屏[%d]次\n共输入[%d]字\n最快一分钟输入了[%d]字\n您最常输入长度为[%d]的词组\n%s\n◉ 方案:%s\n◉ 平台:%s %s\n%s",
string.rep("", 14), s.count, s.length, s.fastest, fav,
string.rep("", 14), schema_name, software_name, software_version,
string.rep("", 14))
return false
end
-- 转换器函数:处理命令 /rtj /ztj /ytj /ntj
local function translator(input, seg, env)
if input:sub(1, 1) ~= "/" then return end
local summary = ""
if input == "/rtj" then
summary = format_daily_summary()
elseif input == "/ztj" then
summary = format_weekly_summary()
elseif input == "/ytj" then
summary = format_monthly_summary()
elseif input == "/ntj" then
summary = format_yearly_summary()
elseif input == "/tj" then
summary = format_daily_summary() .. "\n\n" .. format_weekly_summary() .. "\n\n" .. format_monthly_summary() .. "\n\n" .. format_yearly_summary()
elseif input == "/tjql" then
input_stats = {
daily = {count = 0, length = 0, fastest = 0, ts = 0},
weekly = {count = 0, length = 0, fastest = 0, ts = 0},
monthly = {count = 0, length = 0, fastest = 0, ts = 0},
yearly = {count = 0, length = 0, fastest = 0, ts = 0},
lengths = {},
daily_max = 0,
recent = {}
}
save_stats()
summary = "※ 所有统计数据已清空。"
end
if summary ~= "" then
yield(Candidate("stat", seg.start, seg._end, summary, ""))
-- -----------------------------------------------------------------------------
-- 记录逻辑
-- -----------------------------------------------------------------------------
-- Persist one commit into the statistics database.
-- hanzi_len: number of Chinese characters in the committed text.
-- code_len:  number of input-code characters typed for that commit.
-- Does nothing when the database cannot be opened. Otherwise it appends the
-- commit to speed_buffer, bumps the day/total counters for characters,
-- commits and code length, bumps the matching length bucket (1, 2, 3, 4 or
-- more than 4), and finally records the current rolling speed as a candidate
-- maximum.
local function record_stats(hanzi_len, code_len)
    if not ensure_db_open() then
        return
    end

    local now = os.time()
    local date = os.date("*t", now)
    local day_key = string.format("d_%04d%02d%02d", date.year, date.month, date.day)

    -- The new sample must be in the buffer before the rolling speed is read.
    speed_buffer[#speed_buffer + 1] = { ts = now, len = hanzi_len }
    local current_kpm = get_current_kpm(now)

    db_incr_day_and_total("_len", hanzi_len, day_key)
    db_incr_day_and_total("_cnt", 1, day_key)
    db_incr_day_and_total("_code", code_len, day_key)

    -- Pick the length bucket; lengths below 1 fall into no bucket at all.
    local bucket
    if hanzi_len > 4 then
        bucket = "_l_gt4"
    else
        bucket = ({ "_l1", "_l2", "_l3", "_l4" })[hanzi_len]
    end
    if bucket then
        db_incr_day_and_total(bucket, 1, day_key)
    end

    db_set_max_day("_spd", current_kpm, day_key)
end
-- 加载保存的统计数据input_stats.lua
local function load_stats_from_lua_file()
local path = rime_api.get_user_data_dir() .. "/lua/input_stats.lua"
local ok, result = pcall(function()
local env = {}
local f = loadfile(path, "t", env)
if f then f() end
return env.input_stats
end)
if ok and type(result) == "table" then
input_stats = result
else
-- 保底初始化,防止错误
input_stats = {
daily = {count = 0, length = 0, fastest = 0, ts = 0},
weekly = {count = 0, length = 0, fastest = 0, ts = 0},
monthly = {count = 0, length = 0, fastest = 0, ts = 0},
yearly = {count = 0, length = 0, fastest = 0, ts = 0},
lengths = {},
daily_max = 0,
recent = {}
-- -----------------------------------------------------------------------------
-- 聚合查询逻辑
-- -----------------------------------------------------------------------------
-- Aggregate stored statistics over a window of days.
-- days_lookback: 0 returns the lifetime ("total_*") counters; N > 0 sums the
--                per-day counters of today and the N - 1 preceding calendar
--                days.
-- Returns nil when the database cannot be opened, otherwise a table with the
-- fields len, cnt, code, l1, l2, l3, l4, l_gt4 (sums) and spd (the maximum
-- over the window).
local function aggregate_stats(days_lookback)
    if not ensure_db_open() then return nil end

    -- Counters that are summed; "spd" is handled separately as a maximum.
    local summed_fields = { "len", "cnt", "code", "l1", "l2", "l3", "l4", "l_gt4" }

    if days_lookback == 0 then
        local totals = { spd = db_get("total_spd") }
        for _, field in ipairs(summed_fields) do
            totals[field] = db_get("total_" .. field)
        end
        return totals
    end

    local res = { spd = 0 }
    for _, field in ipairs(summed_fields) do
        res[field] = 0
    end

    -- Walk back by calendar day rather than by multiples of 86400 seconds.
    -- Days that are 23 or 25 hours long (DST changes) would otherwise be
    -- skipped or counted twice when "now" is close to midnight. os.time
    -- normalizes out-of-range day values, and anchoring at noon keeps the
    -- result inside the intended day.
    local today = os.date("*t")
    for i = 0, days_lookback - 1 do
        local noon_ts = os.time({
            year = today.year,
            month = today.month,
            day = today.day - i,
            hour = 12,
        })
        local t = os.date("*t", noon_ts)
        local day_key = string.format("d_%04d%02d%02d", t.year, t.month, t.day)

        for _, field in ipairs(summed_fields) do
            res[field] = res[field] + db_get(day_key .. "_" .. field)
        end

        local daily_spd = db_get(day_key .. "_spd")
        if daily_spd > res.spd then res.spd = daily_spd end
    end
    return res
end
-- -----------------------------------------------------------------------------
-- UI 渲染
-- -----------------------------------------------------------------------------
-- Render a 10-cell text progress bar for a percentage.
-- percent: value on a 0-100 scale; the number of filled cells is
--          floor(percent / 100 * 10), the remainder are empty cells.
-- NOTE(review): both glyph literals are empty strings in this copy of the
-- file, so the result is currently always an empty string — confirm the
-- intended "filled" and "empty" bar characters.
local function draw_bar(percent)
    local width = 10
    local filled = math.floor(percent / 100 * width)
    local filled_part = string.rep("", filled)
    local empty_part = string.rep("", width - filled)
    return filled_part .. empty_part
end
-- Build the multi-line dashboard text for one statistics window.
-- title: label of the window, inserted into the headline.
-- data:  table as produced by aggregate_stats (len, cnt, code, spd, l1, l2,
--        l3, l4, l_gt4), or nil.
-- Returns a "no data" line when data is nil or holds no commits, otherwise
-- the formatted dashboard string.
local function format_summary(title, data)
    if not data or data.cnt == 0 then return "" .. title .. "暂无数据" end

    -- Average keystrokes per character and share of characters that were
    -- committed as part of a multi-character phrase.
    local avg_code = 0
    local phrase_rate = 0
    if data.len > 0 then
        avg_code = data.code / data.len
        phrase_rate = (data.len - data.l1) / data.len * 100
    end

    -- Estimated average speed. No typing duration is recorded, so this
    -- assumes roughly 2 seconds per commit: chars / (commits * 2s / 60).
    local estimated_avg_spd = 0
    if data.cnt > 0 then
        estimated_avg_spd = math.floor(data.len / ((data.cnt * 2) / 60))
        -- An estimate above the recorded peak is implausible; fall back to
        -- 60% of the peak.
        if estimated_avg_spd > data.spd then estimated_avg_spd = math.floor(data.spd * 0.6) end
        -- Floor for tiny samples so the field is not shown as 0.
        if estimated_avg_spd == 0 and data.len > 0 then estimated_avg_spd = data.len end
    end

    -- Share of commits per length bucket, as a float percentage.
    local function share(bucket_count)
        return (bucket_count / data.cnt) * 100
    end
    local p1 = share(data.l1)
    local p2 = share(data.l2)
    local p3 = share(data.l3)
    local p4 = share(data.l4)
    local p_gt4 = share(data.l_gt4)

    local ver = rime_api.get_distribution_version() or ""

    -- "%d" on Lua 5.3+ raises "number has no integer representation" for a
    -- non-integral float (e.g. 33.33), so the percentages are floored before
    -- formatting. draw_bar still receives the unrounded value.
    local floor = math.floor
    return string.format(
        "※ %s统计 · 效率仪表盘\n" ..
        "───────────────────\n" ..
        "📊 综合数据\n" ..
        " 总字数:%d\t上屏:%d\n" ..
        " 峰值速:%d\t均速:%d\n" ..
        "───────────────────\n" ..
        "⚡ 核心效率\n" ..
        " 平均编码:%.2f 键/字\n" ..
        " 词组连打:%.1f %%\n" ..
        "───────────────────\n" ..
        "📈 字词分布\n" ..
        " [1] %3d%% %s\n" ..
        " [2] %3d%% %s\n" ..
        " [3] %3d%% %s\n" ..
        " [4] %3d%% %s\n" ..
        " [≥5] %2d%% %s\n" ..
        "───────────────────\n" ..
        "◉ 方案:%s\n" ..
        "◉ 平台:%s %s",
        title, data.len, data.cnt,
        data.spd, estimated_avg_spd,
        avg_code, phrase_rate,
        floor(p1), draw_bar(p1),
        floor(p2), draw_bar(p2),
        floor(p3), draw_bar(p3),
        floor(p4), draw_bar(p4),
        floor(p_gt4), draw_bar(p_gt4),
        schema_name, software_name, ver
    )
end
-- -----------------------------------------------------------------------------
-- Init & Fini
-- -----------------------------------------------------------------------------
local function init(env)
ensure_db_open()
if env.stat_notifier then env.stat_notifier:disconnect() end
local ctx = env.engine.context
-- 加载历史统计数据
load_stats_from_lua_file()
-- 注册提交通知回调
ctx.commit_notifier:connect(function()
env.stat_notifier = ctx.commit_notifier:connect(function(ctx)
local commit_text = ctx:get_commit_text()
if not commit_text or commit_text == "" then return end
if commit_text:sub(1, 1) == "/" then return end
if commit_text:find("^[※◉]") then return end
-- 排除统计命令(如 /rtj、/tj 等)
if is_summary_command(commit_text) then return end
local hanzi_len = get_pure_chinese_length(commit_text)
if hanzi_len == 0 then return end
local script_text = ctx:get_script_text() or ""
local code_len = string.len(script_text)
if code_len == 0 then code_len = hanzi_len * 2 end
-- 排除统计候选上屏内容(例如 "※ 今天..." 或 "◉ 本年..."
if commit_text:match("^[※◉]") then return end
-- 排除我们自己生成的统计候选comment 是 "input_stats_summary"
-- local cand = ctx:get_selected_candidate()
-- if cand and cand.comment == "input_stats_summary" then return end
-- 保存最近一次 commit 内容
local now_ms = os.clock()
if env.last_commit_time and (now_ms - env.last_commit_time < 0.05) then
if env.last_commit_text == commit_text then return end
end
env.last_commit_time = now_ms
env.last_commit_text = commit_text
-- 统计长度
local input_length = utf8.len(commit_text) or string.len(commit_text)
update_stats(input_length)
save_stats()
record_stats(hanzi_len, code_len)
end)
end
return { init = init, func = translator }
-- Component teardown: detach the commit notifier stored on env (if any) and
-- close the statistics database when it is currently loaded.
-- env: component environment table; env.stat_notifier is cleared.
local function fini(env)
    local notifier = env.stat_notifier
    if notifier then
        notifier:disconnect()
        env.stat_notifier = nil
    end

    local db_is_open = db and db:loaded()
    if db_is_open then
        db:close()
    end
end
-- Translator entry point: turns statistics commands into a single candidate.
-- input: current input string; anything not starting with "/" is ignored.
-- seg:   current segment; its start/_end are used for the candidate span.
-- env:   component environment (unused here).
-- "/tjql" clears all data and reports success or failure. "/rtj", "/ztj",
-- "/ytj", "/ntj" and "/ttj" show the dashboard for 1, 7, 30, 365 days and the
-- lifetime totals respectively. Other inputs yield nothing.
local function translator(input, seg, env)
    if input:sub(1, 1) ~= "/" then return end

    if input == "/tjql" then
        local text, comment
        if clear_all_data() then
            text, comment = "※ 统计数据已全部清空。", "🗑️"
        else
            text, comment = "※ 数据清空失败,请检查权限。", ""
        end
        yield(Candidate("stat", seg.start, seg._end, text, comment))
        return
    end

    -- command -> { window title, days of lookback (0 = lifetime totals) }
    local windows = {
        ["/rtj"] = { "今日", 1 },
        ["/ztj"] = { "七日", 7 },
        ["/ytj"] = { "卅日", 30 },
        ["/ntj"] = { "本年", 365 },
        ["/ttj"] = { "生涯", 0 },
    }
    local window = windows[input]
    if not window then return end

    local data = aggregate_stats(window[2])
    if not data then return end

    local summary = format_summary(window[1], data)
    yield(Candidate("stat", seg.start, seg._end, summary, "📊"))
end
return { init = init, func = translator, fini = fini }