-- lua/super_english.lua
-- https://github.com/amzxyz/rime_wanxiang
-- @description: 英文全能处理器 (Fix: 动态分隔符兼容)
-- @author: amzxyz
-- 
-- 核心功能清单:
-- 1. [Format] 语句级英文大写格式化,逐词大小写对应 (look HELLO -> look HELLO)
-- 2. [Spacing] 智能语句空格切分，智能单词上屏加空格 (Smart Spacing) 与无损分词还原
-- 3. [Memory] 全量历史缓存，完美解决回删乱码问题
-- 4. [Construct] 原生优先构造策略 (短词无分词则重置为原生输入)
-- 5. [Order] 单字母(a/A) 智能插队排序,补齐单字母候选
-- Module table returned at the end of this file; it carries the filter's
-- init / fini / func entry points.
local F = {}

-- Cache frequently used string-library functions in locals.
local byte = string.byte
local find = string.find
local gsub = string.gsub
local upper = string.upper
local lower = string.lower
local sub = string.sub
local match = string.match
local format = string.format

--====================================================
-- 1. 基础工具函数
--====================================================

--- Reduce a string to its ASCII letters, lowercased.
-- Every character outside a-z / A-Z is dropped before lowercasing.
-- @param s string to normalize
-- @return the letters-only, lowercased string (single return value)
local function pure(s)
    local letters_only = gsub(s, "[^a-zA-Z]", "")
    return lower(letters_only)
end

--- Tell whether a string is non-empty and made only of 7-bit ASCII bytes.
-- @param s string to inspect
-- @return false for the empty string or when any byte is above 127, true otherwise
local function is_ascii_phrase_fast(s)
    if s == "" then return false end
    -- A single pattern search for any byte in 128..255 replaces the manual scan.
    return find(s, "[\128-\255]") == nil
end

--- Look for the first ASCII letter in a string.
-- The result is used as a truthy/falsy flag by callers in this file.
-- @param s string to search
-- @return start and end index of the first letter, or nil when there is none
local function has_letters(s)
    local letter_class = "[a-zA-Z]"
    return find(s, letter_class, 1)
end

--- Subsequence match: find the characters of `target_fp`, in order but not
-- necessarily adjacent, inside `text` starting at `start_pos`.
-- Characters of `text` are lowercased before comparison; `target_fp` is
-- compared as given.
-- @param text       string to scan
-- @param start_pos  index in `text` where scanning begins
-- @param target_fp  sequence of characters to look for
-- @return position of the first matched character and position of the last
--         matched character, or nil, nil when the sequence is not fully found
--         (or `target_fp` is empty)
local function find_target_in_text(text, start_pos, target_fp)
    local wanted = #target_fp
    if wanted == 0 then return nil, nil end

    local matched = 0
    local first_hit = nil
    for pos = start_pos, #text do
        local ch = lower(sub(text, pos, pos))
        if ch == sub(target_fp, matched + 1, matched + 1) then
            matched = matched + 1
            if matched == 1 then first_hit = pos end
            -- Whole sequence consumed: `pos` is where the last character matched.
            if matched == wanted then
                return first_hit, pos
            end
        end
    end

    return nil, nil
end

--====================================================
-- 2. 核心逻辑：格式化与还原
--====================================================

--- Re-insert word spacing into a candidate, using the delimiters found in its
-- preedit as a guide (anchor-based splitting).
--
-- The preedit is split on the delimiter pattern; each segment is reduced to
-- its lowercase letters and located inside the candidate text. The text is
-- then cut at the start of every located segment and the pieces are joined
-- with single spaces.
--
-- @param cand           candidate whose `text` / `preedit` are inspected
-- @param split_pattern  Lua pattern matching one run of non-delimiter characters
-- @param check_pattern  Lua pattern matching a single delimiter character
-- @return the original `cand` when nothing has to change (no delimiter, a
--         segment cannot be located, or the re-spaced text equals the original
--         text); otherwise a new Candidate carrying the re-spaced text and the
--         original type, start, _end, comment and preedit
local function restore_sentence_spacing(cand, split_pattern, check_pattern)
    local guide = cand.preedit or ""

    -- 1. Only step in when the preedit actually contains a delimiter.
    if not find(guide, check_pattern) then return cand end

    local text = cand.text
    if not text or text == "" then return cand end

    -- 2. Collect the letter fingerprint of every segment (hi'vcs -> {hi, vcs}).
    local targets = {}
    for seg in string.gmatch(guide, split_pattern) do
        local t = pure(seg)
        if #t > 0 then targets[#targets + 1] = t end
    end
    if #targets == 0 then return cand end

    -- 3. Locate the start anchor of every segment inside the text.
    local starts = {}
    local p = 1
    for _, target in ipairs(targets) do
        -- s is the anchor; e only advances the search cursor.
        local s, e = find_target_in_text(text, p, target)
        if not s then
            -- preedit and text do not line up: leave the candidate untouched.
            return cand
        end
        starts[#starts + 1] = s
        p = e + 1
    end

    -- 4. Cut the text at the anchors.
    local parts = {}

    -- Keep whatever precedes the first anchor as its own piece.
    if starts[1] > 1 then
        parts[#parts + 1] = sub(text, 1, starts[1] - 1)
    end

    for i = 1, #starts do
        local next_s = starts[i + 1]
        -- A piece runs up to the next anchor; the last piece runs to the end
        -- of the text so no trailing characters are lost.
        local chunk_end = next_s and (next_s - 1) or #text
        parts[#parts + 1] = sub(text, starts[i], chunk_end)
    end

    -- 5. Join and collapse runs of whitespace introduced by the join.
    local new_text = gsub(table.concat(parts, " "), "%s%s+", " ")

    -- Keep the original candidate object when the text is already correctly
    -- spaced, instead of replacing it with an identical-looking copy.
    if new_text == "" or new_text == text then return cand end

    local nc = Candidate(cand.type, cand.start, cand._end, new_text, cand.comment)
    nc.preedit = cand.preedit
    return nc
end

-- UTF-8 byte sequence (0xC2 0xA0) of the no-break space U+00A0; replaced by a
-- plain space in apply_formatting.
local NBSP = string.char(0xC2, 0xA0)

--- Mirror the capitalization of the typed code onto the words of `text`.
--
-- `text` is split on whitespace. Each word consumes as many letters of the
-- typed code as the word itself contains, and the casing of that slice
-- decides the word's casing:
--   * slice starts with two uppercase letters and the word is letters only
--     -> the whole word is uppercased;
--   * slice starts with one uppercase letter -> the word's first character is
--     uppercased if it is a letter;
--   * otherwise the word is left alone.
-- Words containing non-ASCII bytes are never re-cased but still consume their
-- share of the typed code.
--
-- The typed code is reduced to its letters first, so delimiters or digits in
-- the input (e.g. Hello'World) do not shift the slice of the following words.
--
-- @param text        candidate text
-- @param input_code  raw typed code; nil or "" returns `text` unchanged
-- @return the words of `text` joined by single spaces, re-cased as described
local function apply_segment_formatting(text, input_code)
    if not input_code or input_code == "" then return text end

    -- Align letter-to-letter: anything that is not a letter is not counted.
    local code = gsub(input_code, "[^a-zA-Z]", "")
    local code_len = #code

    local parts = {}
    local p_code = 1

    for word in string.gmatch(text, "%S+") do
        -- Second result of gsub is the number of letters in the word.
        local _, w_len = gsub(word, "[a-zA-Z]", "")
        local remain = code_len - p_code + 1

        if w_len > 0 and remain > 0 then
            local take = (w_len < remain) and w_len or remain

            if not find(word, "[\128-\255]") then
                local segment = sub(code, p_code, p_code + take - 1)
                local is_pure_alpha = not find(word, "[^a-zA-Z]")

                if find(segment, "^%u%u") and is_pure_alpha then
                    word = upper(word)
                elseif find(segment, "^%u") then
                    word = gsub(word, "^%a", upper)
                end
            end

            p_code = p_code + take
        end

        parts[#parts + 1] = word
    end

    return table.concat(parts, " ")
end

--- Apply text normalization, case mirroring and spacing to one candidate.
--
-- 1. No-break spaces in the text are replaced by plain spaces.
-- 2. If the text is pure ASCII and contains a letter:
--    a. casing is mirrored from `code_ctx.raw_input` (when present);
--    b. depending on `code_ctx.spacing_mode`, a space is added in front
--       ("before", or "smart" when `code_ctx.prev_is_eng` is truthy) or at the
--       end ("after"), unless whitespace is already there.
--
-- @param cand      candidate to format
-- @param code_ctx  table with fields raw_input, spacing_mode, prev_is_eng
-- @return `cand` itself when no step modified the text, otherwise a new
--         Candidate with the formatted text and the original type, start,
--         _end, comment and preedit
local function apply_formatting(cand, code_ctx)
    local text = cand.text
    if not text or text == "" then return cand end

    local dirty = false

    local despaced = gsub(text, NBSP, " ")
    if despaced ~= text then
        text, dirty = despaced, true
    end

    if is_ascii_phrase_fast(text) and has_letters(text) then
        local typed = code_ctx.raw_input
        if typed then
            local recased = apply_segment_formatting(text, typed)
            if recased ~= text then
                text, dirty = recased, true
            end
        end

        local mode = code_ctx.spacing_mode
        if mode and mode ~= "off" then
            -- "before" always pads in front; "smart" only after English output.
            local pad_front = (mode == "before")
                or (mode == "smart" and code_ctx.prev_is_eng)
            if pad_front then
                if not find(text, "^%s") then
                    text, dirty = " " .. text, true
                end
            elseif mode == "after" and not find(text, "%s$") then
                text, dirty = text .. " ", true
            end
        end
    end

    if not dirty then return cand end

    local replacement = Candidate(cand.type, cand.start, cand._end, text, cand.comment)
    replacement.preedit = cand.preedit
    return replacement
end

--====================================================
-- 3. 状态管理 (Filter)
--====================================================

--- Filter initializer: prepares per-instance state on `env`.
--
-- Sets up:
--   * env.memory               - cache keyed by raw input, filled by F.func;
--   * env.english_spacing_mode - schema string "english_spacing", default "off";
--   * env.split_pattern /
--     env.delim_check_pattern  - Lua patterns built from "speller/delimiter"
--                                (default: space and apostrophe);
--   * env.prev_commit_is_eng   - whether the last commit was pure ASCII,
--                                maintained by a commit notifier;
--   * env.commit_notifier      - the notifier connection, released in F.fini.
--
-- @param env filter environment; env.engine.schema.config and
--            env.engine.context are read here
function F.init(env)
    env.memory = {}
    local cfg = env.engine.schema.config

    env.english_spacing_mode = "off"

    -- Delimiters used to split the preedit (space plus any custom symbols).
    local delimiter_str = " '"
    if cfg then
        env.english_spacing_mode = cfg:get_string("english_spacing") or "off"

        -- An empty delimiter string would produce the malformed patterns
        -- "[]" / "[^]+", so it falls back to the default as well.
        local configured = cfg:get_string('speller/delimiter')
        if configured and configured ~= "" then
            delimiter_str = configured
        end
    end

    -- Escape pattern-magic characters so each delimiter is taken literally
    -- inside the character classes below.
    local escaped_delims = gsub(delimiter_str, "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1")
    env.split_pattern = "[^" .. escaped_delims .. "]+"
    env.delim_check_pattern = "[" .. escaped_delims .. "]"

    env.prev_commit_is_eng = false
    if env.engine.context then
        env.commit_notifier = env.engine.context.commit_notifier:connect(function(ctx)
            local commit_text = ctx:get_commit_text() or ""
            -- ASCII punctuation such as , . ! ? is already covered by the
            -- ASCII check, so no separate punctuation test is needed.
            env.prev_commit_is_eng = is_ascii_phrase_fast(commit_text)
            -- Reset the "english_spacing" context property after every commit.
            ctx:set_property("english_spacing", "")
        end)
    end
end

--- Filter finalizer: disconnects the commit notifier registered in F.init
-- (if any) and drops the input cache.
-- @param env filter environment
function F.fini(env)
    local connection = env.commit_notifier
    if connection then
        connection:disconnect()
        env.commit_notifier = nil
    end
    env.memory = nil
end

--====================================================
-- 4. 主逻辑 (Filter)
--====================================================

--- Filter entry point: formats every incoming candidate and, when no usable
-- candidate was produced, constructs a fallback candidate from cached history.
--
-- Phase 1 - for a one-letter input, a lowercase and an uppercase candidate of
--           that letter are prepared and yielded just before the first ASCII
--           candidate (or after the loop if there was none).
-- Phase 2 - each candidate goes through restore_sentence_spacing and
--           apply_formatting and is yielded. The first candidate that is not
--           of type "raw", does not equal the raw input and whose comment is
--           not "~" is stored in env.memory under the current input.
-- Phase 3 - if nothing valid was seen and the input contains a letter, the
--           longest cached prefix of the input is used as anchor and the
--           remaining characters are appended to it:
--             * anchor already has spaces between words -> append directly;
--             * single word longer than 3 characters    -> append after a space;
--             * otherwise                               -> fall back to the raw input.
--           Without any anchor the raw input itself is yielded.
--
-- `Candidate` and `yield` are globals not defined in this file.
--
-- @param input translation whose candidates are read with input:iter()
-- @param env   filter environment prepared by F.init
function F.func(input, env)
    local ctx = env.engine.context
    local curr_input = ctx.input
    local has_valid_candidate = false
    local best_candidate_saved = false
    local code_len = #curr_input

    -- The context property "english_spacing" == "true" forces prev_is_eng to
    -- false for this call, which disables the "smart" leading space.
    local break_signal = (ctx:get_property("english_spacing") == "true")
    local effective_prev_is_eng = env.prev_commit_is_eng
    if break_signal then effective_prev_is_eng = false end

    local code_ctx = {
        raw_input = curr_input,
        spacing_mode = env.english_spacing_mode,
        prev_is_eng = effective_prev_is_eng
    }

    -- [Phase 1] Prepare the a/A pair for a single ASCII letter input.
    -- single_char_injected starts out true whenever there is nothing to inject.
    local single_char_injected = false
    local c_lower, c_upper = nil, nil
    if code_len == 1 then
        local b = byte(curr_input)
        if (b >= 65 and b <= 90) or (b >= 97 and b <= 122) then
            local lower_t = lower(curr_input)
            local upper_t = upper(curr_input)
            c_lower = Candidate("completion", 0, 1, lower_t, "")
            c_upper = Candidate("completion", 0, 1, upper_t, "")
        else
            single_char_injected = true
        end
    else
        single_char_injected = true
    end

    -- [Phase 2] Format and forward the incoming candidates.
    for cand in input:iter() do
        -- Restore spacing from the preedit delimiters, then apply casing/padding.
        local good_cand = restore_sentence_spacing(cand, env.split_pattern, env.delim_check_pattern)
        local fmt_cand = apply_formatting(good_cand, code_ctx)
        local is_ascii = is_ascii_phrase_fast(fmt_cand.text)

        -- Insert the single-letter pair right before the first ASCII candidate.
        if not single_char_injected and is_ascii and c_lower then
            if not best_candidate_saved then
                env.memory[curr_input] = { text = c_lower.text, preedit = c_lower.text }
                best_candidate_saved = true
            end
            yield(c_lower)
            yield(c_upper)
            single_char_injected = true
            has_valid_candidate = true
        end

        -- Raw echoes of the input do not count as a real result (they are
        -- still yielded below).
        local is_garbage = (cand.type == "raw") or (fmt_cand.text == curr_input)

        if not is_garbage then
            has_valid_candidate = true
            -- Candidates commented "~" are the ones constructed in Phase 3 and
            -- are never cached as anchors.
            if not best_candidate_saved and cand.comment ~= "~" then
                env.memory[curr_input] = {
                    text = fmt_cand.text,
                    preedit = fmt_cand.preedit or fmt_cand.text
                }
                best_candidate_saved = true
            end
        end
        yield(fmt_cand)
    end

    -- No ASCII candidate came by: emit the single-letter pair at the end.
    if not single_char_injected and c_lower then
        if not best_candidate_saved then
            env.memory[curr_input] = { text = c_lower.text, preedit = c_lower.text }
            best_candidate_saved = true
        end
        yield(c_lower)
        yield(c_upper)
        has_valid_candidate = true
    end

    -- [Phase 3] Construct a completion from cached history.
    if not has_valid_candidate then
        if not has_letters(curr_input) then return end
        local anchor = nil
        local diff = ""

        -- Longest proper prefix of the input that has a cached result.
        for i = #curr_input - 1, 1, -1 do
            local prefix = sub(curr_input, 1, i)
            if env.memory[prefix] then
                anchor = env.memory[prefix]
                diff = sub(curr_input, i + 1)
                break
            end
        end

        if anchor and diff ~= "" then
            -- Only spaces BETWEEN words mark the anchor as a spaced sentence.
            -- Cached text may carry a leading/trailing padding space added by
            -- the spacing modes; that padding must not be mistaken for word
            -- spacing, otherwise the short-word reset below is bypassed.
            local has_spacing = find(anchor.text, "%S +%S")
            local last_word = match(anchor.text, "(%S+)%s*$") or ""
            local last_len = #last_word

            local output_text = ""
            local output_preedit = ""

            if has_spacing then
                output_text = anchor.text .. diff
                output_preedit = (anchor.preedit or anchor.text) .. diff
            elseif last_len > 3 then
                -- Reuse a trailing padding space instead of adding a second one.
                local spacer = " "
                if sub(anchor.text, -1) == " " then spacer = "" end
                output_text = anchor.text .. spacer .. diff
                output_preedit = (anchor.preedit or anchor.text) .. spacer .. diff
            else
                -- Short word without segmentation: fall back to the raw input.
                output_text = curr_input
                output_preedit = curr_input
            end

            output_text = apply_segment_formatting(output_text, curr_input)

            local cand = Candidate("completion", 0, #curr_input, output_text, "~")
            cand.preedit = output_preedit
            cand.quality = 9999999
            yield(cand)
        else
            local cand = Candidate("completion", 0, #curr_input, curr_input, "~")
            cand.preedit = curr_input
            yield(cand)
        end
    end
end

-- Export the filter table (init / fini / func).
return F