From 00c65a098384e91a007d7daebf0519fee8cbc8d8 Mon Sep 17 00:00:00 2001 From: wumode Date: Wed, 10 Dec 2025 21:22:29 +0800 Subject: [PATCH] feat(lexiannot): Integrate LLM for advanced vocabulary processing --- package.v2.json | 3 +- plugins.v2/lexiannot/README.md | 36 +- plugins.v2/lexiannot/__init__.py | 2816 +++++++++++++------------ plugins.v2/lexiannot/agenttool.py | 67 + plugins.v2/lexiannot/lexicon.py | 116 + plugins.v2/lexiannot/pipeline.py | 736 +++++++ plugins.v2/lexiannot/query_gemini.py | 111 - plugins.v2/lexiannot/requirements.txt | 3 +- plugins.v2/lexiannot/schemas.py | 394 ++++ plugins.v2/lexiannot/spacyworker.py | 36 +- plugins.v2/lexiannot/subtitle.py | 44 + 11 files changed, 2854 insertions(+), 1508 deletions(-) create mode 100644 plugins.v2/lexiannot/agenttool.py create mode 100644 plugins.v2/lexiannot/lexicon.py create mode 100644 plugins.v2/lexiannot/pipeline.py delete mode 100644 plugins.v2/lexiannot/query_gemini.py create mode 100644 plugins.v2/lexiannot/schemas.py create mode 100644 plugins.v2/lexiannot/subtitle.py diff --git a/package.v2.json b/package.v2.json index cf17c9e..5152513 100644 --- a/package.v2.json +++ b/package.v2.json @@ -540,11 +540,12 @@ "name": "美剧生词标注", "description": "根据CEFR等级,为英语影视剧标注高级词汇。", "labels": "英语", - "version": "1.1.4", + "version": "1.2.0", "icon": "LexiAnnot.png", "author": "wumode", "level": 1, "history": { + "v1.2.0": "引入大模型候选词决策和词义丰富处理链; 支持读取系统智能体配置; 添加智能体工具; 优化通知样式; 改进 UI", "v1.1.4": "优化字幕选择决策", "v1.1.3": "适配 Pydantic V2 (主程序版本需高于 2.8.1-1)", "v1.1.2": "使用子进程避免 spaCy 模型常驻内存", diff --git a/plugins.v2/lexiannot/README.md b/plugins.v2/lexiannot/README.md index 1bb9055..22a68f3 100644 --- a/plugins.v2/lexiannot/README.md +++ b/plugins.v2/lexiannot/README.md @@ -1,26 +1,32 @@ # 美剧生词标注 根据CEFR等级,为英语影视剧标注高级词汇。 +___ +在影视剧入库后,LexiAnnot 会读取媒体文件的MediaInfo和文件列表,如果视频的原始语言为英语并且包含英文文本字幕,LexiAnnot将为其生成包含词汇注释的`.en.ass`字幕文件。 -在影视剧入库后,LexiAnnot会读取媒体文件的MediaInfo和文件列表,如果视频的原始语言为英语并且包含英文文本字幕,LexiAnnot将为其生成包含词汇注释的.ass字幕文件。 +## 主要功能 ![](https://images2.imgbox.com/d6/b6/kZu6EH2a_o.png) ![](https://images2.imgbox.com/c8/3a/rEJBWu5v_o.png) -![](https://images2.imgbox.com/97/b7/d6RXFtwD_o.png) +![](https://images2.imgbox.com/56/c0/FBhJMvRD_o.jpg) ![](https://images2.imgbox.com/8a/d4/AtgOe265_o.jpg) -# Gemini +- 识别视频的原始语言和字幕语言 +- 自动适应原字幕样式 +- 俚语 / 自造词 / 熟词生义标注和解释 -- **[获取APIKEY](https://aistudio.google.com/app/apikey)** -- **[速率限制](https://ai.google.dev/gemini-api/docs/rate-limits)** +## 使用配置 -**确保可以正常访问下面的域名** +- spaCy 模型 + - spaCy 用于词形还原、POS 标注和命名实体识别,`en_core_web_sm`或`en_core_web_md` 已足够满足需求。 +- LLM 设置 + - 一集影视剧的字幕通常包含数千个单词,建议使用支持长文本输入的模型,选择一个适当的上下文窗口大小。 + - 处理 60 min 的影视剧字幕大约会消耗 `60K`~`80K` token,具体取决于字幕内容。 + - 配置请参考 MoviePilot 智能助手的设置部分。 +- Agent 工具 + - 在聊天中使用 `/ai` 命令告诉智能助手你要标注的影视剧。 -- googleapis.com -- google.dev -- aistudio.google.com - -# CEFR +## CEFR CEFR全称是Common European Framework of Reference for Languages。 @@ -36,20 +42,18 @@ CEFR全称是Common European Framework of Reference for Languages。 - **C1** (高级/Advanced):能够理解各种较长、要求较高的文本,并能识别隐含意义,表达流利、自然,能灵活有效地使用语言来应对各种目的。 - **C2** (精通/Proficient):能够轻松理解几乎所有听到的或读到的内容,能够非常流利、准确、精细地表达自己,即使在复杂的情况下也能区分细微的含义。 -# 计划 +## 计划 - 双语字幕支持 - ~~考试词汇标注~~ -# FAQ +## FAQ -- **为什么需要用到Gemini** - - LexiAnnot使用的词典仅包含约18000个单词,无法覆盖影视剧中的海量的俚语、习语、流行语等更广泛的表达形式 - **只能处理已有字幕的视频吗?** - 是的,视频需要包含**英文文本字幕** - **为什么无法处理一些包含字幕视频** - 目前无法识别基于图片的字幕(通常是特效字幕) -# 感谢 +## 感谢 - [coca-vocabulary-20000](https://github.com/llt22/coca-vocabulary-20000) \ No newline at end of file diff --git a/plugins.v2/lexiannot/__init__.py 
b/plugins.v2/lexiannot/__init__.py index 51049e0..7c375aa 100644 --- a/plugins.v2/lexiannot/__init__.py +++ b/plugins.v2/lexiannot/__init__.py @@ -5,19 +5,16 @@ import re import shutil import subprocess import sys -import time import threading -import uuid from collections import Counter from datetime import datetime -from enum import Enum from pathlib import Path -from typing import Any, Dict, List, Tuple, Optional +from typing import Any, Dict, List, Tuple, Optional, Literal -import pysubs2 import pymediainfo from langdetect import detect -from pysubs2 import SSAFile, SSAEvent +from langchain_community.callbacks import get_openai_callback +from pysubs2 import SSAFile, SSAEvent, SSAStyle, Color, Alignment from app.core.config import settings from app.helper.directory import DirectoryHelper @@ -25,54 +22,34 @@ from app.log import logger from app.plugins import _PluginBase from app.core.cache import cached from app.core.event import eventmanager, Event -from app.schemas.types import NotificationType +from app.schemas import Response +from app.schemas.types import NotificationType, MediaType from app.utils.http import RequestUtils from app.utils.string import StringUtils -from app.schemas import TransferInfo +from app.schemas import TransferInfo, Context from app.schemas.types import EventType from app.core.context import MediaInfo -from app.plugins.lexiannot.query_gemini import ( - DialogueTranslationTask, VocabularyTranslationTask, Vocabulary, Context, TranslationTasks, translate, T +from app.chain.media import MediaChain + +from .agenttool import VocabularyAnnotatingTool +from .lexicon import Lexicon +from .schemas import ( + IDGenerator, + TaskStatus, + Task, + TasksApiParams, + ProcessResult, + SegmentList, + TaskParams, SegmentStatistics, +) +from .spacyworker import SpacyWorker +from .subtitle import SubtitleProcessor, style_text +from .pipeline import ( + extract_advanced_words, + llm_process_chain, + initialize_llm, + UNIVERSAL_POS_MAP, ) -from app.plugins.lexiannot.spacyworker import SpacyWorker - - -class TaskStatus(Enum): - PENDING = "pending" - RUNNING = "running" - COMPLETED = "completed" - FAILED = "failed" - CANCELED = "canceled" - IGNORED = "ignored" - - -class Task: - - def __init__(self, video_path: str, - task_id: Optional[str] = None, - status: TaskStatus = TaskStatus.PENDING, - add_time: Optional[datetime] = None, - complete_time: Optional[datetime] = None, - tokens_used: int = 0): - self.task_id = task_id or str(uuid.uuid4()) - self.video_path = video_path - self.status: TaskStatus = status - self.add_time: Optional[datetime] = add_time - self.complete_time: Optional[datetime] = complete_time - self.tokens_used: int = tokens_used - - def __repr__(self): - return f"" - - def to_dict(self): - return { - "task_id": self.task_id, - "video_path": self.video_path, - "status": self.status.value, - "add_time": self.add_time.isoformat() if self.add_time else None, - "complete_time": self.complete_time.isoformat() if self.complete_time else None, - "tokens_used": self.tokens_used - } class LexiAnnot(_PluginBase): @@ -83,7 +60,7 @@ class LexiAnnot(_PluginBase): # 插件图标 plugin_icon = "LexiAnnot.png" # 插件版本 - plugin_version = "1.1.4" + plugin_version = "1.2.0" # 插件作者 plugin_author = "wumode" # 作者主页 @@ -96,7 +73,7 @@ class LexiAnnot(_PluginBase): auth_level = 1 _enabled: bool = False - _annot_level = '' + _annot_level = "" _send_notify = False _onlyonce = False _show_vocabulary_detail = False @@ -104,32 +81,35 @@ class LexiAnnot(_PluginBase): _sentence_translation = False _in_place = 
False _enable_gemini = False - _gemini_model = '' - _gemini_apikey = '' + _gemini_model = "" + _gemini_apikey: str | None = None + _llm_provider = "google" + _llm_base_url = "" + _context_window: int = 0 _max_retries: int = 0 - _request_interval: int = 0 - _ffmpeg_path: str = 'ffmpeg' + _ffmpeg_path: str = "ffmpeg" _english_only = False _when_file_trans = False - _model_temperature = '' - _custom_files = '' - _accent_color = '' - _font_scaling = '' - _opacity = '' + _model_temperature = "" + _custom_files = "" + _accent_color = "" + _font_scaling = "" + _opacity = "" _exam_tags: List[str] = [] - _spacy_model: str = '' + _spacy_model: str = "" _delete_data: bool = False _libraries: List[str] = [] + _use_mp_agent: bool = False + _use_proxy: bool = False # protected variables - _lexicon_repo = 'https://raw.githubusercontent.com/wumode/LexiAnnot/' + _lexicon_repo = "https://raw.githubusercontent.com/wumode/LexiAnnot/" _worker_thread = None _task_queue: queue.Queue[Task] = queue.Queue() _shutdown_event = None - _total_token_count = 0 _venv_python = None - _query_gemini_script = '' + _query_gemini_script = "" _gemini_available = False _accent_color_rgb = None _color_alpha = 0 @@ -142,42 +122,50 @@ class LexiAnnot(_PluginBase): self.stop_service() if config: self._enabled = bool(config.get("enabled")) - self._annot_level = config.get("annot_level") or 'C1' + self._annot_level = config.get("annot_level") or "C1" self._send_notify = config.get("send_notify") self._onlyonce = config.get("onlyonce") self._show_vocabulary_detail = config.get("show_vocabulary_detail") self._sentence_translation = config.get("sentence_translation") self._in_place = config.get("in_place") self._enable_gemini = config.get("enable_gemini") - self._gemini_model = config.get("gemini_model") or 'gemini-2.0-flash' - self._gemini_apikey = config.get("gemini_apikey") or '' + self._gemini_model = config.get("gemini_model") or "gemini-2.5-flash" + self._gemini_apikey = config.get("gemini_apikey") or "" self._context_window = int(config.get("context_window") or 10) + self._context_window = max(5, min(self._context_window, 50)) self._max_retries = int(config.get("max_retries") or 3) - self._request_interval = int(config.get("request_interval") or 3) - self._ffmpeg_path = config.get("ffmpeg_path") or 'ffmpeg' + self._ffmpeg_path = config.get("ffmpeg_path") or "ffmpeg" self._english_only = config.get("english_only") self._when_file_trans = config.get("when_file_trans") - self._model_temperature = config.get("model_temperature") or '0.3' + self._model_temperature = config.get("model_temperature") or "0.3" self._show_phonetics = config.get("show_phonetics") - self._custom_files = config.get("custom_files") + self._custom_files = config.get("custom_files") or "" self._accent_color = config.get("accent_color") - self._font_scaling = config.get("font_scaling") or '1' - self._opacity = config.get("opacity") or '0' - self._spacy_model = config.get("spacy_model") or 'en_core_web_sm' + self._font_scaling = config.get("font_scaling") or "1" + self._opacity = config.get("opacity") or "0" + self._spacy_model = config.get("spacy_model") or "en_core_web_sm" self._exam_tags = config.get("exam_tags") or [] self._delete_data = config.get("delete_data") or False self._libraries = config.get("libraries") or [] + self._llm_base_url = config.get("llm_base_url") or "" + self._llm_provider = config.get("llm_provider") or "google" + self._use_mp_agent = config.get("use_mp_agent") or False + self._use_proxy = config.get("use_proxy") or False - libraries = 
[library.name for library in DirectoryHelper().get_library_dirs()] - self._libraries = [library for library in self._libraries if library in libraries] - self._accent_color_rgb = LexiAnnot.hex_to_rgb(self._accent_color) or (255, 255, 0) + libraries = [ + library.name for library in DirectoryHelper().get_library_dirs() + ] + self._libraries = [ + library for library in self._libraries if library in libraries + ] + self._accent_color_rgb = LexiAnnot.hex_to_rgb(self._accent_color) or (255, 255, 0,) self._color_alpha = int(self._opacity) if self._opacity and len(self._opacity) else 0 if self._delete_data: # 删除不再保存在数据库的数据 - self.del_data('cefr_lexicon') - self.del_data('coca2k_lexicon') - self.del_data('swear_words') - self.del_data('lexicon_version') + self.del_data("cefr_lexicon") + self.del_data("coca2k_lexicon") + self.del_data("swear_words") + self.del_data("lexicon_version") self.delete_data() self._delete_data = False self._loaded = False @@ -196,10 +184,10 @@ class LexiAnnot(_PluginBase): if task.status == TaskStatus.PENDING: self._task_queue.put(task) - self._query_gemini_script = str(settings.ROOT_PATH / "app" / "plugins" / "lexiannot" / "query_gemini.py") - self._shutdown_event = threading.Event() - self._worker_thread = threading.Thread(target=self.__process_tasks, daemon=True) + self._worker_thread = threading.Thread( + target=self.__process_tasks, daemon=True + ) self._worker_thread.start() if self._onlyonce: @@ -215,691 +203,749 @@ class LexiAnnot(_PluginBase): """ 拼装插件配置页面,需要返回两块数据:1、页面配置;2、数据结构 """ - library_options = [{'title': library.name,'value': library.name} - for library in DirectoryHelper().get_library_dirs()] + library_options = [ + {"title": library.name, "value": library.name} + for library in DirectoryHelper().get_library_dirs() + ] return [ { - 'component': 'VForm', - 'content': [ + "component": "VForm", + "content": [ { - 'component': 'VRow', - 'content': [ + "component": "VRow", + "content": [ { - 'component': 'VCol', - 'props': { - 'cols': 12, - 'md': 3 - }, - 'content': [ + "component": "VCol", + "props": {"cols": 12, "md": 3}, + "content": [ { - 'component': 'VSwitch', - 'props': { - 'model': 'enabled', - 'label': '启用插件', - } - } - ] - }, - - { - 'component': 'VCol', - 'props': { - 'cols': 12, - 'md': 3 - }, - 'content': [ - { - 'component': 'VSwitch', - 'props': { - 'model': 'send_notify', - 'label': '发送通知', - } - } - ] - }, - { - 'component': 'VCol', - 'props': { - 'cols': 12, - 'md': 3 - }, - 'content': [ - { - 'component': 'VSwitch', - 'props': { - 'model': 'onlyonce', - 'label': '手动运行一次', - } - } - ] - }, - { - 'component': 'VCol', - 'props': { - 'cols': 12, - 'md': 3 - }, - 'content': [ - { - 'component': 'VSwitch', - 'props': { - 'model': 'delete_data', - 'label': '插件数据清理', - } - } - ] - } - ] - }, - { - 'component': 'VTabs', - 'props': { - 'model': '_tabs', - 'style': { - 'margin-top': '8px', - 'margin-bottom': '16px' - }, - 'stacked': True, - 'fixed-tabs': True - }, - 'content': [ - { - 'component': 'VTab', - 'props': { - 'value': 'base_tab' - }, - 'text': '基本设置' - }, { - 'component': 'VTab', - 'props': { - 'value': 'subtitle_tab' - }, - 'text': '字幕设置' - }, { - 'component': 'VTab', - 'props': { - 'value': 'gemini_tab' - }, - 'text': 'Gemini设置' - } - ] - }, - { - 'component': 'VWindow', - 'props': { - 'model': '_tabs' - }, - 'content': [ - { - 'component': 'VWindowItem', - 'props': { - 'value': 'base_tab' - }, - 'content': [ - { - 'component': 'VRow', - 'props': { - 'style': { - 'margin-top': '0px' - } + "component": "VSwitch", + "props": { + "model": 
"enabled", + "label": "启用插件", }, - 'content': [ - { - 'component': 'VCol', - 'props': { - 'cols': 12, - 'md': 4 - }, - 'content': [ - { - 'component': 'VSwitch', - 'props': { - 'model': 'when_file_trans', - 'label': '监控入库', - } - } - ] - }, - { - 'component': 'VCol', - 'props': { - 'cols': 12, - 'md': 4 - }, - 'content': [ - { - 'component': 'VSelect', - 'props': { - 'model': 'spacy_model', - 'label': 'spaCy模型', - 'hint': 'spaCy 模型用于分词和词性标注,推荐使用 Small', - 'items': [ - {'title': 'Small (~12 MB)', 'value': 'en_core_web_sm'}, - {'title': 'Medium (~30 MB)', 'value': 'en_core_web_md'}, - {'title': 'Large (700+ MB)', 'value': 'en_core_web_lg'}, - {'title': 'Transformer (400+ MB)', - 'value': 'en_core_web_trf'}, - ] - } - } - ] - }, - { - 'component': 'VCol', - 'props': { - 'cols': 12, - 'md': 4 - }, - 'content': [ - { - 'component': 'VSelect', - 'props': { - 'model': 'annot_level', - 'label': '标注词汇的最低CEFR等级', - 'items': [ - {'title': 'B1', 'value': 'B1'}, - {'title': 'B2', 'value': 'B2'}, - {'title': 'C1', 'value': 'C1'}, - {'title': 'C2', 'value': 'C2'}, - {'title': 'C2+', 'value': 'C2+'} - ] - } - } - ] - }, - { - 'component': 'VCol', - 'props': { - 'cols': 12, - 'md': 4 - }, - 'content': [ - { - 'component': 'VSwitch', - 'props': { - 'model': 'english_only', - 'label': '仅英语影视剧', - 'hint': '检查入库影视剧原语言' - } - } - ] - }, - { - 'component': 'VCol', - 'props': { - 'cols': 12, - 'md': 8 - }, - 'content': [ - { - 'component': 'VSelect', - 'props': { - 'model': 'exam_tags', - 'label': '考试词汇标签', - 'chips': True, - 'multiple': True, - 'items': [ - {'title': '四级', 'value': 'CET-4'}, - {'title': '六级', 'value': 'CET-6'}, - {'title': '考研', 'value': 'NPEE'}, - {'title': '雅思', 'value': 'IELTS'}, - {'title': '托福', 'value': 'TOEFL'}, - {'title': '专四', 'value': 'TEM-4'}, - {'title': '专八', 'value': 'TEM-8'}, - {'title': 'GRE', 'value': 'GRE'}, - {'title': 'PET', 'value': 'PET'}, - ] - } - } - ] - } - ] - }, - { - 'component': 'VRow', - 'content': [ - { - 'component': 'VCol', - 'props': { - 'cols': 12, - }, - 'content': [ - { - 'component': 'VTextField', - 'props': { - 'model': 'ffmpeg_path', - 'label': 'FFmpeg 路径', - 'placeholder': 'ffmpeg' - } - } - ] - } - ] } - ] + ], }, { - 'component': 'VWindowItem', - 'props': { - 'value': 'subtitle_tab' - }, - 'content': [ + "component": "VCol", + "props": {"cols": 12, "md": 3}, + "content": [ { - 'component': 'VRow', - 'props': { - 'style': { - 'margin-top': '0px' - } + "component": "VSwitch", + "props": { + "model": "send_notify", + "label": "发送通知", }, - 'content': [ - { - 'component': 'VCol', - 'props': { - 'cols': 12, - 'md': 4 - }, - 'content': [ - { - 'component': 'VSelect', - 'props': { - 'model': 'font_scaling', - 'label': '字体缩放', - 'items': [ - {'title': '50%', 'value': '0.5'}, - {'title': '75%', 'value': '0.75'}, - {'title': '100%', 'value': '1'}, - {'title': '125%', 'value': '1.25'}, - {'title': '150%', 'value': '1.5'}, - {'title': '200%', 'value': '2'} - ] - } - } - ] - }, - { - 'component': 'VCol', - 'props': { - 'cols': 12, - 'md': 4 - }, - 'content': [ - { - 'component': 'VTextField', - 'props': { - 'model': 'accent_color', - 'label': '强调色', - 'placeholder': '#FFFF00' - } - } - ] - }, - { - 'component': 'VCol', - 'props': { - 'cols': 12, - 'md': 4 - }, - 'content': [ - { - 'component': 'VSelect', - 'props': { - 'model': 'opacity', - 'label': '不透明度', - 'items': [ - {'title': '0', 'value': '0'}, - {'title': '25%', 'value': '63'}, - {'title': '50%', 'value': '127'}, - {'title': '75%', 'value': '191'}, - {'title': '100%', 'value': '255'}, - ] - } - } - ] - } - 
] - }, - { - 'component': 'VRow', - 'content': [ - { - 'component': 'VCol', - 'props': { - 'cols': 12, - 'md': 4 - }, - 'content': [ - { - 'component': 'VSwitch', - 'props': { - 'model': 'show_phonetics', - 'label': '标注音标', - } - } - ] - }, - { - 'component': 'VCol', - 'props': { - 'cols': 12, - 'md': 4 - }, - 'content': [ - { - 'component': 'VSwitch', - 'props': { - 'model': 'in_place', - 'label': '在原字幕插入注释', - } - } - ] - }, - - { - 'component': 'VCol', - 'props': { - 'cols': 12, - 'md': 4 - }, - 'content': [ - { - 'component': 'VSwitch', - 'props': { - 'model': 'show_vocabulary_detail', - 'label': '显示完整释义', - } - } - ] - }, - - ] - }, - ] + } + ], }, { - 'component': 'VWindowItem', - 'props': { - 'value': 'gemini_tab' - }, - 'content': [ + "component": "VCol", + "props": {"cols": 12, "md": 3}, + "content": [ { - 'component': 'VRow', - 'content': [ - { - 'component': 'VCol', - 'props': { - 'cols': 12, - 'md': 6, - }, - 'content': [ - { - 'component': 'VSwitch', - 'props': { - 'model': 'enable_gemini', - 'label': '启用Gemini翻译', - } - } - ] - }, - { - 'component': 'VCol', - 'props': { - 'cols': 12, - 'md': 6 - }, - 'content': [ - { - 'component': 'VSwitch', - 'props': { - 'model': 'sentence_translation', - 'label': '整句翻译', - } - } - ] - } - ] - }, - { - 'component': 'VRow', - 'content': [ - { - 'component': 'VCol', - 'props': { - 'cols': 12, - 'md': 6, - }, - 'content': [ - { - 'component': 'VSelect', - 'props': { - 'model': 'gemini_model', - 'label': '模型', - 'items': [ - {'title': 'gemini-2.5-flash', - 'value': 'gemini-2.5-flash'}, - {'title': 'gemini-2.5-flash-lite', - 'value': 'gemini-2.5-flash-lite'}, - {'title': 'gemini-2.5-pro', - 'value': 'gemini-2.5-pro'}, - {'title': 'gemini-2.0-flash', - 'value': 'gemini-2.0-flash'}, - {'title': 'gemini-2.0-flash-lite', - 'value': 'gemini-2.0-flash-lite'}, - ] - } - } - ] - }, - { - 'component': 'VCol', - 'props': { - 'cols': 12, - 'md': 6, - }, - 'content': [ - { - 'component': 'VTextField', - 'props': { - 'model': 'gemini_apikey', - 'label': 'Gemini APIKEY', - 'placeholder': '' - } - } - ] - }, - ] - }, - { - 'component': 'VRow', - 'content': [ - { - 'component': 'VCol', - 'props': { - 'cols': 12, - 'md': 3, - }, - 'content': [ - { - 'component': 'VTextField', - 'props': { - 'model': 'context_window', - 'label': '上下文窗口大小', - 'placeholder': '10', - 'type': 'number', - 'max': 100, - 'min': 1, - 'hint': '向Gemini发送的上下文长度' - } - } - ] - }, - { - 'component': 'VCol', - 'props': { - 'cols': 12, - 'md': 3 - }, - 'content': [ - { - 'component': 'VSelect', - 'props': { - 'model': 'model_temperature', - 'label': '模型温度', - 'items': [ - {'title': '0', 'value': '0'}, - {'title': '0.1', 'value': '0.1'}, - {'title': '0.2', 'value': '0.2'}, - {'title': '0.3', 'value': '0.3'}, - ] - } - } - ] - }, - { - 'component': 'VCol', - 'props': { - 'cols': 12, - 'md': 3, - }, - 'content': [ - { - 'component': 'VTextField', - 'props': { - 'model': 'max_retries', - 'label': '请求重试次数', - 'placeholder': '3', - 'type': 'number', - 'min': 1, - 'hint': '请求失败重试次数' - } - } - ] - }, - { - 'component': 'VCol', - 'props': { - 'cols': 12, - 'md': 3, - }, - 'content': [ - { - 'component': 'VTextField', - 'props': { - 'model': 'request_interval', - 'label': '请求间隔', - 'type': 'number', - 'placeholder': 5, - 'min': 1, - 'suffix': '秒', - 'hint': '请求间隔时间,建议不少于3秒' - } - } - ] - }, - ] - } - ] - } - ] - }, - { - 'component': 'VRow', - 'props': { - 'style': { - 'margin-top': '0px' - } - }, - 'content': [ - { - 'component': 'VCol', - 'props': { - 'cols': 12, - }, - 'content': [ - { - 'component': 
'VSelect', - 'props': { - 'chips': True, - 'multiple': True, - 'model': 'libraries', - 'label': '监控入库', - 'items': library_options - } - } - ] - } - ] - }, - { - 'component': 'VRow', - 'props': { - 'style': { - 'margin-top': '0px' - } - }, - 'content': [ - { - 'component': 'VCol', - 'props': { - 'cols': 12, - }, - 'content': [ - { - 'component': 'VTextarea', - 'props': { - 'model': 'custom_files', - 'label': '手动处理视频路径', - 'rows': 3, - 'placeholder': '# 每行一个文件' - } - } - ] - }, - ] - }, - { - 'component': 'VRow', - 'content': [ - { - 'component': 'VCol', - 'props': { - 'cols': 12, - }, - 'content': [ - { - 'component': 'VAlert', - 'props': { - 'type': 'success', - 'variant': 'tonal' + "component": "VSwitch", + "props": { + "model": "onlyonce", + "label": "手动运行一次", }, - 'content': [ + } + ], + }, + { + "component": "VCol", + "props": {"cols": 12, "md": 3}, + "content": [ + { + "component": "VSwitch", + "props": { + "model": "delete_data", + "label": "插件数据清理", + }, + } + ], + }, + ], + }, + { + "component": "VTabs", + "props": { + "model": "_tabs", + "style": {"margin-top": "8px", "margin-bottom": "16px"}, + "stacked": True, + "fixed-tabs": True, + }, + "content": [ + { + "component": "VTab", + "props": {"value": "base_tab"}, + "text": "基本设置", + }, + { + "component": "VTab", + "props": {"value": "subtitle_tab"}, + "text": "字幕设置", + }, + { + "component": "VTab", + "props": {"value": "gemini_tab"}, + "text": "LLM 设置", + }, + ], + }, + { + "component": "VWindow", + "props": {"model": "_tabs"}, + "content": [ + { + "component": "VWindowItem", + "props": {"value": "base_tab"}, + "content": [ + { + "component": "VRow", + "props": {"style": {"margin-top": "0px"}}, + "content": [ { - 'component': 'span', - 'text': '配置说明:' + "component": "VCol", + "props": {"cols": 12, "md": 4}, + "content": [ + { + "component": "VSwitch", + "props": { + "model": "when_file_trans", + "label": "监控入库", + }, + } + ], }, { - 'component': 'a', - 'props': { - 'href': 'https://github.com/jxxghp/MoviePilot-Plugins/tree/main/plugins.v2/lexiannot/README.md', - 'target': '_blank' - }, - 'content': [ + "component": "VCol", + "props": {"cols": 12, "md": 4}, + "content": [ { - 'component': 'u', - 'text': 'README' + "component": "VSelect", + "props": { + "model": "spacy_model", + "label": "spaCy模型", + "hint": "用于分词和词性标注,推荐使用「md」", + "items": [ + { + "title": "sm (~12 MB)", + "value": "en_core_web_sm", + }, + { + "title": "md (~30 MB)", + "value": "en_core_web_md", + }, + { + "title": "lg (700+ MB)", + "value": "en_core_web_lg", + }, + { + "title": "Transformer (400+ MB)", + "value": "en_core_web_trf", + }, + ], + }, } - ] + ], + }, + { + "component": "VCol", + "props": {"cols": 12, "md": 4}, + "content": [ + { + "component": "VSelect", + "props": { + "model": "annot_level", + "label": "标注词汇的最低CEFR等级", + "items": [ + { + "title": "B1", + "value": "B1", + }, + { + "title": "B2", + "value": "B2", + }, + { + "title": "C1", + "value": "C1", + }, + { + "title": "C2", + "value": "C2", + }, + { + "title": "C2+", + "value": "C2+", + }, + ], + }, + } + ], + }, + { + "component": "VCol", + "props": {"cols": 12, "md": 4}, + "content": [ + { + "component": "VSwitch", + "props": { + "model": "english_only", + "label": "仅英语影视剧", + "hint": "检查入库影视剧原语言", + }, + } + ], + }, + { + "component": "VCol", + "props": {"cols": 12, "md": 8}, + "content": [ + { + "component": "VSelect", + "props": { + "model": "exam_tags", + "label": "考试词汇标签", + "chips": True, + "multiple": True, + "items": [ + { + "title": "四级", + "value": "CET-4", + }, + { + "title": "六级", 
+ "value": "CET-6", + }, + { + "title": "考研", + "value": "NPEE", + }, + { + "title": "雅思", + "value": "IELTS", + }, + { + "title": "托福", + "value": "TOEFL", + }, + { + "title": "专四", + "value": "TEM-4", + }, + { + "title": "专八", + "value": "TEM-8", + }, + { + "title": "GRE", + "value": "GRE", + }, + { + "title": "PET", + "value": "PET", + }, + ], + }, + } + ], + }, + ], + }, + { + "component": "VRow", + "content": [ + { + "component": "VCol", + "props": { + "cols": 12, + }, + "content": [ + { + "component": "VTextField", + "props": { + "model": "ffmpeg_path", + "label": "FFmpeg 路径", + "placeholder": "ffmpeg", + }, + } + ], } - ] + ], + }, + ], + }, + { + "component": "VWindowItem", + "props": {"value": "subtitle_tab"}, + "content": [ + { + "component": "VRow", + "props": {"style": {"margin-top": "0px"}}, + "content": [ + { + "component": "VCol", + "props": {"cols": 12, "md": 4}, + "content": [ + { + "component": "VSelect", + "props": { + "model": "font_scaling", + "label": "字体缩放", + "items": [ + { + "title": "50%", + "value": "0.5", + }, + { + "title": "75%", + "value": "0.75", + }, + { + "title": "100%", + "value": "1", + }, + { + "title": "125%", + "value": "1.25", + }, + { + "title": "150%", + "value": "1.5", + }, + { + "title": "200%", + "value": "2", + }, + ], + }, + } + ], + }, + { + "component": "VCol", + "props": {"cols": 12, "md": 4}, + "content": [ + { + "component": "VTextField", + "props": { + "model": "accent_color", + "label": "强调色", + "placeholder": "#FFFF00", + }, + } + ], + }, + { + "component": "VCol", + "props": {"cols": 12, "md": 4}, + "content": [ + { + "component": "VSelect", + "props": { + "model": "opacity", + "label": "透明度", + "items": [ + { + "title": "0", + "value": "0", + }, + { + "title": "25%", + "value": "63", + }, + { + "title": "50%", + "value": "127", + }, + { + "title": "75%", + "value": "191", + }, + { + "title": "100%", + "value": "255", + }, + ], + }, + } + ], + }, + ], + }, + { + "component": "VRow", + "content": [ + { + "component": "VCol", + "props": {"cols": 12, "md": 4}, + "content": [ + { + "component": "VSwitch", + "props": { + "model": "show_phonetics", + "label": "标注音标", + }, + } + ], + }, + { + "component": "VCol", + "props": {"cols": 12, "md": 4}, + "content": [ + { + "component": "VSwitch", + "props": { + "model": "in_place", + "label": "在原字幕插入注释", + }, + } + ], + }, + { + "component": "VCol", + "props": {"cols": 12, "md": 4}, + "content": [ + { + "component": "VSwitch", + "props": { + "model": "show_vocabulary_detail", + "label": "显示完整释义", + }, + } + ], + }, + ], + }, + ], + }, + { + "component": "VWindowItem", + "props": {"value": "gemini_tab"}, + "content": [ + { + "component": "VRow", + "content": [ + { + "component": "VCol", + "props": { + "cols": 12, + "md": 3, + }, + "content": [ + { + "component": "VSwitch", + "props": { + "model": "enable_gemini", + "label": "启用 LLM", + }, + } + ], + }, + { + "component": "VCol", + "props": { + "cols": 12, + "md": 3, + }, + "content": [ + { + "component": "VSwitch", + "props": { + "model": "use_mp_agent", + "label": "使用系统 Agent 配置", + }, + } + ], + }, + { + "component": "VCol", + "props": { + "cols": 12, + "md": 3, + }, + "content": [ + { + "component": "VSwitch", + "props": { + "model": "use_proxy", + "label": "使用系统代理", + }, + } + ], + }, + { + "component": "VCol", + "props": {"cols": 12, "md": 3}, + "content": [ + { + "component": "VSwitch", + "props": { + "model": "sentence_translation", + "label": "整句翻译", + }, + } + ], + }, + ], + }, + { + "component": "VRow", + "content": [ + { + "component": 
"VCol", + "props": { + "cols": 12, + "md": 6, + }, + "content": [ + { + "component": "VSelect", + "props": { + "model": "llm_provider", + "label": "提供商", + "disabled": "use_mp_agent", + "items": [ + { + "title": "Google", + "value": "google", + }, + { + "title": "OpenAI", + "value": "openai", + }, + { + "title": "DeepSeek", + "value": "deepseek", + }, + ], + }, + } + ], + }, + { + "component": "VCol", + "props": { + "cols": 12, + "md": 6, + }, + "content": [ + { + "component": "VTextField", + "props": { + "model": "llm_base_url", + "disabled": "use_mp_agent", + "placeholder": "https://api.deepseek.com", + "label": "基础 URL", + "hint": "参考 MoviePilot Agent 配置", + }, + } + ], + }, + { + "component": "VCol", + "props": { + "cols": 12, + "md": 6, + }, + "content": [ + { + "component": "VCombobox", + "props": { + "model": "gemini_model", + "disabled": "use_mp_agent", + "label": "模型名称", + "items": [ + "gemini-2.5-flash", + "gemini-2.5-flash-lite", + "gemini-2.5-pro", + "gemini-2.0-flash", + "gemini-2.0-flash-lite", + "deepseek-ai/DeepSeek-V3.2", + "deepseek-ai/DeepSeek-R1" + ], + }, + } + ], + }, + { + "component": "VCol", + "props": { + "cols": 12, + "md": 6, + }, + "content": [ + { + "component": "VTextField", + "props": { + "model": "gemini_apikey", + "label": "API-KEY", + "disabled": "use_mp_agent", + }, + } + ], + }, + ], + }, + { + "component": "VRow", + "content": [ + { + "component": "VCol", + "props": { + "cols": 12, + "md": 4, + }, + "content": [ + { + "component": "VTextField", + "props": { + "model": "context_window", + "label": "上下文窗口大小", + "placeholder": "10", + "type": "number", + "max": 50, + "min": 1, + "hint": "向大模型发送的对话数量", + }, + } + ], + }, + { + "component": "VCol", + "props": {"cols": 12, "md": 4}, + "content": [ + { + "component": "VSelect", + "props": { + "model": "model_temperature", + "label": "模型温度", + "items": [ + {"title": "0", "value": "0"}, + {"title": "0.1", "value": "0.1"}, + {"title": "0.2", "value": "0.2"}, + {"title": "0.3", "value": "0.3"}, + {"title": "0.4", "value": "0.4"}, + {"title": "0.5", "value": "0.5"}, + ], + }, + } + ], + }, + { + "component": "VCol", + "props": { + "cols": 12, + "md": 4, + }, + "content": [ + { + "component": "VTextField", + "props": { + "model": "max_retries", + "label": "请求重试次数", + "placeholder": "3", + "type": "number", + "min": 1, + "hint": "请求失败重试次数", + }, + } + ], + }, + ], + }, + ], + }, + ], + }, + { + "component": "VRow", + "props": {"style": {"margin-top": "0px"}}, + "content": [ + { + "component": "VCol", + "props": { + "cols": 12, + }, + "content": [ + { + "component": "VSelect", + "props": { + "chips": True, + "multiple": True, + "model": "libraries", + "label": "监控入库", + "items": library_options, + }, } - ] + ], } - ] - } - ] + ], + }, + { + "component": "VRow", + "props": {"style": {"margin-top": "0px"}}, + "content": [ + { + "component": "VCol", + "props": { + "cols": 12, + }, + "content": [ + { + "component": "VTextarea", + "props": { + "model": "custom_files", + "label": "手动处理视频路径", + "rows": 3, + "placeholder": "# 每行一个文件", + }, + } + ], + }, + ], + }, + { + "component": "VRow", + "content": [ + { + "component": "VCol", + "props": { + "cols": 12, + }, + "content": [ + { + "component": "VAlert", + "props": { + "type": "success", + "variant": "tonal", + }, + "content": [ + {"component": "span", "text": "配置说明:"}, + { + "component": "a", + "props": { + "href": "https://github.com/jxxghp/MoviePilot-Plugins/tree/main/plugins.v2/lexiannot/README.md", + "target": "_blank", + }, + "content": [ + {"component": "u", "text": 
"README"} + ], + }, + ], + } + ], + } + ], + }, + ], } ], { "enabled": False, - "annot_level": 'C1', + "annot_level": "C1", "send_notify": False, "onlyonce": False, "show_vocabulary_detail": False, @@ -907,42 +953,52 @@ class LexiAnnot(_PluginBase): "sentence_translation": False, "in_place": False, "enable_gemini": False, - "gemini_model": 'gemini-2.0-flash', - "gemini_apikey": '', + "gemini_model": "gemini-2.0-flash", + "gemini_apikey": "", "context_window": 10, "max_retries": 3, - 'request_interval': 3, + "request_interval": 3, "ffmpeg_path": "", "english_only": True, "when_file_trans": True, - "model_temperature": '0.3', - "custom_files": '', - "accent_color": '', - "font_scaling": '1', - "opacity": '0', - "spacy_model": 'en_core_web_sm', + "model_temperature": "0.1", + "custom_files": "", + "accent_color": "", + "font_scaling": "1", + "opacity": "0", + "spacy_model": "en_core_web_sm", "exam_tags": [], "delete_data": False, - "libraries": [] + "libraries": [], + "llm_provider": "google", + "llm_base_url": "", + "use_mp_agent": False, + "use_proxy": False, } def get_api(self) -> List[Dict[str, Any]]: - pass + return [ + { + "path": "/tasks", + "endpoint": self.task_interface, + "methods": ["POST"], + "summary": "任务操作", + "description": "任务操作", + } + ] def get_page(self) -> List[dict]: headers = [ - {'title': '添加时间', 'key': 'add_time', 'sortable': True}, - {'title': '视频文件', 'key': 'video_path', 'sortable': True}, - {'title': '消耗 Tokens', 'key': 'tokens_used', 'sortable': True}, - {'title': '完成时间', 'key': 'complete_time', 'sortable': True}, - {'title': '任务状态', 'key': 'status', 'sortable': True}, + {"title": "添加时间", "key": "add_time", "sortable": True}, + {"title": "视频文件", "key": "video_path", "sortable": True}, + {"title": "消耗 Tokens", "key": "tokens_used", "sortable": True}, + {"title": "完成时间", "key": "complete_time", "sortable": True}, + {"title": "任务状态", "key": "status", "sortable": True}, ] items = [] with self._tasks_lock: sorted_tasks = sorted( - self._tasks.items(), - key=lambda x: x[1].add_time, - reverse=True + self._tasks.items(), key=lambda x: x[1].add_time, reverse=True ) status_map = { @@ -951,52 +1007,82 @@ class LexiAnnot(_PluginBase): TaskStatus.COMPLETED: "已完成", TaskStatus.IGNORED: "已忽略", TaskStatus.FAILED: "失败", - TaskStatus.CANCELED: "已取消" + TaskStatus.CANCELED: "已取消", } for task_id, task in sorted_tasks: status_text = status_map.get(task.status, task.status) item = { - 'task_id': task_id, - 'status': status_text, - 'video_path': task.video_path, - 'add_time': task.add_time.strftime("%Y-%m-%d %H:%M:%S") if task.add_time else '-', - 'tokens_used': task.tokens_used, - 'complete_time': task.complete_time.strftime("%Y-%m-%d %H:%M:%S") if task.complete_time else '-', + "task_id": task_id, + "status": status_text, + "video_path": task.video_path, + "add_time": task.add_time if task.add_time else "-", + "tokens_used": task.tokens_used, + "complete_time": task.complete_time if task.complete_time else "-", } items.append(item) return [ { - 'component': 'VRow', - 'props': { - 'style': { - 'overflow': 'hidden', + "component": "div", + "props": {"class": "d-flex align-center"}, + "content": [ + { + "component": "h2", + "props": {"class": "page-title m-0"}, + "text": "任务记录", + }, + {"component": "VSpacer"}, + { + "component": "VBtn", + "props": { + "prepend-icon": "mdi-delete-circle", + "variant": "tonal", + }, + "text": "清空任务记录", + "events": { + "click": { + "api": f"plugin/{self.__class__.__name__}/tasks?apikey={settings.API_TOKEN}", + "method": "post", + "params": { + "operation": 
"DELETE", + "task_id": None, + }, + } + }, + }, + ], + }, + { + "component": "VRow", + "props": { + "style": { + "overflow": "hidden", } }, - 'content': [ + "content": [ { - 'component': 'VCol', - 'props': { - 'cols': 12, + "component": "VCol", + "props": { + "cols": 12, }, - 'content': [ + "content": [ { - 'component': 'VDataTableVirtual', - 'props': { - 'class': 'text-sm', - 'headers': headers, - 'items': items, - 'height': '30rem', - 'density': 'compact', - 'fixed-header': True, - 'hide-no-data': True, - 'hover': True - } + "component": "VDataTableVirtual", + "props": { + "class": "text-sm", + "headers": headers, + "items": items, + "height": "30rem", + "density": "compact", + "fixed-header": True, + "hide-no-data": True, + "hover": True, + }, } - ] + ], } - ] - } + ], + }, ] @staticmethod @@ -1009,6 +1095,13 @@ class LexiAnnot(_PluginBase): """ return self._enabled + def get_agent_tools(self) -> List[type]: + """ + 获取插件智能体工具 + 返回工具类列表,每个工具类必须继承自 MoviePilotTool + """ + return [VocabularyAnnotatingTool] + def stop_service(self): """ 退出插件 @@ -1034,7 +1127,7 @@ class LexiAnnot(_PluginBase): def delete_data(self): # 删除词典 data_path = self.get_data_path() - lexicon_path = data_path / 'lexicon.json' + lexicon_path = data_path / "lexicon.json" try: os.remove(lexicon_path) logger.info(f"词典 {lexicon_path} 已删除") @@ -1042,7 +1135,7 @@ class LexiAnnot(_PluginBase): pass except Exception as e: logger.error(f"词典 {lexicon_path} 删除失败: {e}") - self.__load_lexicon_from_local.cache_clear() + self._load_lexicon_from_local.cache_clear() # 删除虚拟环境 venv_dir = data_path / "venv_genai" @@ -1059,19 +1152,11 @@ class LexiAnnot(_PluginBase): self.save_tasks() def load_tasks(self) -> Dict[str, Task]: - raw_tasks = self.get_data('tasks') or {} + raw_tasks = self.get_data("tasks") or {} tasks = {} for task_id, task_dict in raw_tasks.items(): try: - task = Task( - video_path=task_dict.get('video_path'), - task_id=task_dict.get('task_id'), - status=TaskStatus(task_dict.get('status')), - add_time=datetime.fromisoformat(task_dict.get('add_time')) if task_dict.get('add_time') else None, - complete_time=datetime.fromisoformat(task_dict.get('complete_time')) if task_dict.get( - 'complete_time') else None, - tokens_used=task_dict.get('tokens_used', 0) - ) + task = Task.model_validate(task_dict) tasks[task_id] = task except Exception as e: logger.error(f"加载任务失败:{e}") @@ -1079,18 +1164,25 @@ class LexiAnnot(_PluginBase): def save_tasks(self): with self._tasks_lock: - tasks_dict = {task_id: task.to_dict() for task_id, task in self._tasks.items()} + tasks_dict = { + task_id: task.model_dump(mode="json") + for task_id, task in self._tasks.items() + } self.save_data("tasks", tasks_dict) - def add_task(self, video_file: str): - task = Task(video_path=video_file, add_time=datetime.now()) + def add_task(self, video_file: str, skip_existing=True): + task = Task( + video_path=video_file, + add_time=datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + params=TaskParams(skip_existing=skip_existing), + ) with self._tasks_lock: self._tasks[task.task_id] = task self._task_queue.put(task) self.save_tasks() logger.info(f"加入任务队列: {video_file}") - def add_media_file(self, path: str): + def add_media_file(self, path: str, skip_existing: bool = True): """ 添加新任务 """ @@ -1099,39 +1191,118 @@ class LexiAnnot(_PluginBase): else: raise RuntimeError("Plugin is shutting down. 
Cannot add new tasks.") + def delete_tasks(self, task_id: str | None): + historical_status = { + TaskStatus.COMPLETED, + TaskStatus.FAILED, + TaskStatus.CANCELED, + TaskStatus.IGNORED, + } + with self._tasks_lock: + if task_id is None: + tasks_to_delete = [ + task_id + for task_id, task in self._tasks.items() + if task.status in historical_status + ] + else: + task = self._tasks.get(task_id) + if task and task.status in historical_status: + tasks_to_delete = [task_id] + else: + tasks_to_delete = [] + for task_id in tasks_to_delete: + del self._tasks[task_id] + self.save_tasks() + + def task_interface(self, params: TasksApiParams) -> Response: + if params.operation == "DELETE": + logger.info("清空任务记录") + self.delete_tasks(params.task_id) + return Response(success=True) + def __update_config(self): with self._config_updating_lock: self.update_config( { - 'enabled': self._enabled, - 'annot_level': self._annot_level, - 'send_notify': self._send_notify, - 'onlyonce': self._onlyonce, - 'show_vocabulary_detail': self._show_vocabulary_detail, - 'sentence_translation': self._sentence_translation, - 'in_place': self._in_place, - 'enable_gemini': self._enable_gemini, - 'gemini_model': self._gemini_model, - 'gemini_apikey': self._gemini_apikey, - 'context_window': self._context_window, - 'max_retries': self._max_retries, - 'request_interval': self._request_interval, - 'ffmpeg_path': self._ffmpeg_path, - 'english_only': self._english_only, - 'when_file_trans': self._when_file_trans, - 'model_temperature': self._model_temperature, - 'show_phonetics': self._show_phonetics, - 'custom_files': self._custom_files, - 'accent_color': self._accent_color, - 'font_scaling': self._font_scaling, - 'opacity': self._opacity, - 'spacy_model': self._spacy_model, - 'exam_tags': self._exam_tags, - 'delete_data': self._delete_data, - 'libraries': self._libraries + "enabled": self._enabled, + "annot_level": self._annot_level, + "send_notify": self._send_notify, + "onlyonce": self._onlyonce, + "show_vocabulary_detail": self._show_vocabulary_detail, + "sentence_translation": self._sentence_translation, + "in_place": self._in_place, + "enable_gemini": self._enable_gemini, + "gemini_model": self._gemini_model, + "gemini_apikey": self._gemini_apikey, + "context_window": self._context_window, + "max_retries": self._max_retries, + "ffmpeg_path": self._ffmpeg_path, + "english_only": self._english_only, + "when_file_trans": self._when_file_trans, + "model_temperature": self._model_temperature, + "show_phonetics": self._show_phonetics, + "custom_files": self._custom_files, + "accent_color": self._accent_color, + "font_scaling": self._font_scaling, + "opacity": self._opacity, + "spacy_model": self._spacy_model, + "exam_tags": self._exam_tags, + "delete_data": self._delete_data, + "libraries": self._libraries, + "llm_provider": self._llm_provider, + "llm_base_url": self._llm_base_url, + "use_mp_agent": self._use_mp_agent, + "use_proxy": self._use_proxy, } ) + def _send_message( + self, + task: Task, + phase: Literal["start", "end"], + context: Context | None = None, + process_result: ProcessResult | None = None, + ): + if not self._send_notify: + return + video_path = Path(task.video_path) + media_name = video_path.name + if context and context.media_info and context.meta_info: + media_info = context.media_info + if media_info.type == MediaType.TV: + media_name = ( + f"{media_info.title_year} {context.meta_info.season_episode}" + ) + else: + media_name = f"{media_info.title_year}.{video_path.suffix}" + message = f"标题: {media_name}" + 
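+        # 通知分两个阶段:start 在任务开始处理前发送标题与海报;end 在任务结束后发送,附带处理结果、备注与 token 统计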
if phase == "start":
+            self.post_message(
+                title=f"【{self.plugin_name}】 任务开始",
+                image=context.media_info.get_message_image()
+                if context and context.media_info
+                else None,
+                mtype=NotificationType.Plugin,
+                text=f"{message}",
+            )
+        else:
+            result = "完成"
+            if process_result and process_result.status == TaskStatus.FAILED:
+                result = "失败"
+            elif process_result and process_result.status == TaskStatus.CANCELED:
+                result = "取消"
+            stat_str = f"\n{task.statistics.to_string()}" if task.statistics else ""
+            self.post_message(
+                title=f"【{self.plugin_name}】 任务{result}",
+                mtype=NotificationType.Plugin,
+                image=context.media_info.get_message_image()
+                if context and context.media_info
+                else None,
+                text=f"{message}\n备注:{process_result.message if process_result else ''}\n"
+                f"Tokens:{task.tokens_used:,}{stat_str}",
+            )
+
     def __process_tasks(self):
         """
         后台线程:处理任务队列
@@ -1140,7 +1311,7 @@ class LexiAnnot(_PluginBase):
         self.__load_data()
 
         if not self._loaded:
-            logger.warn('插件数据未加载')
+            logger.warn("插件数据未加载")
             self._enabled = False
             self.__update_config()
             logger.debug("🛑 Worker exiting...")
@@ -1148,7 +1319,7 @@ class LexiAnnot(_PluginBase):
         if self._enable_gemini:
             self._gemini_available = True
             if not self._gemini_apikey:
-                logger.warn("未提供GEMINI APIKEY")
+                logger.warn("未提供 APIKEY")
                 self._gemini_available = False
 
         while not self._shutdown_event.is_set():
@@ -1156,143 +1327,178 @@ class LexiAnnot(_PluginBase):
                 task = self._task_queue.get(timeout=1)
                 if task is None:
                     continue
-                tokens = self._total_token_count
+                context = MediaChain().recognize_by_path(path=task.video_path)
+                cb = None
+                res = ProcessResult(status=TaskStatus.FAILED, message="未知错误")
                 try:
                     task.status = TaskStatus.RUNNING
+                    self._send_message(task, "start", context)
                     with SpacyWorker(self._spacy_model) as worker:
-                        task.status = self.__process_file(task.video_path, worker)
+                        with get_openai_callback() as cb:
+                            res = self._process_file(
+                                task.video_path,
+                                worker,
+                                context,
+                                task.params.skip_existing,
+                            )
+                    task.status = res.status
+                    task.message = res.message
+                    task.statistics = res.statistics
                 except Exception as e:
                     task.status = TaskStatus.FAILED
-                    logger.error(f"处理 {task} 出错: {e}")
+                    task.message = str(e)
+                    logger.error(f"处理 {task.task_id} 出错: {e}")
+                    res = ProcessResult(status=TaskStatus.FAILED, message=str(e))
                 finally:
                     self._task_queue.task_done()
-                    task.complete_time = datetime.now()
-                    task.tokens_used = self._total_token_count - tokens
+                    task.complete_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+                    if cb:
+                        task.tokens_used = cb.total_tokens
+                        logger.info(f"任务 {task.task_id} 消耗 Tokens: "
+                                    f"Input ({cb.prompt_tokens:,}), Output ({cb.completion_tokens:,})")
                     self.save_tasks()
+                    self._send_message(task, "end", context, res)
             except queue.Empty:
                 continue
-        logger.debug(f"🛑 Worker thread {threading.get_ident():#x} received shutdown signal, exiting...")
+        logger.debug(
+            f"🛑 Worker thread {threading.get_ident():#x} received shutdown signal, exiting..."
+ ) - def __process_file(self, path: str, spacy_worker: SpacyWorker) -> TaskStatus: + def _process_file( + self, + path: str, + spacy_worker: SpacyWorker, + mediainfo: Context | None = None, + skip_existing: bool = True + ) -> ProcessResult: """ 处理视频文件 """ if not self._loaded: - return TaskStatus.FAILED - lexicon = self.__load_lexicon_from_local() - if not lexicon: + return ProcessResult(status=TaskStatus.FAILED, message="插件数据未加载") + lexi = self._load_lexicon_from_local() + if not lexi: logger.error("字典加载失败") - return TaskStatus.FAILED + return ProcessResult(status=TaskStatus.FAILED, message="字典加载失败") video = Path(path) if video.suffix.lower() not in settings.RMT_MEDIAEXT: - return TaskStatus.CANCELED + return ProcessResult(status=TaskStatus.CANCELED, message="不支持的文件格式") if not video.exists() or not video.is_file(): logger.warn(f"文件 {str(video)} 不存在, 跳过") - return TaskStatus.FAILED - subtitle = video.with_suffix(".en.ass") - if subtitle.exists(): - logger.warn(f"字幕文件 ({subtitle}) 已存在, 跳过") - return TaskStatus.IGNORED + return ProcessResult(status=TaskStatus.FAILED, message="文件不存在") + ass_file = video.with_suffix(".en.ass") + if ass_file.exists() and skip_existing: + logger.warn(f"字幕文件 ({ass_file}) 已存在, 跳过") + return ProcessResult(status=TaskStatus.IGNORED, message="字幕文件已存在") logger.info(f"📂 Processing file: {path}") - if self._send_notify: - message = f"正在处理文件: {path}" - self.post_message(title=f"【{self.plugin_name}】", - mtype=NotificationType.Plugin, - text=f"{message}") - ffmpeg_path = self._ffmpeg_path if self._ffmpeg_path else 'ffmpeg' - eng_mark = ['en', 'en-US', 'eng', 'en-GB', 'english', 'en-AU'] - embedded_subtitles = LexiAnnot._extract_subtitles_by_lang(path, eng_mark, ffmpeg_path) + + ffmpeg_path = self._ffmpeg_path if self._ffmpeg_path else "ffmpeg" + eng_mark = ["en", "en-US", "eng", "en-GB", "english", "en-AU"] + embedded_subtitles = LexiAnnot._extract_subtitles_by_lang( + path, eng_mark, ffmpeg_path + ) if not embedded_subtitles: - return TaskStatus.CANCELED + return ProcessResult( + status=TaskStatus.CANCELED, message="未找到嵌入式英文文本字幕" + ) # order factor = 0, if 'SDH' in track['title'] # order factor = track['duration'], otherwise - embedded_subtitles = sorted(embedded_subtitles, - key=lambda track: track['duration']*(1-int('SDH' in track['title'])), - reverse=True) - ret_message = '' + embedded_subtitles = sorted( + embedded_subtitles, + key=lambda track: track["duration"] * (1 - int("SDH" in track["title"])), + reverse=True, + ) + ret_message = "" + stat = None if embedded_subtitles: - logger.info(f'提取到 {len(embedded_subtitles)} 条英语文本字幕') + logger.info(f"提取到 {len(embedded_subtitles)} 条英语文本字幕") for embedded_subtitle in embedded_subtitles: if self._shutdown_event.is_set(): - return TaskStatus.CANCELED - ass_subtitle = pysubs2.SSAFile.from_string(embedded_subtitle['subtitle'], format_='ass') - if embedded_subtitle.get('codec_id') == 'S_TEXT/UTF8': + return ProcessResult( + status=TaskStatus.CANCELED, message="任务已取消" + ) + ass_subtitle = SSAFile.from_string( + embedded_subtitle["subtitle"], format_="ass" + ) + if embedded_subtitle.get("codec_id") == "S_TEXT/UTF8": ass_subtitle = LexiAnnot.set_srt_style(ass_subtitle) ass_subtitle = self.__set_style(ass_subtitle) - ass_subtitle = self.process_subtitles(ass_subtitle, lexicon.get('cefr'), lexicon.get('coca20k'), - lexicon.get('examinations'), lexicon.get('swear_words'), - spacy_worker) + ass_subtitle, stat = self.process_subtitles( + ass_subtitle, lexi, spacy_worker, mediainfo + ) if self._shutdown_event.is_set(): - return 
TaskStatus.CANCELED + return ProcessResult( + status=TaskStatus.CANCELED, message="任务已取消" + ) if ass_subtitle: try: - ass_subtitle.save(str(subtitle)) - ret_message = f"字幕已保存:{str(subtitle)}" - logger.info(f"字幕已保存:{str(subtitle)}") + ass_subtitle.save(str(ass_file)) + ret_message = "字幕已保存" + logger.info(f"字幕已保存:{str(ass_file)}") except Exception as e: - ret_message = f"字幕文件 {subtitle} 保存失败, {e}" - logger.error(f"字幕文件 {subtitle} 保存失败, {e}") + ret_message = f"字幕文件 {ass_file} 保存失败" + logger.error(f"字幕文件 {ass_file} 保存失败, {e}") break else: - logger.info(f"处理字幕{embedded_subtitle['codec_id']}-{embedded_subtitle['stream_id']}失败") + logger.info( + f"处理字幕{embedded_subtitle['codec_id']}-{embedded_subtitle['stream_id']}失败" + ) else: logger.warn(f"未能在{path}中找到可提取的英文字幕") if not ret_message: - ret_message = f"未能在{path}中找到可提取的英文字幕" + ret_message = "未能找到可提取的英文字幕" logger.info(f"✅ Finished: {path}") - if self._send_notify: - self.post_message(title=f"【{self.plugin_name}】", - mtype=NotificationType.Plugin, - text=f"{ret_message}") - return TaskStatus.COMPLETED + return ProcessResult(status=TaskStatus.COMPLETED, message=ret_message, statistics=stat) @cached(maxsize=1, ttl=1800) def __load_lexicon_version(self) -> Optional[str]: logger.info("正在检查远程词典文件版本...") - url = f'{self._lexicon_repo}master/version' + url = f"{self._lexicon_repo}master/version" version = RequestUtils().get(url, headers=settings.REPO_GITHUB_HEADERS()) if version is None: return None return version.strip() - @cached(maxsize=1, ttl=3600*6) - def __load_lexicon_from_local(self) -> Optional[Dict[str, Any]]: + @cached(maxsize=1, ttl=3600 * 24) + def _load_lexicon_from_local(self) -> Lexicon | None: data_path = self.get_data_path() - lexicon = {} try: - lexicon_path = data_path / 'lexicon.json' - with open(lexicon_path, 'r', encoding='utf-8') as f: - lexicon = json.load(f) + lexicon_path = data_path / "lexicon.json" + with open(lexicon_path, "r", encoding="utf-8") as f: + content = f.read() + lexicon_model = Lexicon.model_validate_json(content) except Exception as e: - logger.debug(f"词典文件读取失败: {e}") - lexicon_files = ('cefr', 'coca20k', 'swear_words', 'examinations', 'version') - if any(file not in lexicon for file in lexicon_files): + logger.error(f"词典文件加载失败: {e}") return None - return lexicon + return lexicon_model - def __retrieve_lexicon_online(self, version: str) -> Optional[Dict[str, Any]]: - logger.info('开始下载词典文件...') - lexicon_files = ['cefr', 'coca20k', 'swear_words', 'examinations'] - lexicon = {} + def _retrieve_lexicon_online(self, version: str) -> Lexicon | None: + logger.info("开始下载词典文件...") + lexicon_files = ["cefr", "coca20k", "swear_words", "examinations"] + lexicon_dict = {} for file in lexicon_files: - url = f'{self._lexicon_repo}master/{file}.json' + url = f"{self._lexicon_repo}master/{file}.json" res = RequestUtils().get_res(url, headers=settings.REPO_GITHUB_HEADERS()) + if not res: + return None if res.status_code == 200: - lexicon[file] = res.json() - if any(file not in lexicon for file in lexicon_files): + lexicon_dict[file] = res.json() + if any(file not in lexicon_dict for file in lexicon_files): return None logger.info(f"词典文件 (v{version}) 下载完成") data_path = self.get_data_path() - lexicon['version'] = version + lexicon_dict["version"] = version try: - lexicon_path = data_path / 'lexicon.json' - with open(lexicon_path, 'w', encoding='utf-8') as f: - json.dump(lexicon, f, ensure_ascii=False, indent=2) + lexicon_path = data_path / "lexicon.json" + with open(lexicon_path, "w", encoding="utf-8") as f: + json.dump(lexicon_dict, 
f, ensure_ascii=False, indent=2) + lexi = Lexicon.model_validate(lexicon_dict) except Exception as e: logger.warn(f"词典文件保存失败: {e}") - return lexicon + return None + return lexi def __load_data(self): """ @@ -1305,17 +1511,19 @@ class LexiAnnot(_PluginBase): except RuntimeError: nlp = LexiAnnot.__download_spacy_model(self._spacy_model) - lexicon = self.__load_lexicon_from_local() - latest = self.__load_lexicon_version() or '0.0.0' - if not lexicon or StringUtils.compare_version(lexicon.get('version') or '0.0.0', '<', latest): - lexicon = self.__retrieve_lexicon_online(latest) - - if not (nlp and lexicon): + lexi = self._load_lexicon_from_local() + latest = self.__load_lexicon_version() or "0.0.0" + if not lexi or StringUtils.compare_version( + lexi.version or "0.0.0", "<", latest + ): + lexi = self._retrieve_lexicon_online(latest) + self._load_lexicon_from_local.cache_clear() + if not (nlp and lexi): self._loaded = False logger.warn("插件数据加载失败") else: self._loaded = True - logger.info(f"当前词典文件版本: {lexicon.get('version')}") + logger.info(f"当前词典文件版本: {lexi.version}") @staticmethod def __download_spacy_model(model_name: str) -> bool: @@ -1325,7 +1533,7 @@ class LexiAnnot(_PluginBase): [sys.executable, "-m", "spacy", "download", model_name], capture_output=True, text=True, - check=True + check=True, ) with SpacyWorker(model_name): nlp = True @@ -1351,16 +1559,25 @@ class LexiAnnot(_PluginBase): # 入库数据 transfer_info: TransferInfo | None = event_info.get("transferinfo") - if not transfer_info or not transfer_info.target_diritem or not transfer_info.target_diritem.path: + if ( + not transfer_info + or not transfer_info.target_diritem + or not transfer_info.target_diritem.path + ): return # 检查是否为选择的媒体库 in_libraries = False - libraries = {library.name: library.library_path for library in DirectoryHelper().get_library_dirs()} + libraries = { + library.name: library.library_path + for library in DirectoryHelper().get_library_dirs() + } for library_name in self._libraries: if library_name in libraries: ll = libraries[library_name] - if ll and Path(transfer_info.target_diritem.path).is_relative_to(Path(ll)): + if ll and Path(transfer_info.target_diritem.path).is_relative_to( + Path(ll) + ): in_libraries = True break if not in_libraries: @@ -1368,87 +1585,17 @@ class LexiAnnot(_PluginBase): mediainfo: MediaInfo | None = event_info.get("mediainfo") if self._english_only and mediainfo: - if mediainfo.original_language and mediainfo.original_language != 'en': - logger.info(f"原始语言 ({mediainfo.original_language}) 不为英语, 跳过 {mediainfo.title}: ") + if mediainfo.original_language and mediainfo.original_language not in { + "en", + "eng", + }: + logger.info( + f"原始语言 ({mediainfo.original_language}) 不为英语, 跳过 {mediainfo.title}: " + ) return for new_path in transfer_info.file_list_new or []: self.add_media_file(new_path) - @staticmethod - def query_cefr(word, cefr_lexicon): - word = word.lower().strip("-*'") - if word in cefr_lexicon: - return cefr_lexicon[word] - else: - return None - - @staticmethod - def query_coca20k(word: str, lexicon: Dict[str, Any]): - word = word.lower().strip("-*'") - return lexicon.get(word) - - @staticmethod - def query_examinations(word: str, lexicon: Dict[str, Any]) -> Dict[str, Any]: - res = {} - for examination, exam_lexicon in lexicon.items(): - if word in exam_lexicon: - res[examination] = exam_lexicon[word] - return res - - @staticmethod - def convert_pos_to_spacy(pos: str): - """ - 将给定的词性列表转换为 spaCy 库中使用的词性标签 - :param pos: 字符串形式词性 - :returns: 一个包含对应spaCy词性标签的列表。对于无法直接映射的词性,将返回None - 
""" - spacy_pos_map = { - 'noun': 'NOUN', - 'adjective': 'ADJ', - 'adverb': 'ADV', - 'verb': 'VERB', - 'preposition': 'ADP', - 'conjunction': 'CCONJ', - 'determiner': 'DET', - 'pronoun': 'PRON', - 'interjection': 'INTJ', - 'number': 'NUM' - } - - pos_lower = pos.lower() - if pos_lower in spacy_pos_map: - spacy_pos = spacy_pos_map[pos_lower] - elif pos_lower == 'be-verb': - spacy_pos = 'AUX' # Auxiliary verb (e.g., be, do, have) - elif pos_lower == 'vern': - spacy_pos = 'VERB' # Assuming 'vern' is a typo for 'verb' - elif pos_lower == 'modal auxiliary': - spacy_pos = 'AUX' # Modal verbs are also auxiliaries - elif pos_lower == 'do-verb': - spacy_pos = 'AUX' - elif pos_lower == 'have-verb': - spacy_pos = 'AUX' - elif pos_lower == 'infinitive-to': - spacy_pos = 'PART' # Particle (e.g., to in "to go") - elif not pos_lower: # Handle empty strings - spacy_pos = None - else: - spacy_pos = None # For unmapped POS tags - return spacy_pos - - @staticmethod - def get_cefr_by_spacy(lemma_: str, pos_: str, cefr_lexicon: Dict[str, Any]) -> Optional[str]: - result = LexiAnnot.query_cefr(lemma_, cefr_lexicon) - if result: - all_cefr = [] - if len(result) > 0: - for entry in result: - if pos_ == LexiAnnot.convert_pos_to_spacy(entry['pos']): - return entry['cefr'] - all_cefr.append(entry['cefr']) - return min(all_cefr) - return None - @staticmethod def format_duration(ms): total_seconds, milliseconds = divmod(ms, 1000) @@ -1457,6 +1604,19 @@ class LexiAnnot(_PluginBase): hundredths = milliseconds // 10 return f"{hours}:{minutes:02}:{seconds:02}.{hundredths:02}" + @staticmethod + def _remove_substring(replacements: list[dict]): + new_list = [] + replacements.sort(key=lambda x: x["end"] - x["start"], reverse=True) + for r in replacements: + if any( + (r["start"] >= new["start"] and r["end"] <= new["end"]) + for new in new_list + ): + continue + new_list.append(r) + return new_list + @staticmethod def replace_by_plaintext_positions(line: SSAEvent, replacements: List[dict]): """ @@ -1488,7 +1648,7 @@ class LexiAnnot(_PluginBase): mapping[p_index] = t_index p_index += 1 t_index += 1 - + replacements = LexiAnnot._remove_substring(replacements) # 按照 mapping 执行替换(倒序替换防止位置错位) new_text = text for r in sorted(replacements, key=lambda x: x["start"], reverse=True): @@ -1505,27 +1665,27 @@ class LexiAnnot(_PluginBase): def analyze_ass_language(ass_file: SSAFile): styles = {} for style in ass_file.styles: - styles[style] = {'text': [], 'duration': 0, 'text_size': 0, 'times': 0} + styles[style] = {"text": [], "duration": 0, "text_size": 0, "times": 0} for dialogue in ass_file: style = dialogue.style text = dialogue.plaintext - sub_text = text.split('\n') + sub_text = text.split("\n") if style not in styles or not text: continue - styles[style]['text'].extend(sub_text) - styles[style]['duration'] += dialogue.duration - styles[style]['text_size'] += len(text) - styles[style]['times'] += 1 + styles[style]["text"].extend(sub_text) + styles[style]["duration"] += dialogue.duration + styles[style]["text_size"] += len(text) + styles[style]["times"] += 1 style_language_analysis = {} for style_name, data in styles.items(): - all_text = ' '.join(data['text']) + all_text = " ".join(data["text"]) if not all_text.strip(): style_language_analysis[style_name] = None continue languages = [] # 对每个文本片段进行语言检测 - for text_fragment in data['text']: + for text_fragment in data["text"]: try: lang = detect(text_fragment) languages.append(lang) @@ -1537,19 +1697,22 @@ class LexiAnnot(_PluginBase): if languages: language_counts = Counter(languages) 
most_common_language = language_counts.most_common(1)[0] - style_language_analysis[style_name] = {"main_language": most_common_language[0], - "proportion": most_common_language[1] / len(languages), - "duration": data['duration'], - "text_size": data['text_size'], - "times": data['times']} + style_language_analysis[style_name] = { + "main_language": most_common_language[0], + "proportion": most_common_language[1] / len(languages), + "duration": data["duration"], + "text_size": data["text_size"], + "times": data["times"], + } else: style_language_analysis[style_name] = None return style_language_analysis @staticmethod - def select_main_style_weighted(language_analysis: Dict[str, Any], known_language: str, - weights=None): + def select_main_style_weighted( + language_analysis: Dict[str, Any], known_language: str, weights=None + ): """ 根据语言分析结果和已知的字幕语言,使用加权评分选择主要样式 @@ -1559,23 +1722,32 @@ class LexiAnnot(_PluginBase): :returns: 主要字幕的样式名称,如果没有匹配的样式则返回 None """ if weights is None: - weights = {'times': 0.5, 'text_size': 0.4, 'duration': 0.1} + weights = {"times": 0.5, "text_size": 0.4, "duration": 0.1} matching_styles = [] - max_times = max([analysis.get('times', 0) for _, analysis in language_analysis.items() if analysis]) or 1 - max_text_size = max( - [analysis.get('text_size', 0) for _, analysis in language_analysis.items() if analysis]) or 1 - max_duration = max([analysis.get('duration', 0) for _, analysis in language_analysis.items() if analysis]) or 1 + max_times = max([analysis.get("times", 0) for _, analysis in language_analysis.items() if analysis]) or 1 + max_text_size = ( + max([analysis.get("text_size", 0) for _, analysis in language_analysis.items() if analysis]) or 1) + max_duration = ( + max( + [ + analysis.get("duration", 0) + for _, analysis in language_analysis.items() + if analysis + ] + ) + or 1 + ) for style, analysis in language_analysis.items(): if not analysis: continue - if analysis.get('main_language') == known_language: + if analysis.get("main_language") == known_language: # 跳过多语言 - if analysis.get('proportion', 0) < 0.5: + if analysis.get("proportion", 0) < 0.5: continue score = 0 - score += analysis.get('times', 0) * weights.get('times', 0) / max_times - score += analysis.get('text_size', 0) * weights.get('text_size', 0) / max_text_size - score += analysis.get('duration', 0) * weights.get('duration', 0) / max_duration + score += analysis.get("times", 0) * weights.get("times", 0) / max_times + score += analysis.get("text_size", 0) * weights.get("text_size", 0) / max_text_size + score += analysis.get("duration", 0) * weights.get("duration", 0) / max_duration matching_styles.append((style, score)) if not matching_styles: @@ -1586,67 +1758,80 @@ class LexiAnnot(_PluginBase): @staticmethod def set_srt_style(ass: SSAFile) -> SSAFile: - ass.info['ScaledBorderAndShadow'] = 'no' - play_res_y = int(ass.info['PlayResY']) - if 'Default' in ass.styles: - ass.styles['Default'].marginv = play_res_y // 16 - ass.styles['Default'].fontname = 'Microsoft YaHei' - ass.styles['Default'].fontsize = play_res_y // 16 + ass.info["ScaledBorderAndShadow"] = "no" + play_res_y = int(ass.info["PlayResY"]) + if "Default" in ass.styles: + ass.styles["Default"].marginv = play_res_y // 16 + ass.styles["Default"].fontname = "Microsoft YaHei" + ass.styles["Default"].fontsize = play_res_y // 16 return ass def __set_style(self, ass: SSAFile) -> SSAFile: - font_scaling = float(self._font_scaling) if self._font_scaling and len(self._font_scaling) else 1 - play_res_y = int(ass.info['PlayResY']) - play_res_x 
= int(ass.info['PlayResX']) + font_scaling = ( + float(self._font_scaling) + if self._font_scaling and len(self._font_scaling) + else 1 + ) + play_res_y = int(ass.info["PlayResY"]) + play_res_x = int(ass.info["PlayResX"]) # 创建一个新样式 fs = play_res_y // 16 * font_scaling - new_style = pysubs2.SSAStyle() - new_style.name = 'Annotation EN' - new_style.fontname = 'Times New Roman' + new_style = SSAStyle() + new_style.name = "Annotation EN" + new_style.fontname = "Times New Roman" new_style.fontsize = fs - new_style.primarycolor = pysubs2.Color(self._accent_color_rgb[0], - self._accent_color_rgb[1], - self._accent_color_rgb[2], - self._color_alpha) # 黄色 (BGR, alpha) + new_style.primarycolor = Color( + self._accent_color_rgb[0], + self._accent_color_rgb[1], + self._accent_color_rgb[2], + self._color_alpha, + ) # 黄色 (BGR, alpha) new_style.bold = True new_style.italic = False new_style.outline = 1 new_style.shadow = 0 - new_style.alignment = pysubs2.Alignment.TOP_LEFT + new_style.alignment = Alignment.TOP_LEFT new_style.marginl = play_res_x // 20 new_style.marginr = play_res_x // 20 new_style.marginv = int(fs) - ass.styles['Annotation EN'] = new_style + ass.styles["Annotation EN"] = new_style zh_style = new_style.copy() - zh_style.name = 'Annotation ZH' - zh_style.fontname = 'Microsoft YaHei' - zh_style.primarycolor = pysubs2.Color(255, 255, 255, self._color_alpha) - ass.styles['Annotation ZH'] = zh_style + zh_style.name = "Annotation ZH" + zh_style.fontname = "Microsoft YaHei" + zh_style.primarycolor = Color(255, 255, 255, self._color_alpha) + ass.styles["Annotation ZH"] = zh_style + + usage_style = zh_style.copy() + usage_style.name = "Annotation USAGE" + usage_style.fontsize = fs * 0.5 + usage_style.italic = True + usage_style.primarycolor = Color(224, 224, 224, self._color_alpha) + ass.styles["Annotation USAGE"] = usage_style pos_style = zh_style.copy() - pos_style.name = 'Annotation POS' - pos_style.fontname = 'Times New Roman' + pos_style.name = "Annotation POS" + pos_style.fontname = "Times New Roman" pos_style.fontsize = fs * 0.75 pos_style.italic = True - ass.styles['Annotation POS'] = pos_style + ass.styles["Annotation POS"] = pos_style phone_style = pos_style.copy() - phone_style.name = 'Annotation PHONE' - phone_style.fontname = 'Arial' + phone_style.name = "Annotation PHONE" + phone_style.fontname = "Arial" phone_style.fontsize = fs * 0.75 phone_style.bold = False phone_style.italic = False - ass.styles['Annotation PHONE'] = phone_style + ass.styles["Annotation PHONE"] = phone_style pos_def_cn_style = zh_style.copy() - pos_def_cn_style.name = 'DETAIL CN' + pos_def_cn_style.name = "DETAIL CN" pos_def_cn_style.fontsize = fs * 0.7 - ass.styles['DETAIL CN'] = pos_def_cn_style + ass.styles["DETAIL CN"] = pos_def_cn_style pos_def_pos_style = pos_style.copy() - pos_def_pos_style.name = 'DETAIL POS' + pos_def_pos_style.name = "DETAIL POS" pos_def_pos_style.fontsize = fs * 0.6 - ass.styles['DETAIL POS'] = pos_def_pos_style + ass.styles["DETAIL POS"] = pos_def_pos_style cefr_style = pos_style.copy() cefr_style.name = "Annotation CEFR" @@ -1654,55 +1839,58 @@ class LexiAnnot(_PluginBase): cefr_style.fontsize = fs * 0.5 cefr_style.bold = True cefr_style.italic = False - cefr_style.primarycolor = pysubs2.Color(self._accent_color_rgb[0], - self._accent_color_rgb[1], - self._accent_color_rgb[2], - self._color_alpha) + cefr_style.primarycolor = Color( + self._accent_color_rgb[0], + self._accent_color_rgb[1], + self._accent_color_rgb[2], + self._color_alpha, + ) cefr_style.outline = 1 cefr_style.shadow 
= 0 - ass.styles['Annotation CEFR'] = cefr_style - ass.styles['Annotation EXAM'] = cefr_style + ass.styles["Annotation CEFR"] = cefr_style + ass.styles["Annotation EXAM"] = cefr_style return ass @staticmethod def hex_to_rgb(hex_color) -> Optional[Tuple]: if not hex_color: return None - pattern = r'^#[0-9a-fA-F]{6}$' + pattern = r"^#[0-9a-fA-F]{6}$" if re.match(pattern, hex_color) is None: return None - hex_color = hex_color.lstrip('#') # 去掉前面的 # - return tuple(int(hex_color[i:i + 2], 16) for i in (0, 2, 4)) + hex_color = hex_color.lstrip("#") # 去掉前面的 # + return tuple(int(hex_color[i: i + 2], 16) for i in (0, 2, 4)) @staticmethod - def __extract_subtitle(video_path: str, - subtitle_stream_index: str, - ffmpeg_path: str = 'ffmpeg', - sub_format='ass') -> Optional[str]: - if sub_format not in ['srt', 'ass']: - raise ValueError('Invalid subtitle format') + def __extract_subtitle( + video_path: str, + subtitle_stream_index: str, + ffmpeg_path: str = "ffmpeg", + sub_format="ass", + ) -> Optional[str]: + if sub_format not in ["srt", "ass"]: + raise ValueError("Invalid subtitle format") try: map_parameter = f"0:s:{subtitle_stream_index}" - command = [ - ffmpeg_path, - '-i', video_path, - '-map', map_parameter, - '-f', sub_format, - '-' - ] - result = subprocess.run(command, capture_output=True, text=True, encoding='utf-8', check=True) + command = [ffmpeg_path, "-i", video_path, "-map", map_parameter, "-f", sub_format, "-"] + result = subprocess.run( + command, capture_output=True, text=True, encoding="utf-8", check=True + ) return result.stdout except FileNotFoundError: logger.warn(f"错误:找不到视频文件 '{video_path}'") return None except subprocess.CalledProcessError as e: logger.warn(f"错误:提取字幕失败。\n错误信息:{e}") - logger.warn(f"FFmpeg 输出 (stderr):\n{e.stderr.decode('utf-8', errors='ignore')}") + logger.warn( + f"FFmpeg 输出 (stderr):\n{e.stderr.decode('utf-8', errors='ignore')}" + ) return None @staticmethod - def _extract_subtitles_by_lang(video_path: str, lang: str | list = 'en', ffmpeg: str = 'ffmpeg' - ) -> Optional[List[Dict]]: + def _extract_subtitles_by_lang( + video_path: str, lang: str | list = "en", ffmpeg: str = "ffmpeg" + ) -> Optional[List[Dict]]: """ 提取视频文件中的内嵌英文字幕,使用 MediaInfo 查找字幕流。 """ @@ -1712,28 +1900,42 @@ class LexiAnnot(_PluginBase): return track_lang in lang return track_lang == lang - supported_codec = ['S_TEXT/UTF8', 'S_TEXT/ASS'] + supported_codec = ["S_TEXT/UTF8", "S_TEXT/ASS"] subtitles = [] try: media_info: pymediainfo.MediaInfo = pymediainfo.MediaInfo.parse(video_path) for track in media_info.tracks: - if (track.track_type == 'Text' and check_lang(track_lang=track.language) - and track.codec_id in supported_codec): - subtitle_stream_index = track.stream_identifier # MediaInfo 的 stream_id 从 1 开始,ffmpeg 从 0 开始 - subtitle = LexiAnnot.__extract_subtitle(video_path, subtitle_stream_index, ffmpeg) + if ( + track.track_type == "Text" + and check_lang(track_lang=track.language) + and track.codec_id in supported_codec + ): + subtitle_stream_index = ( + track.stream_identifier + ) # MediaInfo 的 stream_id 从 1 开始,ffmpeg 从 0 开始 + extracted_subtitle = LexiAnnot.__extract_subtitle( + video_path, subtitle_stream_index, ffmpeg + ) duration = 0 - if hasattr(track, 'duration'): + if hasattr(track, "duration"): try: duration = int(float(track.duration)) except (ValueError, TypeError): pass - if subtitle: - subtitles.append({'title': track.title or '', 'subtitle': subtitle, 'codec_id': track.codec_id, - 'stream_id': subtitle_stream_index, 'duration': duration}) + if extracted_subtitle: + 
subtitles.append( + { + "title": track.title or "", + "subtitle": extracted_subtitle, + "codec_id": track.codec_id, + "stream_id": subtitle_stream_index, + "duration": duration, + } + ) if subtitles: return subtitles else: - logger.warn('未找到标记为英语的文本字幕流') + logger.warn("未找到标记为英语的文本字幕流") return None except FileNotFoundError: @@ -1747,309 +1949,189 @@ class LexiAnnot(_PluginBase): logger.error(f"使用 MediaInfo 提取字幕时发生错误:{e}") return None - def __query_gemini( + def _process_chain( self, - tasks: TranslationTasks, - api_key: str, - system_instruction: str, - model: str, - temperature: float - ) -> List[T]: - response = translate( - api_key=api_key, - translation_tasks=tasks, - system_instruction=system_instruction, - gemini_model=model, - temperature=temperature, - max_retries=self._max_retries + segments: SegmentList, + lexi: Lexicon, + spacy_worker: SpacyWorker, + mediainfo: Context | None = None, + ) -> SegmentList: + """ + 处理字幕行 + + :param segments: 待处理的字幕 + :param lexi: 词典对象 + :param spacy_worker: spaCy 分词器 + :returns: 处理后的字幕行列表 + """ + simple_vocabulary = set( + filter( + lambda x: x < self._annot_level, ["A1", "A2", "B1", "B2", "C1", "C2"] + ) ) - if not response.success: - logger.warning(f"Error in response: {response.message}") - return tasks.tasks - - self._total_token_count += response.total_token_count - return response.tasks - - def __process_by_ai(self, lines_to_process: List[Dict[str, Any]], - cefr_lexicon: Dict[str, Any], - coca20k_lexicon: Dict[str, Any], - exams_lexicon: Dict[str, Any], - swear_words: List[str], - spacy_worker: SpacyWorker): - - def __replace_with_spaces(_text): - """ - 使用等长的空格替换文本中的 [xxx] 模式。 - 例如:"[Hi]" 会被替换成 " " (4个空格) - """ - pattern = r'(\[.*?\])' - return re.sub(pattern, lambda match: ' ' * len(match.group(1)), _text) - - simple_vocabulary = list(filter(lambda x: x < self._annot_level, ['A1', 'A2', 'B1', 'B2', 'C1', 'C2'])) - patterns = [r'\d+th|\d?1st|\d?2nd|\d?3rd', r"\w+'s$", r"\w+'d$", r"\w+'t$", "[Ii]'m$", r"\w+'re$", r"\w+'ve$", r"\w+'ll$"] - compiled_patterns = [re.compile(p) for p in patterns] - model_temperature = float(self._model_temperature) if self._model_temperature else 0.3 + # model_temperature = float(self._model_temperature) if self._model_temperature else 0.1 logger.info("通过 spaCy 分词...") - vocabulary_trans_instruction = '''You are an expert translator. You will be given a list of English words along with their context, formatted as JSON. For each entry, provide the most appropriate translation in Simplified Chinese based on the context. -Only complete the `Chinese` field. 
Do not include pinyin, explanations, or any additional information.''' - # 使用nlp分词 - for line_data in lines_to_process: + for seg in segments: if self._shutdown_event.is_set(): - return lines_to_process - text_raw = line_data.get('raw_subtitle') - text = text_raw.replace('\n', ' ') - text = __replace_with_spaces(text) - new_vocab = [] - doc = spacy_worker.submit(text) - last_end_pos = 0 - lemma_to_query = [] - for token in doc: - if len(token['text']) == 1: - continue - if token['lemma_'] in swear_words: - continue - if token['pos_'] not in ('NOUN', 'AUX', 'VERB', 'ADJ', 'ADV', 'ADP', 'CCONJ', 'SCONJ'): - continue - striped = token['lemma_'].strip('-[') - if any(p.match(striped) for p in compiled_patterns): - continue - cefr = LexiAnnot.get_cefr_by_spacy(striped, token['pos_'], cefr_lexicon) - if cefr and cefr in simple_vocabulary: - continue - res_of_coco = LexiAnnot.query_coca20k(striped, coca20k_lexicon) - if res_of_coco and not cefr: - cefr = '' - res_of_exams = self.query_examinations(striped, exams_lexicon) - exam_tags = [] - if res_of_exams: - exam_tags = [exam_id for exam_id in res_of_exams if exam_id in self._exam_tags] - if striped in lemma_to_query: - continue - else: - lemma_to_query.append(striped) - striped_text = token['text'].strip('-*[') - start_pos = text.find(striped_text, last_end_pos) - end_pos = start_pos + len(striped_text) - phonetics = '' - pos_defs = [] - if res_of_exams: - for exam, value in res_of_exams.items(): - phonetics = value.get('ipa_uk') or '' - defs = {} - for pos_def in value.get('defs', []): - pos = pos_def.get('pos', '') - definition_cn = pos_def.get('definition_cn', '') - defs.setdefault(pos, []).append(definition_cn) - pos_defs = [{'pos': pos, 'meanings': meanings} for pos, meanings in defs.items() if pos] - break - elif res_of_coco: - phonetics = res_of_coco.get('phonetics_1') or '' - pos_defs = res_of_coco.get('pos_defs') or [] - last_end_pos = end_pos - new_vocab.append({'start': start_pos, 'end': end_pos, 'text': striped_text, 'lemma': striped, - 'pos': token['pos_'], 'cefr': cefr, 'Chinese': '', 'phonetics': phonetics, - 'pos_defs': pos_defs, 'exam_tags': exam_tags}) - line_data['new_vocab'] = new_vocab - # 查询词汇翻译 - task_bulk: List[VocabularyTranslationTask] = [] - i = 0 + return segments + seg.candidate_words = extract_advanced_words( + segment=seg, + lexi=lexi, + spacy_worker=spacy_worker, + simple_level=simple_vocabulary, + exams=self._exam_tags, + ) if self._gemini_available: - logger.info("查询词汇翻译...") - for line_data in lines_to_process: - if self._shutdown_event.is_set(): - return lines_to_process - if not self._gemini_available: - break - i += 1 - if not (len(line_data["new_vocab"]) or (i == len(lines_to_process) and len(task_bulk))): - continue - new_vocab = [Vocabulary(lemma=new_vocab['lemma'], Chinese='') for new_vocab in line_data['new_vocab']] - task_bulk.append( - VocabularyTranslationTask( - index=line_data['index'], - id=f"{line_data['index']}", - vocabulary=new_vocab, - context=Context( - original_text=line_data['raw_subtitle'].replace('\n', ' ') - ) - ) + if self._use_mp_agent: + llm_apikey = settings.LLM_API_KEY + llm_base_url = settings.LLM_BASE_URL + llm_model_name = settings.LLM_MODEL + llm_provider = settings.LLM_PROVIDER.lower() + else: + llm_apikey = self._gemini_apikey + llm_base_url = self._llm_base_url + llm_model_name = self._gemini_model + llm_provider = self._llm_provider.lower() + llm = initialize_llm( + provider=llm_provider, + model_name=llm_model_name, + base_url=llm_base_url, + api_key=llm_apikey, + 
temperature=self._model_temperature, + max_retries=self._max_retries, + proxy=self._use_proxy, ) - if len(task_bulk) >= self._context_window or (len(task_bulk) and i == len(lines_to_process)): - logger.info(f"processing dialogues: " - f"{LexiAnnot.format_duration(lines_to_process[task_bulk[0].index]['time_code'][0])} -> " - f"{LexiAnnot.format_duration(lines_to_process[i - 1]['time_code'][1])}") - answer: List[VocabularyTranslationTask] = self.__query_gemini( - TranslationTasks[VocabularyTranslationTask](tasks=task_bulk), - self._gemini_apikey, - vocabulary_trans_instruction, - self._gemini_model, - model_temperature - ) - if not answer: - continue - time.sleep(self._request_interval) - for answer_line in answer: - answer_lemma = tuple(v.lemma for v in answer_line.vocabulary) - filtered_raw = [x for x in lines_to_process if x.get('index') == answer_line.index] - if not len(filtered_raw): - logger.warn(f'Unknown answer: {answer_line.index}: {answer_line.context.original_text}') - available_answer = False - for item in filtered_raw: - lemma = tuple(v['lemma'] for v in item['new_vocab']) - if lemma == answer_lemma: - available_answer = True - for i_, v in enumerate(item['new_vocab']): - v['Chinese'] = answer_line.vocabulary[i_].Chinese - break - if not available_answer: - logger.warn(f'Unknown answer: {answer_line.index}: {answer_line.context.original_text}') - task_bulk = [] - if not self._sentence_translation: - return lines_to_process - if self._gemini_available: - logger.info("查询整句翻译...") - # 查询整句翻译 - translation_tasks: List[DialogueTranslationTask] = [] - for line_data in lines_to_process: - translation_tasks.append( - DialogueTranslationTask( - id=f"{line_data['index']}", - index=line_data['index'], - original_text=line_data['raw_subtitle'].replace('\n', ' '), - Chinese='' - ) + segments = llm_process_chain( + lexi=lexi, + llm=llm, + segments=segments, + shutdown_event=self._shutdown_event, + context_window=self._context_window, + leaner_level=self._annot_level, + media_context=mediainfo, + translate_sentences=self._sentence_translation ) - i = 0 - dialog_trans_instruction = '''You are an expert translator. You will be given a list of dialogue translation tasks in JSON format. For each entry, provide the most appropriate translation in Simplified Chinese based on the context. -Only complete the `Chinese` field. 
Do not include pinyin, explanations, or any additional information.''' - while i < len(translation_tasks): - if self._shutdown_event.is_set(): - return lines_to_process - if not self._gemini_available: - break - start_index = max(0, i - 1) - end_index = min(len(translation_tasks), i + self._context_window + 1) - task_bulk: List[DialogueTranslationTask] = translation_tasks[start_index:end_index] - logger.info(f"processing dialogues: " - f"{LexiAnnot.format_duration(lines_to_process[i]['time_code'][0])} -> " - f"{LexiAnnot.format_duration(lines_to_process[min(len(translation_tasks), i + self._context_window) - 1]['time_code'][1])}") - answer: List[DialogueTranslationTask] = self.__query_gemini( - TranslationTasks[DialogueTranslationTask](tasks=task_bulk), - self._gemini_apikey, - dialog_trans_instruction, - self._gemini_model, - model_temperature - ) - time.sleep(self._request_interval) - for answer_line in answer: - if answer_line.index not in range(i, i + self._context_window): - continue - filtered_raw = [x for x in lines_to_process if x.get('index') == answer_line.index] - if not len(filtered_raw): - logger.warn(f'Unknown answer: {answer_line.index}: {answer_line.original_text}') - available_answer = False - for item in filtered_raw: - if item['raw_subtitle'].replace('\n', ' ') == answer_line.original_text: - available_answer = True - item['Chinese'] = answer_line.Chinese - break - if not available_answer: - logger.warn(f'Unknown answer: {answer_line.index}: {answer_line.original_text}') - i += self._context_window - return lines_to_process + return segments - def process_subtitles(self, ass_file: SSAFile, - cefr_lexicon: Dict[str, Any], - coca20k_lexicon: Dict[str, Any], - exams_lexicon: Dict[str, Any], - swear_words: List[str], - spacy_worker: SpacyWorker) -> Optional[SSAFile]: + def process_subtitles( + self, + ass_file: SSAFile, + lexi: Lexicon, + spacy_worker: SpacyWorker, + mediainfo: Context | None = None, + ) -> tuple[SSAFile | None, SegmentStatistics | None]: """ 处理字幕内容,标记词汇并添加翻译。 """ - lang = 'en' - abgr_str = (f'&H{self._color_alpha:02x}{self._accent_color_rgb[2]:02x}' - f'{self._accent_color_rgb[1]:02x}{self._accent_color_rgb[0]:02x}&') # &H00FFFFFF& - pos_map = { - 'NOUN': 'n.', - 'AUX': 'aux.', - 'VERB': 'v.', - 'ADJ': 'adj.', - 'ADV': 'adv.', - 'ADP': 'prep.', - 'CCONJ': 'conj.', - 'SCONJ': 'conj.' 
- } + lang = "en" + abgr_str = ( + f"&H{self._color_alpha:02x}{self._accent_color_rgb[2]:02x}" + f"{self._accent_color_rgb[1]:02x}{self._accent_color_rgb[0]:02x}&" + ) # &H00FFFFFF& + statistical_res = LexiAnnot.analyze_ass_language(ass_file) - main_style: str | None = LexiAnnot.select_main_style_weighted(statistical_res, lang) + main_style: str | None = LexiAnnot.select_main_style_weighted( + statistical_res, lang + ) if not main_style: - logger.error('无法确定主要字幕样式') - return None - index = 0 - lines_to_process = [] - main_dialogue: Dict[int, SSAEvent] = {} + logger.error("无法确定主要字幕样式") + return None, None + # main_dialogue: Dict[int, SSAEvent] = {} + main_processor = SubtitleProcessor() + IDGenerator().reset() for dialogue in ass_file: if dialogue.style != main_style: continue - time_code = (dialogue.start, dialogue.end) - text_raw = dialogue.plaintext - line_data = {'index': index, 'time_code': time_code, 'raw_subtitle': text_raw, 'new_vocab': [], - 'Chinese': ''} - lines_to_process.append(line_data) - main_dialogue[index] = dialogue - index += 1 - lines_to_process = self.__process_by_ai(lines_to_process, cefr_lexicon, coca20k_lexicon, exams_lexicon, - swear_words, spacy_worker) - + main_processor.append(dialogue) + segments = SegmentList(root=list(main_processor.segment_generator())) + segments = self._process_chain( + segments=segments, lexi=lexi, spacy_worker=spacy_worker, mediainfo=mediainfo + ) # 在原字幕添加标注 main_style_fs = ass_file.styles[main_style].fontsize - for line_data in lines_to_process: + __N = r"\N" + for seg in segments: if self._shutdown_event.is_set(): - return None - if line_data['new_vocab']: - replacements = line_data['new_vocab'] - for replacement in replacements: - part_of_speech = f"{{\\fnTimes New Roman\\fs{int(main_style_fs * 0.75)}\\i1}}{pos_map[replacement['pos']]}{{\\r}}" - new_text = f"{{\\c{abgr_str}}}{replacement['text']}{{\\r}}" + return None, None + if seg.candidate_words: + replacements = [] + for word in seg.candidate_words: + exams = [exam for exam in word.exams if exam in self._exam_tags] + new_text = f"{{\\c{abgr_str}}}{word.text}{{\\r}}" if self._in_place: - new_text = new_text + f" ({replacement['Chinese']} {part_of_speech})" if replacement[ - 'Chinese'] else "" + part_of_speech = f"{{\\fnTimes New Roman\\fs{int(main_style_fs * 0.75)}\\i1}}{UNIVERSAL_POS_MAP[word.pos] or ''}{{\\r}}" + new_text = ( + new_text + f" ({word.llm_translation} {part_of_speech})" + if word.llm_translation + else "" + ) else: - dialogue = pysubs2.SSAEvent() - dialogue.start = main_dialogue[line_data['index']].start - dialogue.end = main_dialogue[line_data['index']].end - dialogue.style = 'Annotation EN' - cefr_text = f" {{\\rAnnotation CEFR}}{replacement['cefr']}{{\\r}}" \ - if replacement['cefr'] else "" - exam_text = f" {{\\rAnnotation EXAM}}{' '.join(replacement['exam_tags'])}{{\\r}}" \ - if replacement['exam_tags'] else "" - __N = r'\N' - phone_text = f"{__N}{{\\rAnnotation PHONE}}/{replacement['phonetics']}/{{\\r}}" if replacement['phonetics'] and self._show_phonetics else "" - annot_text = f"{replacement['lemma']} {{\\rAnnotation POS}}{pos_map[replacement['pos']]}{{\\r}} {{\\rAnnotation ZH}}{replacement['Chinese']}{{\\r}}{cefr_text}{exam_text}{phone_text}" + dialogue = SSAEvent() + dialogue.start = main_processor[seg.index].start + dialogue.end = main_processor[seg.index].end + dialogue.style = "Annotation EN" + cefr_text = ( + f" {style_text('Annotation CEFR', word.cefr)}" + if word.cefr + else "" + ) + exam_text = ( + f" {style_text('Annotation EXAM', ' 
'.join(exams))}" + if exams + else "" + ) + phone_text = ( + f"{__N}{style_text('Annotation PHONE', f'/{word.phonetics}/')}" + if word.phonetics and self._show_phonetics + else "" + ) + annot_text = f"{word.lemma} {style_text('Annotation POS', UNIVERSAL_POS_MAP[word.pos] or '')} {style_text('Annotation ZH', word.llm_translation or '')}{cefr_text}{exam_text}{phone_text}" dialogue.text = annot_text ass_file.append(dialogue) - if self._show_vocabulary_detail and replacement['pos_defs']: - dialogue = pysubs2.SSAEvent() - dialogue.start = main_dialogue[line_data['index']].start - dialogue.end = main_dialogue[line_data['index']].end - dialogue.style = 'DETAIL CN' - detail_text = [] - for pos_def in replacement['pos_defs']: - meaning_str = ', '.join(pos_def['meanings']) - pos_text = f"{{\\rDETAIL POS}}{pos_def['pos']}{{\\r}} {meaning_str}" - detail_text.append(pos_text) - dialogue.text = '\\N'.join(detail_text) + if word.llm_usage_context: + dialogue = SSAEvent( + start=main_processor[seg.index].start, + style="DETAIL CN", + end=main_processor[seg.index].end, + text=style_text( + "Annotation USAGE", word.llm_usage_context + ), + ) ass_file.append(dialogue) - replacement['new_text'] = new_text - LexiAnnot.replace_by_plaintext_positions(main_dialogue[line_data['index']], replacements) + if self._show_vocabulary_detail and word.pos_defs: + dialogue = SSAEvent( + start=main_processor[seg.index].start, + style="DETAIL CN", + end=main_processor[seg.index].end, + ) + detail_text = [] + for pos_def in word.pos_defs: + meaning_str = ", ".join(pos_def.meanings) + pos_text = f"{style_text('DETAIL POS', pos_def.pos)} {meaning_str}" + detail_text.append(pos_text) + dialogue.text = "\\N".join(detail_text) + ass_file.append(dialogue) + replacement = { + "start": word.meta.start_pos, + "end": word.meta.end_pos, + "new_text": new_text, + } + replacements.append(replacement) + LexiAnnot.replace_by_plaintext_positions( + main_processor[seg.index], replacements + ) if self._sentence_translation: - chinese = line_data['Chinese'] - if chinese and chinese[-1] in ['。', ',']: + chinese = seg.Chinese + if chinese and chinese[-1] in ["。", ","]: chinese = chinese[:-1] - main_dialogue[line_data['index']].text = main_dialogue[line_data['index']].text + f"\\N{chinese}" + main_processor[seg.index].text = ( + main_processor[seg.index].text + f"\\N{{\\fs{int(main_style_fs * 0.75)}}}{chinese}{{\\r}}" + ) # 避免 Infuse 显示乱码 - unexplainable_line = pysubs2.SSAEvent(start=0, end=0, text=f"{{\\rAnnotation ZH}}{self.plugin_name}{{\\r}}") + unexplainable_line = SSAEvent( + start=0, end=0, text=f"{style_text('Annotation ZH', self.plugin_name)}" + ) ass_file.insert(0, unexplainable_line) - return ass_file + return ass_file, segments.statistics diff --git a/plugins.v2/lexiannot/agenttool.py b/plugins.v2/lexiannot/agenttool.py new file mode 100644 index 0000000..4bdc01f --- /dev/null +++ b/plugins.v2/lexiannot/agenttool.py @@ -0,0 +1,67 @@ +import asyncio +from typing import Optional, Type + +from pydantic import BaseModel + +from app.agent.tools.base import MoviePilotTool +from app.core.plugin import PluginManager +from .schemas import VocabularyAnnotatingToolInput + + +class VocabularyAnnotatingTool(MoviePilotTool): + """自定义工具示例""" + + # 工具名称 + name: str = "vocabulary_annotating_tool" + # 工具描述 + description: str = ( + "Add new vocabulary annotation task to plugin LexiAnnot's task queue." 
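+        # Descriptive comment (assumption: MoviePilotTool follows the usual
+        # LangChain tool contract): this description is surfaced to the LLM
+        # when it decides whether to invoke the tool, so keep it short and
+        # action-oriented.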
+ ) + # 输入参数模型 + args_schema: Type[BaseModel] = VocabularyAnnotatingToolInput + + def get_tool_message(self, **kwargs) -> Optional[str]: + """根据订阅参数生成友好的提示消息""" + skip_existing = kwargs.get("skip_existing", False) + video_path = kwargs.get("video_path", "") + message = f"正在添加字幕任务: {video_path!r}" + if skip_existing: + message += "(覆写方式:跳过已存在的字幕文件)" + else: + message += "(覆写方式:覆盖已存在的字幕文件)" + return message + + async def run(self, video_path: str, skip_existing: bool = True, **kwargs) -> str: + """ + 实现工具的核心逻辑(异步方法) + + :param video_path: Path to the video file + :param skip_existing: Whether to skip existing subtitle files + :param kwargs: 其他参数,包含 explanation(工具使用说明) + :return: 工具执行结果,返回字符串格式 + """ + try: + # 执行工具逻辑 + result = await self._perform_operation(video_path, skip_existing) + + # 返回执行结果 + if not result: + return f"成功添加词汇标注任务: {video_path!r}" + else: + return f"添加任务出错: {result}" + except Exception as e: + return f"执行失败: {str(e)}" + + async def _perform_operation( + self, video_path: str, skip_existing: bool + ) -> str | None: + """内部方法,执行具体操作""" + # 实现具体业务逻辑 + plugins = PluginManager().running_plugins + plugin_instance = plugins.get("LexiAnnot") + if not plugin_instance: + return "LexiAnnot 插件未运行" + await asyncio.to_thread( + plugin_instance.add_task, video_file=video_path, skip_existing=skip_existing + ) + return None diff --git a/plugins.v2/lexiannot/lexicon.py b/plugins.v2/lexiannot/lexicon.py new file mode 100644 index 0000000..9c4f114 --- /dev/null +++ b/plugins.v2/lexiannot/lexicon.py @@ -0,0 +1,116 @@ +from typing import Literal + +from pydantic import BaseModel, Field, RootModel + +from .schemas import PosDef, Cefr + + +class CefrEntry(BaseModel): + pos: Literal[ + "noun", + "adverb", + "interjection", + "preposition", + "determiner", + "have-verb", + "modal auxiliary", + "adjective", + "number", + "be-verb", + "verb", + "conjunction", + "do-verb", + "infinitive-to", + "vern", + "pos", + "pronoun", + ] = Field(..., description="Part of speech") + cefr: Cefr = Field(..., description="CEFR level") + notes: str | None = Field(default=None, description="Notes") + + +class CefrDictionary(RootModel): + root: dict[str, list[CefrEntry]] + + def get(self, word: str) -> list[CefrEntry] | None: + return self.root.get(word) + + +class Coca20KEntry(BaseModel): + index: int = Field(..., description="Index of the entry") + phonetics_1: str = Field(..., description="Phonetics style 1") + phonetics_2: str = Field(..., description="Phonetics style 2") + pos_defs: list[PosDef] = Field( + ..., description="List of part of speech definitions" + ) + + +class Coca20KDictionary(RootModel): + root: dict[str, Coca20KEntry] + + def get(self, word: str) -> Coca20KEntry | None: + return self.root.get(word) + + +class ShanBayDef(BaseModel): + # 'n.', 'v.', 'adv.', 'adj.', 'phrase.', 'int.', 'pron.', 'prep.', '.', 'conj.', 'num.', 'phrase v.', 'linkv.', + # 'det.', 'ordnumber.', 'prefix.', 'un.', 'vt.', 'mod. v.', 'abbr.', 'auxv.', 'modalv.', 'vi.', 'aux. 
v.', + # 'interj.', 'article.', 'infinitive.', 'suff.', 'ord.', 'art.', 'exclam.', 'n.[C]' + pos: str = Field(..., description="Part of speech") + definition_cn: str = Field(..., description="Definition in Chinese") + + +class ShanbayEntry(BaseModel): + ipa_uk: str = Field(..., description="UK IPA pronunciation") + ipa_us: str = Field(..., description="US IPA pronunciation") + defs: list[ShanBayDef] = Field(..., description="List of definitions") + + +class ShanbayDictionary(BaseModel): + """Dictionary entries for various examinations.""" + + cet4: dict[str, ShanbayEntry] = Field( + ..., alias="CET-4", description="CET-4 dictionary entries" + ) + cet6: dict[str, ShanbayEntry] = Field( + ..., alias="CET-6", description="CET-6 dictionary entries" + ) + npee: dict[str, ShanbayEntry] = Field( + ..., alias="NPEE", description="NPEE dictionary entries" + ) + ielts: dict[str, ShanbayEntry] = Field( + ..., alias="IELTS", description="IELTS dictionary entries" + ) + toefl: dict[str, ShanbayEntry] = Field( + ..., alias="TOEFL", description="TOEFL dictionary entries" + ) + gre: dict[str, ShanbayEntry] = Field( + ..., alias="GRE", description="GRE dictionary entries" + ) + tem4: dict[str, ShanbayEntry] = Field( + ..., alias="TEM-4", description="TEM-4 dictionary entries" + ) + tem8: dict[str, ShanbayEntry] = Field( + ..., alias="TEM-8", description="TEM-8 dictionary entries" + ) + pet: dict[str, ShanbayEntry] = Field( + ..., alias="PET", description="PET dictionary entries" + ) + + def query(self, word: str) -> dict[str, ShanbayEntry]: + result = {} + for field_name, field_info in ShanbayDictionary.model_fields.items(): + value = getattr(self, field_name) + if word in value: + result[field_info.alias] = value[word] + return result + + +class Lexicon(BaseModel): + cefr: CefrDictionary = Field(..., description="CEFR dictionary") + coca20k: Coca20KDictionary = Field(..., description="COCA 20K dictionary") + examinations: ShanbayDictionary = Field( + ..., description="Shanbay examinations dictionary" + ) + swear_words: list[str] = Field(..., description="List of swear words") + version: str = Field(..., description="Version of the lexicon") diff --git a/plugins.v2/lexiannot/pipeline.py b/plugins.v2/lexiannot/pipeline.py new file mode 100644 index 0000000..657d59c --- /dev/null +++ b/plugins.v2/lexiannot/pipeline.py @@ -0,0 +1,736 @@ +import re +import threading + +from langchain_core.language_models.chat_models import BaseChatModel +from langchain_core.prompts import ChatPromptTemplate +from langchain.output_parsers import PydanticOutputParser +from pydantic import SecretStr + +from app.core.config import settings +from app.schemas import Context +from app.schemas.types import MediaType +from app.log import logger +from .lexicon import CefrDictionary, Lexicon, Coca20KDictionary +from .schemas import ( + SubtitleSegment, + PosDef, + Word, + Cefr, + WordMetadata, + SegmentList, + LlmFeedback, + UniversalPos, + LlmEnrichmentResult, + LlmTranslationResult, +) +from .spacyworker import SpacyWorker + + +_patterns = [ + r"\d+th|\d?1st|\d?2nd|\d?3rd", + r"\w+'s$", + r"\w+'d$", + r"\w+'t$", + "[Ii]'m$", + r"\w+'re$", + r"\w+'ve$", + r"\w+'ll$", +] +filter_patterns: list[re.Pattern] = [re.compile(p) for p in _patterns] +pos_interests = {"NOUN", "VERB", "ADJ", "ADV", "ADP", "CCONJ", "SCONJ"} + +UNIVERSAL_POS_MAP: dict[UniversalPos, str] = { + UniversalPos.ADJ: "adj.", + UniversalPos.ADV: "adv.", + UniversalPos.INTJ: "int.", + UniversalPos.NOUN: "n.", + UniversalPos.PROPN: "n.", + UniversalPos.VERB: "v.", + 
UniversalPos.AUX: "aux.",
+    UniversalPos.ADP: "prep.",
+    UniversalPos.CCONJ: "conj.",
+    UniversalPos.SCONJ: "conj.",
+    UniversalPos.DET: "det.",
+    UniversalPos.NUM: "num.",
+    UniversalPos.PART: "part.",
+    UniversalPos.PRON: "pron.",
+    UniversalPos.PUNCT: None,
+    UniversalPos.SYM: None,
+    UniversalPos.X: None,
+}
+
+
+def initialize_llm(
+    provider: str,
+    api_key: str,
+    model_name: str,
+    base_url: str | None,
+    temperature: float = 0.1,
+    max_retries: int = 3,
+    proxy: bool = False,
+) -> BaseChatModel:
+    """初始化LLM模型"""
+
+    if provider == "google":
+        if proxy:
+            # 通过 Gemini 的 OpenAI 兼容端点访问,以便请求走 HTTP 代理;
+            # 使用调用方传入的参数,而不是重新读取全局 settings
+            from langchain_openai import ChatOpenAI
+
+            return ChatOpenAI(
+                model=model_name,
+                api_key=SecretStr(api_key),
+                max_retries=max_retries,
+                base_url="https://generativelanguage.googleapis.com/v1beta/openai",
+                temperature=temperature,
+                openai_proxy=settings.PROXY_HOST,
+            )
+        from langchain_google_genai import ChatGoogleGenerativeAI
+
+        return ChatGoogleGenerativeAI(
+            model=model_name,
+            google_api_key=api_key,  # noqa
+            max_retries=max_retries,
+            temperature=temperature,
+        )
+    elif provider == "deepseek":
+        from langchain_deepseek import ChatDeepSeek
+
+        return ChatDeepSeek(
+            model=model_name,
+            api_key=SecretStr(api_key),
+            max_retries=max_retries,
+            temperature=temperature,
+        )
+    else:
+        from langchain_openai import ChatOpenAI
+
+        return ChatOpenAI(
+            model=model_name,
+            api_key=SecretStr(api_key),
+            max_retries=max_retries,
+            base_url=base_url,
+            temperature=temperature,
+            openai_proxy=settings.PROXY_HOST if proxy else None,
+        )
+
+
+def convert_pos_to_spacy(pos: str):
+    """
+    将给定的词性转换为 spaCy 库中使用的词性标签
+
+    :param pos: 字符串形式词性
+    :returns: 对应的 spaCy 词性标签;对于无法直接映射的词性,返回 None
+    """
+    spacy_pos_map = {
+        "noun": "NOUN",
+        "adjective": "ADJ",
+        "adverb": "ADV",
+        "verb": "VERB",
+        "preposition": "ADP",
+        "conjunction": "CCONJ",
+        "determiner": "DET",
+        "pronoun": "PRON",
+        "interjection": "INTJ",
+        "number": "NUM",
+    }
+
+    pos_lower = pos.lower()
+    if pos_lower in spacy_pos_map:
+        spacy_pos = spacy_pos_map[pos_lower]
+    elif pos_lower == "be-verb":
+        spacy_pos = "AUX"  # Auxiliary verb (e.g., be, do, have)
+    elif pos_lower == "vern":
+        spacy_pos = "VERB"  # Assuming 'vern' is a typo for 'verb'
+    elif pos_lower == "modal auxiliary":
+        spacy_pos = "AUX"  # Modal verbs are also auxiliaries
+    elif pos_lower == "do-verb":
+        spacy_pos = "AUX"
+    elif pos_lower == "have-verb":
+        spacy_pos = "AUX"
+    elif pos_lower == "infinitive-to":
+        spacy_pos = "PART"  # Particle (e.g., to in "to go")
+    else:
+        spacy_pos = None  # Empty strings and unmapped POS tags
+    return spacy_pos
+
+
+def convert_spacy_to_universal(spacy_pos: str) -> UniversalPos:
+    """
+    将 spaCy POS 标签转换为 UniversalPos 枚举
+    """
+    # 创建映射字典
+    pos_mapping = {
+        "ADJ": UniversalPos.ADJ,
+        "ADV": UniversalPos.ADV,
+        "INTJ": UniversalPos.INTJ,
+        "NOUN": UniversalPos.NOUN,
+        "PROPN": UniversalPos.PROPN,
+        "VERB": UniversalPos.VERB,
+        "AUX": UniversalPos.AUX,
+        # 介词/后置词
+        "ADP": UniversalPos.ADP,
+        # 连词
+        "CCONJ": UniversalPos.CCONJ,
+        "SCONJ": UniversalPos.SCONJ,
+        # 限定词
+        "DET": UniversalPos.DET,
+        # 数词
+        "NUM": UniversalPos.NUM,
+        # 代词
+        "PRON": UniversalPos.PRON,
+        # 小品词
+        "PART": UniversalPos.PART,
+        # 标点
+        "PUNCT": UniversalPos.PUNCT,
+        # 符号
+        "SYM": UniversalPos.SYM,
+        # 其他
+        "X": UniversalPos.X,
+        # 特殊处理:spaCy 可能返回的其他标签
+        "SPACE": UniversalPos.PUNCT,  # 空格当作标点处理
+        "CONJ": UniversalPos.CCONJ,  # 旧版 spaCy 的连词标签
+    }
+
+    # 转换为大写,确保一致
+    spacy_pos = spacy_pos.upper()
+
+    # 如果直接匹配,返回对应枚举
+    if spacy_pos in pos_mapping:
+        return pos_mapping[spacy_pos]
+
+    # 处理特殊情况:以特定前缀开头的标签
+    if spacy_pos.startswith("ADJ"):
+        return UniversalPos.ADJ
+    elif spacy_pos.startswith("ADV"):
+        return UniversalPos.ADV
+    elif spacy_pos.startswith("NOUN"):
+        return UniversalPos.NOUN
+    elif spacy_pos.startswith("VERB"):
+        return UniversalPos.VERB
+    elif spacy_pos.startswith("PROPN"):
+        return UniversalPos.PROPN
+    elif spacy_pos.startswith("PRON"):
+        return UniversalPos.PRON
+
+    # 默认返回 X(未知)
+    return UniversalPos.X
+
+
+def get_cefr_by_spacy(
+    lemma_: str, pos_: str, cefr_lexicon: CefrDictionary
+) -> Cefr | None:
+    word = lemma_.lower().strip("-*'")
+
+    result = cefr_lexicon.get(word)
+    if result:
+        all_cefr: list[Cefr] = []
+        for entry in result:
+            if pos_ == convert_pos_to_spacy(entry.pos):
+                return entry.cefr
+            all_cefr.append(entry.cefr)
+        # CEFR 等级按字符串比较即为正确顺序 ("A1" < "A2" < ... < "C2")
+        return min(all_cefr)
+    return None
+
+
+def query_coca20k(word: str, coca20k: Coca20KDictionary):
+    word = word.lower().strip("-*'")
+    return coca20k.get(word)
+
+
+def _update_word_via_lexicon(word: Word, lexi: Lexicon) -> Word:
+    """
+    使用词典信息更新单词对象
+
+    :param word: 需要更新的单词对象
+    :param lexi: 词典对象
+    :returns: 更新后的单词对象
+    """
+    # query dictionary
+    cefr = get_cefr_by_spacy(word.lemma, word.pos.value, lexi.cefr)
+    res_of_coca = query_coca20k(word.lemma, lexi.coca20k)
+    res_of_exams = lexi.examinations.query(word.lemma)
+    # 保留全部考试标签;按用户所选考试过滤在渲染阶段进行
+    exam_tags = list(res_of_exams)
+    pos_defs = []
+    phonetics = ""
+    if res_of_exams:
+        for exam, value in res_of_exams.items():
+            phonetics = value.ipa_uk
+            defs = {}
+            for pos_def in value.defs:
+                pos = pos_def.pos
+                definition_cn = pos_def.definition_cn
+                defs.setdefault(pos, []).append(definition_cn)
+            for pos, meanings in defs.items():
+                pos_defs.append(PosDef(pos=pos, meanings=meanings))
+            break
+    elif res_of_coca:
+        phonetics = res_of_coca.phonetics_1
+        pos_defs = res_of_coca.pos_defs
+    word.exams = exam_tags
+    word.cefr = cefr
+    word.pos_defs = pos_defs
+    word.phonetics = phonetics
+    return word
+
+
+def extract_advanced_words(
+    segment: SubtitleSegment,
+    lexi: Lexicon,
+    spacy_worker: SpacyWorker,
+    simple_level: set[Cefr],
+    exams: list[str],
+) -> list[Word]:
+    # NOTE: `exams` 目前未在此处使用;考试标签由 _update_word_via_lexicon 附加,
+    # 并在渲染时按用户选择过滤
+    text = segment.clean_text
+    doc = spacy_worker.submit(text)
+    last_end_pos = 0
+    lemma_to_query = []
+    words = []
+    for token in doc.tokens:
+        # filter tokens
+        if (
+            len(token.text) == 1
+            or token.is_stop
+            or token.is_punct
+            or token.ent_iob_ != "O"
+        ):
+            continue
+        if token.pos_ not in pos_interests:
+            continue
+        if token.lemma_ in lexi.swear_words:
+            continue
+
+        striped = token.lemma_.strip("-[")
+        if any(p.match(striped) for p in filter_patterns):
+            continue
+
+        if striped in lemma_to_query:
+            continue
+        lemma_to_query.append(striped)
+        striped_text = token.text.strip("-*[")
+        start_pos = text.find(striped_text, last_end_pos)
+        end_pos = start_pos + len(striped_text)
+
+        last_end_pos = end_pos
+        word = Word(
+            text=striped_text,
+            lemma=striped,
+            pos=convert_spacy_to_universal(token.pos_),
+            meta=WordMetadata(
+                start_pos=start_pos, end_pos=end_pos, context_id=segment.index
+            ),
+        )
+        word = _update_word_via_lexicon(word, lexi)
+        if word.cefr and word.cefr in simple_level:
+            continue
+        words.append(word)
+    return words
+
+
+def _find_segment_by_word_id(
+    segments: list[SubtitleSegment], word_id: int
+) -> SubtitleSegment | None:
+    for segment in segments:
+        for word in segment.candidate_words:
+            if word.meta.word_id == word_id:
+                return segment
+    return None
+
+
+def _update_word_metadata(
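+    # Slides a window of len(new_text) characters around the original span to
+    # relocate the (possibly LLM-corrected) surface form in the segment text.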
+ new_text: str, meta: WordMetadata, segment: SubtitleSegment +) -> WordMetadata | None: + """ + 更新单词的元数据 + + :param new_text: 新的单词文本 + :param meta: 单词的元数据对象 + :param segment: 字幕片段对象 + """ + text = segment.clean_text + p_end = meta.end_pos + new_len = len(new_text) + i = meta.start_pos - new_len + 1 + i = max(0, i) + j = p_end + min(0, (len(text) - (p_end + new_len))) + + for x in range(i, j + 1): + text_view = text[x : (x + new_len)] + if text_view == new_text: + return WordMetadata( + start_pos=x, + end_pos=x + new_len, + context_id=segment.index, + word_id=meta.word_id, + ) + return None + + +def format_time_extended(milliseconds: int): + """ + 将秒数转换为时间格式 + + :param milliseconds: 整数,表示毫秒数 + :return: 字符串,格式为 HH:MM:SS 或 HH:MM:SS.mmm + """ + if milliseconds < 0: + sign = "-" + milliseconds = abs(milliseconds) + else: + sign = "" + + hours = int(milliseconds // 3600000) + minutes = int((milliseconds % 3600000) // 60000) + seconds = (milliseconds % 60000) // 1000 + milliseconds_remainder = milliseconds % 1000 + return f"{sign}{hours:02d}:{minutes:02d}:{seconds:02d}.{milliseconds_remainder:03d}" + + +def _context_process_chain( + lexi: Lexicon, + llm: BaseChatModel, + segments: list[SubtitleSegment], + start: int, + end: int, + leaner_level: str = "C1", + media_name: str | None = None, + translate_sentences: bool = False +): + feedback_parser = PydanticOutputParser(pydantic_object=LlmFeedback) + + def format_input(segment_list: list[SubtitleSegment]): + media_name_prefix = ( + f"The following subtitles are from '{media_name}'.\n" if media_name else "" + ) + return { + "media_name_prefix": media_name_prefix, + "context_text": " ".join([seg.clean_text for seg in segment_list]), + "candidate_words": "\n".join( + [ + f"- {word.text} (WORD_ID: {word.meta.word_id}, LEMMA: {word.lemma}, CEFR: {word.cefr}, POS: {word.pos})" + for seg in segment_list + for word in seg.candidate_words + ] + ), + "leaner_level": leaner_level, + "format_instructions": feedback_parser.get_format_instructions(), + } + + def refactor_by_feedback(feedback: LlmFeedback): + # Process LLM feedback to update segments + for word in feedback.candidate_words_feedback: + seg = _find_segment_by_word_id(segments, word.word_id) + if not seg or seg.index < start or seg.index > end: + continue + # Update word info based on feedback + if not word.should_keep: + seg.candidate_words = [ + w for w in seg.candidate_words if w.meta.word_id != word.word_id + ] + continue + for w in seg.candidate_words: + if w.meta.word_id == word.word_id: + word_text = word.text + if word_text is not None and word.text != w.text: + # Update metadata if text changed + if word.text not in seg.clean_text: + # If the word text is not found in the segment, skip updating metadata + continue + new_meta = _update_word_metadata(word_text, w.meta, seg) + if not new_meta: + continue + w.meta = new_meta + w.text = word_text + if word.pos: + w.pos = word.pos + if word.lemma: + w.lemma = word.lemma + + # Add new words identified by LLM + for new_word in feedback.llm_identified_words: + for seg in segments: + if seg.index < start or seg.index > end: + continue + start_pos = seg.clean_text.find(new_word.text) + if start_pos == -1: + continue + if any(w.text == new_word.text for w in seg.candidate_words): + continue + new_meta = WordMetadata( + start_pos=start_pos, + end_pos=start_pos + len(new_word.text), + context_id=seg.index + ) + built_word = Word( + text=new_word.text, + lemma=new_word.lemma, + pos=new_word.pos, + meta=new_meta + ) + built_word = 
_update_word_via_lexicon(built_word, lexi) + if built_word.cefr and built_word.cefr < leaner_level: + continue + seg.candidate_words.append(built_word) + + prompt_template = ChatPromptTemplate.from_messages( + [ + ( + "system", + """You are an expert in linguistics and language learning. Your task is to analyze subtitle segments. +Please perform the following tasks for an English learner at {leaner_level} CEFR level. + +**CRITICAL INSTRUCTION**: The learner is advanced. They already know common daily vocabulary. +Your goal is to identify **only** content that helps them reach native-level proficiency. + +1. **Review and Evaluate Candidate Words:** + * **Goal**: Filter out simple words and correct any errors in lemma/POS/text. + * **Action**: Return feedback items **ONLY** for words that: + 1. Should be **discarded** (too simple, trivial filler, profanity without cultural value). Set `should_keep` to `False`. + 2. Need **correction** (wrong lemma, POS, or text boundary). Set `should_keep` to `True` and provide correct values. + * **Implicit Rule**: If a word is appropriate for the learner and has correct info, **DO NOT** include it in the output list. + * **Keep criteria**: Keep simple words **ONLY IF** used in a non-literal, metaphorical, or idiomatic sense. + * **Discard criteria**: Discard trivial conversational fillers ('gonna', 'wanna'), simple interjections, common profanity, and words below {leaner_level} level. + +2. **Identify Missed Words:** + * Identify any additional single words or phrases (typically 1-3 words) from the `context_text` that may be important for {leaner_level} learners. This specifically includes: + * **Slang or informal expressions.** + * **Internet terms or modern colloquialisms.** + * **Words or phrases that require specific cultural background knowledge to understand.** + * **Any other words or phrases that are challenging.** + * Avoid repeating words already listed in `candidate_words`. + * Must exist in the exact form in `context_text`. + * Provide lemma and POS. + * **Do NOT include** simple high-frequency words, common fillers ('gonna', 'gotta'), or basic swear words unless necessary for context. + +------------------------- +You MUST return output strictly matching the provided Pydantic schema. +Return ONLY valid JSON. + +**Here are the output format instructions you MUST follow strictly:** +{format_instructions} +""", + ), + ( + "human", + """{media_name_prefix}Here is the context from the subtitles: +--- +{context_text} +--- +Here are the candidate words identified by a basic algorithm: +{candidate_words} +""", + ), + ] + ) + feedback_chain = ( + format_input | prompt_template | llm.with_structured_output(LlmFeedback).with_retry(stop_after_attempt=3) + ) + result: LlmFeedback = feedback_chain.invoke(segments) # type: ignore + refactor_by_feedback(result) + + # 丰富词义 + if any(segment.candidate_words for segment in segments): + enrichment_prompt_template = ChatPromptTemplate.from_messages( + [ + ( + "system", + """You are a linguistics and English-learning expert. Your goal is to enhance vocabulary learning for Chinese users.\n +For each word (identified by `WORD_ID`), provide: +1. **Translation:** A concise Chinese translation. +2. 
**Usage or Cultural Context (optional, in Chinese)**: + * ONLY include if: + - The word has a specific meaning in this context that differs from its common definition; + - It is slang, idiom, phrasal, metaphorical, or culturally loaded; + * ONLY provide this context when learners would likely struggle to understand the word's usage without it. + +**For each word, provide the `word_id` to ensure proper mapping.** +**Your judgment should be based strictly on the provided subtitle context. DO NOT fabricate context or forced explanation.** + +------------------------- +You MUST return output strictly matching the provided Pydantic schema. +Return ONLY valid JSON. + +**Here are the output format instructions you MUST follow strictly:** +{format_instructions} +""", + ), + ( + "human", + """{media_name_prefix}Here is the context from the subtitles: +--- +{context_text} +--- +Here are the words you need to enrich: +{words_to_enrich} +""", + ), + ] + ) + enrichment_parser = PydanticOutputParser(pydantic_object=LlmEnrichmentResult) + + def format_enrichment_input(segment_list: list[SubtitleSegment]): + media_name_prefix = ( + f"The following subtitles are from '{media_name}'.\n" + if media_name + else "" + ) + words_to_enrich = [] + for seg in segment_list: + if start <= seg.index <= end: + for w in seg.candidate_words: + words_to_enrich.append( + f"- {w.text} (WORD_ID: {w.meta.word_id}, LEMMA: {w.lemma}, POS: {w.pos}, DEFINITIONS: {w.pos_defs_plaintext})" + ) + return { + "media_name_prefix": media_name_prefix, + "context_text": " ".join([seg.clean_text for seg in segment_list]), + "words_to_enrich": "\n".join(words_to_enrich), + "format_instructions": enrichment_parser.get_format_instructions(), + } + + enrichment_chain = ( + format_enrichment_input + | enrichment_prompt_template + | llm.with_structured_output(LlmEnrichmentResult).with_retry(stop_after_attempt=3) + ) + + enrichment_result: LlmEnrichmentResult = enrichment_chain.invoke(segments) # type: ignore + + for enriched_word_data in enrichment_result.enriched_words: + for segment in segments: + if segment.index < start or segment.index > end: + continue + for candidate_word in segment.candidate_words: + if candidate_word.meta.word_id == enriched_word_data.word_id: + candidate_word.llm_translation = enriched_word_data.translation + candidate_word.llm_usage_context = enriched_word_data.usage_context + break + # 整句翻译 + if translate_sentences: + translation_parser = PydanticOutputParser(pydantic_object=LlmTranslationResult) + + translation_prompt_template = ChatPromptTemplate.from_messages( + [ + ( + "system", + """You are a professional subtitle translator. Your task is to translate English subtitle segments into natural, idiomatic Chinese. + +**Guidelines:** +1. **Tone & Style:** Maintain the original tone (e.g., casual, formal, humorous, dramatic). +2. **Context:** Use the surrounding segments to ensure continuity and correct meaning. +3. **Conciseness:** Subtitles have space constraints. Keep translations concise but accurate. +4. **Formatting:** Return the result strictly matching the provided JSON schema. + +------------------------- +You MUST return output strictly matching the provided Pydantic schema. +Return ONLY valid JSON. 
+ +**Here are the output format instructions you MUST follow strictly:** +{format_instructions} +""", + ), + ( + "human", + """{media_name_prefix}Here are the segments to translate: +--- +{segments_text} +--- +""", + ), + ] + ) + + def format_translation_input(segment_list: list[SubtitleSegment]): + media_name_prefix = ( + f"The following subtitles are from '{media_name}'.\n" + if media_name + else "" + ) + # Only translate segments within the current batch range (start to end) + segments_text_lines = [] + for seg in segment_list: + if start <= seg.index <= end: + segments_text_lines.append(f"ID {seg.index}: {seg.clean_text}") + + return { + "media_name_prefix": media_name_prefix, + "segments_text": "\n".join(segments_text_lines), + "format_instructions": translation_parser.get_format_instructions(), + } + + translation_chain = ( + format_translation_input + | translation_prompt_template + | llm.with_structured_output(LlmTranslationResult).with_retry(stop_after_attempt=3) + ) + + try: + translation_result: LlmTranslationResult = translation_chain.invoke(segments) # type: ignore + + # Map translations back to segments + trans_map = { + t.index: t.translation for t in translation_result.translations + } + for segment in segments: + if segment.index in trans_map: + segment.Chinese = trans_map[segment.index] + except Exception as e: + logger.error(f"Error during sentence translation: {e}") + + return [segment for segment in segments if start <= segment.index <= end] + + +def llm_process_chain( + lexi: Lexicon, + llm: BaseChatModel, + segments: SegmentList, + shutdown_event: threading.Event, + context_window: int = 30, + leaner_level: str = "C1", + media_context: Context | None = None, + translate_sentences: bool = False, +) -> SegmentList: + """ + 根据 LLM 的反馈更新字幕片段中的单词信息 + + :param lexi: 词典对象 + :param llm: 大语言模型对象 + :param segments: 字幕片段 + :param shutdown_event: 关闭事件 + :param context_window: 上下文窗口大小 + :param leaner_level: 学习者的 CEFR 水平 + :param media_context: 媒体信息 + :param translate_sentences: 是否翻译句子 + :returns: 更新后的字幕片段列表 + """ + media_name = None + if media_context and media_context.media_info and media_context.meta_info: + media_info = media_context.media_info + if media_info.type == MediaType.TV: + media_name = ( + f"{media_info.title_year} {media_context.meta_info.season_episode}" + ) + else: + media_name = f"{media_info.title_year}" + + segments_list = [] + for context, (start, end) in segments.context_generator( + context_window=context_window, extra_len=2 + ): + if shutdown_event.is_set(): + break + logger.info( + f"Processing segments {format_time_extended(context[0].start_time)} ({context[0].index}) ->" + f" {format_time_extended(context[-1].end_time)} ({context[-1].index}) via LLM..." 
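+            # context_generator yields overlapping windows padded with extra_len
+            # neighbouring segments for continuity; _context_process_chain only
+            # updates and returns segments whose index falls within [start, end].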
+ ) + segments_list.extend( + _context_process_chain( + lexi, llm, context, start, end, leaner_level, media_name, translate_sentences + ) + ) + + return SegmentList(root=segments_list) diff --git a/plugins.v2/lexiannot/query_gemini.py b/plugins.v2/lexiannot/query_gemini.py deleted file mode 100644 index e583985..0000000 --- a/plugins.v2/lexiannot/query_gemini.py +++ /dev/null @@ -1,111 +0,0 @@ -import time -from typing import Generic, List, TypeVar - -from google import genai -from google.genai import types -from pydantic import BaseModel - - -class Context(BaseModel): - original_text: str - - -class Vocabulary(BaseModel): - lemma: str - Chinese: str - - -class TaskBase(BaseModel): - id: str - - -class VocabularyTranslationTask(TaskBase): - vocabulary: List[Vocabulary] - context: Context - index: int - - -class DialogueTranslationTask(TaskBase): - original_text: str - Chinese: str - index: int - - -T = TypeVar("T", bound=TaskBase) - - -class TranslationTasks(BaseModel, Generic[T]): - tasks: List[T] - - -class GeminiResponse(BaseModel, Generic[T]): - tasks: List[T] - total_token_count: int - success: bool - message: str = "" - - -def translate( - api_key: str, - translation_tasks: TranslationTasks[T], - system_instruction: str, - gemini_model: str = "gemini-2.0-flash", - temperature: float = 0.3, - max_retries: int = 3, - retry_delay: int = 10, -) -> GeminiResponse[T]: - """ - Query the Gemini API for translation tasks with retry logic. - - :param api_key: Gemini API key - :param translation_tasks: Translation tasks - :param system_instruction: System instruction - :param gemini_model: Model name to use - :param temperature: Generation temperature - :param max_retries: Number of retry attempts - :param retry_delay: Delay between retries in seconds - - returns: GeminiResponse containing the results - """ - - - messages = [] - - response_schema = type(translation_tasks) - - for attempt in range(1, max_retries + 1): - try: - client = genai.Client(api_key=api_key) - response = client.models.generate_content( - model=gemini_model, - contents=translation_tasks.model_dump_json(), - config=types.GenerateContentConfig( - system_instruction=system_instruction, - response_mime_type="application/json", - response_schema=response_schema, - temperature=temperature, - ), - ) - - if not response.parsed: - raise ValueError("Empty response from Gemini API") - - translation_res = response.parsed - total_token_count = response.usage_metadata.total_token_count - return GeminiResponse( - tasks=translation_res.tasks, - total_token_count=total_token_count or 0, - success=True, - ) - - except Exception as e: - messages.append(f"Attempt {attempt} failed: {str(e)}") - if attempt < max_retries: - time.sleep(attempt*retry_delay) - - return GeminiResponse( - tasks=[], - total_token_count=0, - success=False, - message="All retry attempts failed. 
" + "\n".join(messages), - ) \ No newline at end of file diff --git a/plugins.v2/lexiannot/requirements.txt b/plugins.v2/lexiannot/requirements.txt index bf64a6e..a82c2a2 100644 --- a/plugins.v2/lexiannot/requirements.txt +++ b/plugins.v2/lexiannot/requirements.txt @@ -1,5 +1,4 @@ pysubs2~=1.8.0 langdetect~=1.0.9 pymediainfo~=7.0.1 -spacy~=3.8.7 -google-genai~=1.48.0 \ No newline at end of file +spacy~=3.8.11 \ No newline at end of file diff --git a/plugins.v2/lexiannot/schemas.py b/plugins.v2/lexiannot/schemas.py new file mode 100644 index 0000000..32c18ff --- /dev/null +++ b/plugins.v2/lexiannot/schemas.py @@ -0,0 +1,394 @@ +import re +import uuid +from collections import Counter +from enum import Enum +from typing import Literal, Generator, Iterator + +from pydantic import BaseModel, Field, RootModel, model_validator + +from app.utils.singleton import Singleton + + +Cefr = Literal["C2", "C1", "B2", "B1", "A2", "A1"] + + +class UniversalPos(str, Enum): + """Universal Part-of-Speech tags""" + + ADJ = "ADJ" # Adjective + ADV = "ADV" # Adverb + INTJ = "INTJ" # Interjection + NOUN = "NOUN" # Noun + PROPN = "PROPN" # Proper noun + VERB = "VERB" # Verb + ADP = "ADP" # Adposition (preposition/postposition) + AUX = "AUX" # Auxiliary verb + CCONJ = "CCONJ" # Coordinating conjunction + DET = "DET" # Determiner + NUM = "NUM" # Numeral + PART = "PART" # Particle + PRON = "PRON" # Pronoun + SCONJ = "SCONJ" # Subordinating conjunction + PUNCT = "PUNCT" # Punctuation + SYM = "SYM" # Symbol + X = "X" # Other/unknown + + +class IDGenerator(metaclass=Singleton): + """Singleton class for generating unique IDs.""" + + _counter = 0 + + def next_id(self): + self._counter += 1 + return self._counter + + def reset(self): + self._counter = 0 + + +class TaskStatus(Enum): + PENDING = "pending" + RUNNING = "running" + COMPLETED = "completed" + FAILED = "failed" + CANCELED = "canceled" + IGNORED = "ignored" + + +class TaskParams(BaseModel): + skip_existing: bool = Field( + default=True, description="Whether to skip existing subtitle files" + ) + + +class TasksApiParams(BaseModel): + operation: Literal["DELETE", "RETRY", "IGNORE"] = Field( + ..., description="Operation to perform on the tasks" + ) + task_id: str | None = Field( + default=None, description="Unique identifier for the task" + ) + + +class SegmentStatistics(BaseModel): + total_segments: int = Field(default=0, description="Total number of subtitle segments") + total_words: int = Field(default=0, description="Total number of candidate words") + cefr_distribution: dict[str, int] = Field( + default_factory=dict, description="Distribution of words by CEFR level" + ) + pos_distribution: dict[str, int] = Field( + default_factory=dict, description="Distribution of words by Part of Speech" + ) + exam_distribution: dict[str, int] = Field( + default_factory=dict, description="Distribution of words by Examination" + ) + + def to_string(self) -> str: + cefr_str = ", ".join( + [f"{level}({count})" for level, count in self.cefr_distribution.items()] + ) + pos_str = ", ".join( + [f"{pos}({count})" for pos, count in self.pos_distribution.items()] + ) + exam_str = ", ".join([f"{exam}({count})" for exam, count in self.exam_distribution.items()]) + return ( + f"Total Segments: {self.total_segments}\n" + f"Total Words: {self.total_words}\n" + f"CEFR Distribution: {cefr_str if cefr_str else 'N/A'}\n" + f"POS Distribution: {pos_str if pos_str else 'N/A'}\n" + f"Exam Distribution: {exam_str if exam_str else 'N/A'}" + ) + + +class ProcessResult(BaseModel): + """Result of 
processing a task."""
+
+    message: str | None = Field(
+        default=None, description="Additional message or error information"
+    )
+    status: TaskStatus = Field(
+        default=TaskStatus.PENDING, description="Current status of the task"
+    )
+    statistics: SegmentStatistics | None = Field(default=None, description="Statistics of the task")
+
+
+class Task(BaseModel):
+    video_path: str = Field(..., description="Path to the video file")
+    task_id: str = Field(
+        default_factory=lambda: str(uuid.uuid4()),
+        description="Unique identifier for the task",
+    )
+    status: TaskStatus = Field(
+        default=TaskStatus.PENDING, description="Current status of the task"
+    )
+    add_time: str | None = Field(
+        default=None, description="Add time of the task, format %Y-%m-%d %H:%M:%S"
+    )
+    complete_time: str | None = Field(
+        default=None, description="Complete time of the task"
+    )
+    tokens_used: int = Field(default=0, description="Number of used tokens")
+    message: str | None = Field(
+        default=None, description="Additional message or error information"
+    )
+    params: TaskParams = Field(
+        default_factory=TaskParams, description="Parameters for the task"
+    )
+    statistics: SegmentStatistics | None = Field(default=None, description="Statistics of the task")
+
+
+class WordMetadata(BaseModel):
+    start_pos: int = Field(
+        ..., description="Start position of the word in the context sentence"
+    )
+    end_pos: int = Field(
+        ..., description="End position of the word in the context sentence"
+    )
+    context_id: int = Field(..., description="Identifier of the context sentence")
+    word_id: int = Field(
+        default_factory=lambda: IDGenerator().next_id(),
+        description="Identifier of the word in the context",
+    )
+
+
+class PosDef(BaseModel):
+    # 'art.', 'v.', 'aux.', 'conj.', 'prep.', 'adv.', 'adj.', 'n.', 'vt.', 'pron.', 'det.', 'vi.', 'int.'
+    # 'num.', 'abbr.', 'na.', 'quant.', 'phr.'
+    pos: str = Field(..., description="Part of speech")
+    meanings: list[str] = Field(..., description="List of definitions")
+
+    @property
+    def plaintext(self):
+        return f"{self.pos} {'; '.join(self.meanings)}"
+
+
+class WordBase(BaseModel):
+    text: str = Field(..., description="The word or phrase")
+    lemma: str = Field(..., description="Lemma form of the word")
+    pos: UniversalPos = Field(
+        default=UniversalPos.X, description="Universal POS tag of the word"
+    )
+
+
+class Word(WordBase):
+    phonetics: str | None = Field(
+        default=None, description="Phonetic transcription of the word"
+    )
+    # meta is required: WordMetadata has mandatory fields, so a no-arg default
+    # factory would raise a ValidationError as soon as it was invoked
+    meta: WordMetadata = Field(..., description="Additional metadata")
+    cefr: Cefr | None = Field(default=None, description="CEFR level")
+    exams: list[str] = Field(
+        default_factory=list,
+        description="Exams whose vocabulary syllabi include this word",
+    )
+    pos_defs: list[PosDef] = Field(
+        default_factory=list, description="Part of speech definitions"
+    )
+    llm_translation: str | None = Field(
+        default=None, description="LLM-generated Chinese translation"
+    )
+    llm_usage_context: str | None = Field(
+        default=None, description="LLM-generated cultural context"
+    )
+    llm_example_sentences: list[str] = Field(
+        default_factory=list, description="LLM-generated example sentences"
+    )
+
+    @property
+    def pos_defs_plaintext(self) -> str:
+        return " ".join(
+            [
+                f"{index}. {pos_def.plaintext}"
+                # 1-based numbering for display
+                for index, pos_def in enumerate(self.pos_defs, 1)
+            ]
+        )
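+
+    # Illustrative construction (hypothetical values): the pipeline is expected
+    # to create words with explicit metadata tying them to a context sentence:
+    #
+    #     Word(
+    #         text="aplomb",
+    #         lemma="aplomb",
+    #         pos=UniversalPos.NOUN,
+    #         cefr="C2",
+    #         meta=WordMetadata(start_pos=12, end_pos=18, context_id=3),
+    #     )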
+
+
+class SubtitleSegment(BaseModel):
+    index: int = Field(..., description="Index of the subtitle segment")
+    start_time: int = Field(
+        ..., description="Start time of the subtitle segment in milliseconds"
+    )
+    end_time: int = Field(
+        ..., description="End time of the subtitle segment in milliseconds"
+    )
+    plaintext: str = Field(..., description="Text content of the subtitle segment")
+    Chinese: str | None = Field(
+        default=None, description="Chinese translation of the subtitle segment"
+    )
+    candidate_words: list[Word] = Field(
+        default_factory=list, description="List of words worth learning in the segment"
+    )
+
+    def words_append(self, word: Word):
+        """
+        Append a word to the segment's candidate_words list.
+
+        :param word: the word object to append
+        """
+        self.candidate_words.append(word)
+
+    @staticmethod
+    def _replace_with_spaces(_text):
+        """
+        Replace [xxx] patterns in the text with spaces of equal length.
+        For example, "[Hi]" becomes "    " (4 spaces).
+        """
+        pattern = r"(\[.*?\])"
+        return re.sub(pattern, lambda match: " " * len(match.group(1)), _text)
+
+    @property
+    def clean_text(self) -> str:
+        """
+        Return the cleaned text: newlines removed and [xxx] patterns replaced
+        with spaces, keeping character offsets stable.
+        """
+        return SubtitleSegment._replace_with_spaces(self.plaintext.replace("\n", " "))
+
+    def __lt__(self, other: object):
+        if not isinstance(other, SubtitleSegment):
+            return NotImplemented
+        return self.index < other.index
+
+
+class SegmentList(RootModel):
+    root: list[SubtitleSegment] = Field(
+        default_factory=list, description="List of subtitle segments"
+    )
+
+    @property
+    def statistics(self) -> SegmentStatistics:
+        all_words = [word for seg in self.root for word in seg.candidate_words]
+
+        cefr_counts = Counter(word.cefr if word.cefr else "Other" for word in all_words)
+        pos_counts = Counter(word.pos.value if word.pos else "Other" for word in all_words)
+        exam_counts = Counter(exam for word in all_words for exam in word.exams)
+
+        return SegmentStatistics(
+            total_segments=len(self.root),
+            total_words=len(all_words),
+            cefr_distribution=dict(cefr_counts),
+            pos_distribution=dict(pos_counts),
+            exam_distribution=dict(exam_counts),
+        )
+
+    def context_generator(
+        self, context_window: int, extra_len: int = 1
+    ) -> Generator[tuple[list[SubtitleSegment], tuple[int, int]], None, None]:
+        """
+        Yield subtitle segments in batches padded with a context window.
+
+        :param context_window: context window size
+        :param extra_len: extra segments added on both sides of the window
+        :yield: the padded segment list and the (start, end) index range to process
+        """
+        total_segments = len(self.root)
+        for i in range(total_segments // context_window + 1):
+            real_start = i * context_window
+            # Guard against an empty trailing batch when total_segments is a
+            # multiple of context_window (root[real_start] would otherwise
+            # raise an IndexError)
+            if real_start >= total_segments:
+                break
+            real_end = min(total_segments, (i + 1) * context_window) - 1
+            start_index = max(0, i * context_window - extra_len)
+            end_index = min(total_segments, (i + 1) * context_window + extra_len)
+            yield (
+                self.root[start_index:end_index],
+                (self.root[real_start].index, self.root[real_end].index),
+            )
+
+    def sort(self):
+        self.root.sort()
+
+    @model_validator(mode="after")
+    def sort_root(self):
+        self.root.sort()
+        return self
+
+    def __iter__(self) -> Iterator[SubtitleSegment]:
+        return iter(self.root)
+
+
+class SpacyToken(BaseModel):
+    lemma_: str = Field(..., description="Lemma form of the word (string)")
+    pos_: str = Field(..., description="POS tag of the word")
+    text: str = Field(..., description="Text of the word")
+    is_stop: bool = Field(
+        default=False, description="Indicates if the word is a stop word"
+    )
+    is_punct: bool = Field(
+        default=False, description="Indicates if the word is punctuation"
+    )
+    ent_iob_: str = Field(..., description="Entity IOB")
+
+
+class SpacyNamedEntity(BaseModel):
+    text: str = Field(..., description="Text 
of the entity") + label_: str = Field(..., description="Label of the entity") + + +class NlpResult(BaseModel): + tokens: list[SpacyToken] = Field(default_factory=list, description="List of tokens") + entities: list[SpacyNamedEntity] = Field( + default_factory=list, description="List of named entities" + ) + + +class LlmFeedbackAboutCandidateWord(BaseModel): + should_keep: bool = Field( + ..., description="Indicates whether to keep the candidate word" + ) + # reason: str | None = Field(default=None, description="Concise reason for the decision") + word_id: int = Field(..., description="Identifier of the word in the context") + text: str | None = Field(default=None, description="The vocabulary word or phrase") + lemma: str | None = Field(default=None, description="Lemma form of the word") + pos: UniversalPos | None = Field( + default=None, + description="Universal POS tag of the word. Options: ADJ, ADV, INTJ, NOUN, PROPN, " + "VERB, ADP, AUX, CCONJ, DET, NUM, PART, PRON, SCONJ, PUNCT, SYM, X", + ) + + +class LlmFeedback(BaseModel): + candidate_words_feedback: list[LlmFeedbackAboutCandidateWord] = Field( + default_factory=list, description="Feedback about candidate words." + ) + llm_identified_words: list[WordBase] = Field( + default_factory=list, description="List of words identified by the LLM." + ) + + +class LlmWordEnrichment(BaseModel): + word_id: int = Field(..., description="Identifier of the word in the context") + translation: str | None = Field( + default=None, description="Chinese translation of the word" + ) + usage_context: str | None = Field( + default=None, description="Usage or Cultural Context" + ) + + +class LlmEnrichmentResult(BaseModel): + enriched_words: list[LlmWordEnrichment] = Field( + default_factory=list, description="List of enriched word data." 
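+        # Each entry's word_id is expected to echo WordMetadata.word_id, so
+        # enrichment results can be matched back to their Word objects.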
+    )
+
+
+class LlmSegmentTranslation(BaseModel):
+    index: int = Field(..., description="Index of the subtitle segment")
+    translation: str = Field(
+        ..., description="Natural Chinese translation of the segment"
+    )
+
+
+class LlmTranslationResult(BaseModel):
+    translations: list[LlmSegmentTranslation] = Field(
+        default_factory=list, description="List of segment translations"
+    )
+
+
+class VocabularyAnnotatingToolInput(BaseModel):
+    explanation: str = Field(
+        ...,
+        description="This is a tool for adding a new vocabulary-annotating task to LexiAnnot.",
+    )
+    video_path: str = Field(..., description="Path to the video file")
+    skip_existing: bool = Field(
+        default=True, description="Whether to skip existing subtitle files"
+    )
diff --git a/plugins.v2/lexiannot/spacyworker.py b/plugins.v2/lexiannot/spacyworker.py
index 496c1e7..f861325 100644
--- a/plugins.v2/lexiannot/spacyworker.py
+++ b/plugins.v2/lexiannot/spacyworker.py
@@ -1,29 +1,28 @@
 from multiprocessing import Process, Queue
-from typing import Dict, List
 
 import spacy
 from spacy.tokenizer import Tokenizer
 
 from app.core.cache import cached
 from app.log import logger
+from .schemas import SpacyNamedEntity, SpacyToken, NlpResult
 
 
 class SpacyWorker:
-
-    def __init__(self, model='en_core_web_sm'):
+    def __init__(self, model="en_core_web_sm"):
         self.task_q = Queue()
         self.result_q = Queue()
         self.status_q = Queue()
         self.model = model
         # 启动子进程
-        logger.info(f"正在启动 SpacyWorker 子进程...")
+        logger.info("正在启动 SpacyWorker 子进程...")
         self.proc = Process(target=self.run, args=(self.model,))
         self.proc.start()
         # 等待子进程返回模型加载状态
         status, info = self.status_q.get()
-        if status == 'error':
+        if status == "error":
             self.proc.join()
             raise RuntimeError(f"spaCy 模型加载失败: {info}")
         else:
@@ -39,35 +38,50 @@
         try:
             nlp = SpacyWorker.load_nlp(model)
             infixes = list(nlp.Defaults.infixes)
-            infixes = [i for i in infixes if '-' not in i]
+            infixes = [i for i in infixes if "-" not in i]
             infix_re = spacy.util.compile_infix_regex(infixes)
             nlp.tokenizer = Tokenizer(
                 nlp.vocab,
                 prefix_search=nlp.tokenizer.prefix_search,
                 suffix_search=nlp.tokenizer.suffix_search,
                 infix_finditer=infix_re.finditer,
-                token_match=nlp.tokenizer.token_match
+                token_match=nlp.tokenizer.token_match,
             )
         except Exception as e:
-            self.status_q.put(('error', str(e)))
+            self.status_q.put(("error", str(e)))
             return
         # 告诉主进程加载成功
-        self.status_q.put(('ok', None))
+        self.status_q.put(("ok", None))
         while True:
             text = self.task_q.get()
             if text is None:
                 break
             doc = nlp(text)
-            self.result_q.put([{'text': token.text, 'pos_': token.pos_, 'lemma_': token.lemma_} for token in doc])
+            tokens = []
+            entities = []
+            for token in doc:
+                tokens.append(
+                    SpacyToken(
+                        lemma_=token.lemma_,
+                        pos_=token.pos_,
+                        text=token.text,
+                        is_stop=token.is_stop,
+                        is_punct=token.is_punct,
+                        ent_iob_=token.ent_iob_,
+                    )
+                )
+            for ent in doc.ents:
+                entities.append(SpacyNamedEntity(text=ent.text, label_=ent.label_))
+            self.result_q.put(NlpResult(tokens=tokens, entities=entities))
 
     @staticmethod
     @cached(maxsize=1, ttl=3600 * 6)
     def load_nlp(model: str) -> spacy.Language:
         return spacy.load(model)
 
-    def submit(self, text: str) -> List[Dict[str, str]]:
+    def submit(self, text: str) -> NlpResult:
         """
         提交任务并等待结果
         """
diff --git a/plugins.v2/lexiannot/subtitle.py b/plugins.v2/lexiannot/subtitle.py
new file mode 100644
index 0000000..9477713
--- /dev/null
+++ b/plugins.v2/lexiannot/subtitle.py
@@ -0,0 +1,44 @@
+from typing import Generator, Any, overload
+
+from pysubs2 import SSAEvent
+
+from .schemas import SubtitleSegment
+
+
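+# SubtitleProcessor decouples raw pysubs2 events from the pipeline's Pydantic
+# models: it collects SSAEvent objects and exposes them as indexed
+# SubtitleSegment data for the downstream spaCy and LLM stages.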
+class SubtitleProcessor:
+    """Collects subtitle events and yields them as indexed SubtitleSegment models."""
+
+    def __init__(self):
+        self._events: list[SSAEvent] = []
+
+    def append(self, event: SSAEvent):
+        self._events.append(event)
+
+    def segment_generator(self) -> Generator[SubtitleSegment, None, None]:
+        for index, event in enumerate(self._events):
+            yield SubtitleSegment(
+                index=index,
+                start_time=event.start,
+                end_time=event.end,
+                plaintext=event.plaintext,
+            )
+
+    @overload
+    def __getitem__(self, item: int) -> SSAEvent: ...
+
+    @overload
+    def __getitem__(self, item: slice) -> list[SSAEvent]: ...
+
+    def __getitem__(self, item: Any) -> Any:
+        return self._events[item]
+
+
+def style_text(style: str, text: str) -> str:
+    """
+    Wrap text in the given ASS style override.
+
+    :param style: style name
+    :param text: text to wrap
+    :return: the styled text
+    """
+    return f"{{\\r{style}}}{text}{{\\r}}"
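+
+
+# --- Minimal usage sketch (illustrative only, not part of the plugin) ---
+# Shows the intended flow: load a subtitle file with pysubs2, collect its
+# events, then iterate the generated SubtitleSegment objects. The file path
+# and the "Annot" style name are hypothetical placeholders.
+if __name__ == "__main__":
+    from pysubs2 import SSAFile
+
+    subs = SSAFile.load("episode.en.srt")
+    processor = SubtitleProcessor()
+    for ssa_event in subs.events:
+        processor.append(ssa_event)
+    for segment in processor.segment_generator():
+        # clean_text strips newlines and masks [bracketed] cues with spaces
+        print(segment.index, segment.clean_text)
+    # Wrap an annotation in an ASS style override tag:
+    print(style_text("Annot", "aplomb n. 沉着"))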