feat(lexiannot): Add QueryAnnotationTasksTool

This commit is contained in:
wumode
2025-12-12 12:52:45 +08:00
parent 4d8f36f674
commit 77b34dba5c
4 changed files with 123 additions and 131 deletions

View File

@@ -1,3 +1,4 @@
import copy
import os
import json
import queue
@@ -31,7 +32,7 @@ from app.schemas.types import EventType
from app.core.context import MediaInfo
from app.chain.media import MediaChain
from .agenttool import VocabularyAnnotatingTool
from .agenttool import QueryAnnotationTasksTool, VocabularyAnnotatingTool
from .lexicon import Lexicon
from .schemas import (
IDGenerator,
@@ -181,7 +182,7 @@ class LexiAnnot(_PluginBase):
# 从字典中恢复队列
with self._tasks_lock:
for task_id, task in self._tasks.items():
if task.status == TaskStatus.PENDING:
if task.status in {TaskStatus.PENDING, TaskStatus.RUNNING}:
self._task_queue.put(task)
self._shutdown_event = threading.Event()
@@ -361,26 +362,11 @@ class LexiAnnot(_PluginBase):
"model": "annot_level",
"label": "标注词汇的最低CEFR等级",
"items": [
{
"title": "B1",
"value": "B1",
},
{
"title": "B2",
"value": "B2",
},
{
"title": "C1",
"value": "C1",
},
{
"title": "C2",
"value": "C2",
},
{
"title": "C2+",
"value": "C2+",
},
{"title": "B1", "value": "B1"},
{"title": "B2", "value": "B2"},
{"title": "C1", "value": "C1"},
{"title": "C2", "value": "C2"},
{"title": "C2+", "value": "C2+"},
],
},
}
@@ -412,42 +398,15 @@ class LexiAnnot(_PluginBase):
"chips": True,
"multiple": True,
"items": [
{
"title": "四级",
"value": "CET-4",
},
{
"title": "六级",
"value": "CET-6",
},
{
"title": "考研",
"value": "NPEE",
},
{
"title": "雅思",
"value": "IELTS",
},
{
"title": "托福",
"value": "TOEFL",
},
{
"title": "专四",
"value": "TEM-4",
},
{
"title": "专八",
"value": "TEM-8",
},
{
"title": "GRE",
"value": "GRE",
},
{
"title": "PET",
"value": "PET",
},
{"title": "四级", "value": "CET-4"},
{"title": "六级", "value": "CET-6"},
{"title": "考研", "value": "NPEE"},
{"title": "雅思", "value": "IELTS"},
{"title": "托福", "value": "TOEFL"},
{"title": "专四", "value": "TEM-4"},
{"title": "专八", "value": "TEM-8"},
{"title": "GRE", "value": "GRE"},
{"title": "PET", "value": "PET"},
],
},
}
@@ -496,30 +455,12 @@ class LexiAnnot(_PluginBase):
"model": "font_scaling",
"label": "字体缩放",
"items": [
{
"title": "50%",
"value": "0.5",
},
{
"title": "75%",
"value": "0.75",
},
{
"title": "100%",
"value": "1",
},
{
"title": "125%",
"value": "1.25",
},
{
"title": "150%",
"value": "1.5",
},
{
"title": "200%",
"value": "2",
},
{"title": "50%", "value": "0.5"},
{"title": "75%", "value": "0.75"},
{"title": "100%", "value": "1"},
{"title": "125%", "value": "1.25"},
{"title": "150%", "value": "1.5"},
{"title": "200%", "value": "2"}
],
},
}
@@ -549,26 +490,11 @@ class LexiAnnot(_PluginBase):
"model": "opacity",
"label": "透明度",
"items": [
{
"title": "0",
"value": "0",
},
{
"title": "25%",
"value": "63",
},
{
"title": "50%",
"value": "127",
},
{
"title": "75%",
"value": "191",
},
{
"title": "100%",
"value": "255",
},
{"title": "0", "value": "0"},
{"title": "25%", "value": "63"},
{"title": "50%", "value": "127"},
{"title": "75%", "value": "191"},
{"title": "100%", "value": "255"},
],
},
}
@@ -1100,7 +1026,7 @@ class LexiAnnot(_PluginBase):
获取插件智能体工具
返回工具类列表,每个工具类必须继承自 MoviePilotTool
"""
return [VocabularyAnnotatingTool]
return [VocabularyAnnotatingTool, QueryAnnotationTasksTool]
def stop_service(self):
"""
@@ -1170,7 +1096,12 @@ class LexiAnnot(_PluginBase):
}
self.save_data("tasks", tasks_dict)
def add_task(self, video_file: str, skip_existing=True):
def get_tasks(self) -> list[Task]:
    """
    Return an independent snapshot of every task known to the plugin.

    Each task is deep-copied so callers can sort or mutate the result
    without affecting the plugin's own task registry.

    :return: list of copied tasks, in the registry's iteration order
    """
    # Hold the task lock while iterating: the other accessors of self._tasks
    # take this lock, and iterating a dict that another thread is resizing
    # raises "dictionary changed size during iteration".
    # NOTE(review): assumes callers never invoke this while already holding
    # self._tasks_lock (it would deadlock if the lock is not reentrant).
    with self._tasks_lock:
        return [copy.deepcopy(task) for task in self._tasks.values()]
def add_task(self, video_file: str, skip_existing=True) -> bool:
if not self._enabled:
return False
task = Task(
video_path=video_file,
add_time=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
@@ -1181,6 +1112,7 @@ class LexiAnnot(_PluginBase):
self._task_queue.put(task)
self.save_tasks()
logger.info(f"加入任务队列: {video_file}")
return True
def add_media_file(self, path: str, skip_existing: bool = True):
"""
@@ -1193,10 +1125,7 @@ class LexiAnnot(_PluginBase):
def delete_tasks(self, task_id: str | None):
historical_status = {
TaskStatus.COMPLETED,
TaskStatus.FAILED,
TaskStatus.CANCELED,
TaskStatus.IGNORED,
TaskStatus.COMPLETED, TaskStatus.FAILED, TaskStatus.CANCELED, TaskStatus.IGNORED,
}
with self._tasks_lock:
if task_id is None:
@@ -1271,9 +1200,8 @@ class LexiAnnot(_PluginBase):
if context and context.media_info and context.meta_info:
media_info = context.media_info
if media_info.type == MediaType.TV:
media_name = (
f"{media_info.title_year} {context.meta_info.season_episode}"
)
media_name = f"{media_info.title_year} {context.meta_info.season_episode}"
else:
media_name = f"{media_info.title_year}"
message = f"标题: {media_name}"
@@ -1979,8 +1907,7 @@ class LexiAnnot(_PluginBase):
segment=seg,
lexi=lexi,
spacy_worker=spacy_worker,
simple_level=simple_vocabulary,
exams=self._exam_tags,
simple_level=simple_vocabulary
)
if self._gemini_available:
if self._use_mp_agent:

View File

@@ -5,11 +5,11 @@ from pydantic import BaseModel
from app.agent.tools.base import MoviePilotTool
from app.core.plugin import PluginManager
from .schemas import VocabularyAnnotatingToolInput
from .schemas import VocabularyAnnotatingToolInput, QueryAnnotationTasksToolInput, Task
class VocabularyAnnotatingTool(MoviePilotTool):
"""自定义工具示例"""
"""词汇标注工具"""
# 工具名称
name: str = "vocabulary_annotating_tool"
@@ -61,7 +61,74 @@ class VocabularyAnnotatingTool(MoviePilotTool):
plugin_instance = plugins.get("LexiAnnot")
if not plugin_instance:
return "LexiAnnot 插件未运行"
await asyncio.to_thread(
res = await asyncio.to_thread(
plugin_instance.add_task, video_file=video_path, skip_existing=skip_existing
)
if not res:
return "任务添加失败"
return None
class QueryAnnotationTasksTool(MoviePilotTool):
    """Agent tool that lists the most recently added LexiAnnot annotation tasks."""

    # Tool name
    name: str = "query_annotation_tasks_tool"
    # Tool description
    description: str = (
        "Query the latest vocabulary annotation tasks from plugin LexiAnnot."
    )
    # Input argument schema
    args_schema: Type[BaseModel] = QueryAnnotationTasksToolInput

    def get_tool_message(self, **kwargs) -> Optional[str]:
        """
        Build the progress message shown while the query is running.

        :param kwargs: tool arguments; ``count`` is read and defaults to 5
        :return: message naming how many tasks are being queried
        """
        count = kwargs.get("count", 5)
        return f"正在查询最近的 {count} 条字幕标注任务"

    async def run(self, count: int = 5, **kwargs) -> str:
        """
        Query the newest annotation tasks and render them as text.

        :param count: maximum number of tasks to return; negative values are
            treated as 0 (defaults to 5, matching the input schema)
        :param kwargs: other arguments, including ``explanation``
        :return: a human-readable report, or a short message when the plugin is
            not running, no task matches, or an error occurred
        """
        try:
            plugins = PluginManager().running_plugins
            plugin_instance = plugins.get("LexiAnnot")
            if not plugin_instance:
                return "LexiAnnot 插件未运行"

            # get_tasks deep-copies every task; run it off the event loop, the
            # same way the annotating tool offloads its plugin call.
            total: list[Task] = await asyncio.to_thread(plugin_instance.get_tasks)
            # add_time may be None, so fall back to "" to keep keys comparable
            total.sort(key=lambda t: t.add_time or "", reverse=True)

            # A negative slice bound would drop items from the end instead of
            # limiting the result, so clamp it to zero.
            if count is not None and count < 0:
                count = 0
            tasks = total[:count]
            if not tasks:
                return "未查询到相关任务"

            result_lines = [f"最近 {len(tasks)} 条标注任务:"]
            for task in tasks:
                # status is normally an enum; tolerate a plain value as well
                if hasattr(task.status, "value"):
                    status_val = task.status.value
                else:
                    status_val = str(task.status)

                parts = [
                    f"\n🎥 **{task.video_path}**",
                    f"\n ID: {task.task_id}",
                    f"\n Status: {status_val}",
                    f"\n Added: {task.add_time or 'N/A'}",
                ]
                if task.complete_time:
                    parts.append(f"\n Completed: {task.complete_time}")
                if task.message:
                    parts.append(f"\n Message: {task.message}")
                if task.statistics:
                    parts.append(
                        f"\n Words: {task.statistics.total_words}"
                        f" | Segments: {task.statistics.total_segments}"
                    )
                result_lines.append("".join(parts))

            return "\n".join(result_lines)
        except Exception as e:
            # Tool boundary: report the failure to the agent instead of raising
            return f"执行失败: {str(e)}"

View File

@@ -69,7 +69,7 @@ def initialize_llm(
max_retries: int = 3,
proxy: bool = False,
) -> BaseChatModel:
"""初始化LLM模型"""
"""初始化 LLM"""
if provider == "google":
if proxy:
@@ -277,13 +277,8 @@ def _update_word_via_lexicon(word: Word, lexi: Lexicon) -> Word:
return word
def extract_advanced_words(
segment: SubtitleSegment,
lexi: Lexicon,
spacy_worker: SpacyWorker,
simple_level: set[Cefr],
exams: list[str],
) -> list[Word]:
def extract_advanced_words(segment: SubtitleSegment, lexi: Lexicon, spacy_worker: SpacyWorker,
simple_level: set[Cefr]) -> list[Word]:
text = segment.clean_text
doc = spacy_worker.submit(text)
last_end_pos = 0
@@ -331,9 +326,7 @@ def extract_advanced_words(
return words
def _find_segment_by_word_id(
segments: list[SubtitleSegment], word_id: int
) -> SubtitleSegment | None:
def _find_segment_by_word_id(segments: list[SubtitleSegment], word_id: int) -> SubtitleSegment | None:
for segment in segments:
for word in segment.candidate_words:
if word.meta.word_id == word_id:
@@ -705,7 +698,7 @@ def llm_process_chain(
根据 LLM 的反馈更新字幕片段中的单词信息
:param lexi: 词典对象
:param llm: 大语言模型对象
:param llm: LLM 对象
:param segments: 字幕片段
:param shutdown_event: 关闭事件
:param context_window: 上下文窗口大小

View File

@@ -34,7 +34,7 @@ class UniversalPos(str, Enum):
X = "X" # Other/unknown
class LexicalFeatures(str, Enum):
class LexicalFeatures(str, Enum):
"""Lexical features for words."""
FORMAL = "formal"
@@ -335,7 +335,7 @@ class LlmWordEnrichment(BaseModel):
class LlmEnrichmentResult(BaseModel):
enriched_words: list[LlmWordEnrichment] = Field(default_factory=list, description="List of enriched word data.")
enriched_words: list[LlmWordEnrichment] = Field(default_factory=list, description="List of enriched word data")
class LlmSegmentTranslation(BaseModel):
@@ -350,7 +350,12 @@ class LlmTranslationResult(BaseModel):
class VocabularyAnnotatingToolInput(BaseModel):
explanation: str = Field(
...,
description="This is a tool for adding a new vocabulary-annotating task to AnnotLexi.",
description="This is a tool for adding a new vocabulary-annotating task to AnnotLexi",
)
video_path: str = Field(..., description="Path to the video file")
skip_existing: bool = Field(default=True, description="Whether to skip existing subtitle files")
class QueryAnnotationTasksToolInput(BaseModel):
    """Input arguments accepted by the annotation-task query tool."""

    # Upper bound on how many tasks the tool reports
    count: int = Field(default=5, description="The maximum number of returned annotation tasks")
    # The plugin is looked up under the name "LexiAnnot", so use that name here
    explanation: str = Field(..., description="This is a tool for querying the latest annotation tasks in LexiAnnot")