diff --git a/package.v2.json b/package.v2.json index 999a7f0..245c3bd 100644 --- a/package.v2.json +++ b/package.v2.json @@ -554,11 +554,12 @@ "name": "美剧生词标注", "description": "根据CEFR等级,为英语影视剧标注高级词汇。", "labels": "英语", - "version": "1.2.3", + "version": "1.2.4", "icon": "LexiAnnot.png", "author": "wumode", "level": 1, "history": { + "v1.2.4": "增强数据校验", "v1.2.3": "优化提示词", "v1.2.1": "改进字幕样式获取方法", "v1.2.0": "引入大模型候选词决策和词义丰富处理链; 支持读取系统智能体配置; 添加智能体工具; 优化通知样式; 改进 UI", diff --git a/plugins.v2/lexiannot/__init__.py b/plugins.v2/lexiannot/__init__.py index 385c0f5..c9b7d5d 100644 --- a/plugins.v2/lexiannot/__init__.py +++ b/plugins.v2/lexiannot/__init__.py @@ -60,7 +60,7 @@ class LexiAnnot(_PluginBase): # 插件图标 plugin_icon = "LexiAnnot.png" # 插件版本 - plugin_version = "1.2.3" + plugin_version = "1.2.4" # 插件作者 plugin_author = "wumode" # 作者主页 diff --git a/plugins.v2/lexiannot/pipeline.py b/plugins.v2/lexiannot/pipeline.py index 7ca1c23..45dd7af 100644 --- a/plugins.v2/lexiannot/pipeline.py +++ b/plugins.v2/lexiannot/pipeline.py @@ -509,10 +509,6 @@ Your goal is two-fold: * **Do NOT include** simple high-frequency words, common fillers ('gonna', 'gotta'), onomatopoeia, or basic swear words. ------------------------- -You MUST return output strictly matching the provided Pydantic schema. -Return ONLY valid JSON. - -**Here are the output format instructions you MUST follow strictly:** {format_instructions} """, ), @@ -556,10 +552,6 @@ For each word (identified by `WORD_ID`), provide: **Your judgment should be based strictly on the provided subtitle context. DO NOT fabricate context or forced explanation.** ------------------------- -You MUST return output strictly matching the provided Pydantic schema. -Return ONLY valid JSON. - -**Here are the output format instructions you MUST follow strictly:** {format_instructions} """, ), diff --git a/plugins.v2/lexiannot/schemas.py b/plugins.v2/lexiannot/schemas.py index 764949a..801e9c0 100644 --- a/plugins.v2/lexiannot/schemas.py +++ b/plugins.v2/lexiannot/schemas.py @@ -1,10 +1,10 @@ import re import uuid from collections import Counter -from enum import Enum +from enum import Enum, StrEnum from typing import Literal, Generator, Iterator -from pydantic import BaseModel, Field, RootModel, model_validator +from pydantic import BaseModel, Field, RootModel, model_validator, field_validator from app.utils.singleton import Singleton @@ -12,9 +12,8 @@ from app.utils.singleton import Singleton Cefr = Literal["C2", "C1", "B2", "B1", "A2", "A1"] -class UniversalPos(str, Enum): +class UniversalPos(StrEnum): """Universal Part-of-Speech tags""" - ADJ = "ADJ" # Adjective ADV = "ADV" # Adverb INTJ = "INTJ" # Interjection @@ -34,9 +33,8 @@ class UniversalPos(str, Enum): X = "X" # Other/unknown -class LexicalFeatures(str, Enum): +class LexicalFeatures(StrEnum): """Lexical features for words.""" - FORMAL = "formal" INFORMAL = "informal" SLANG = "slang" @@ -333,6 +331,14 @@ class LlmWordEnrichment(BaseModel): usage_context: str | None = Field(default=None, description="Usage or Cultural Context") lexical_features: list[LexicalFeatures] = Field(default_factory=list, description="Lexical features") + @field_validator("lexical_features", mode="before") + @classmethod + def filter_invalid_lexical_features(cls, v): + if isinstance(v, list): + valid_values = {f.value for f in LexicalFeatures} + return [item for item in v if item in valid_values] + return v + class LlmEnrichmentResult(BaseModel): enriched_words: list[LlmWordEnrichment] = Field(default_factory=list, description="List of enriched word data")