fix(LexiAnnot): 避免潜在的数据校验错误

This commit is contained in:
wumode
2026-01-16 22:30:21 +08:00
parent 0ac725383e
commit 1f80e3b078
4 changed files with 15 additions and 16 deletions

View File

@@ -554,11 +554,12 @@
"name": "美剧生词标注",
"description": "根据CEFR等级为英语影视剧标注高级词汇。",
"labels": "英语",
"version": "1.2.3",
"version": "1.2.4",
"icon": "LexiAnnot.png",
"author": "wumode",
"level": 1,
"history": {
"v1.2.4": "增强数据校验",
"v1.2.3": "优化提示词",
"v1.2.1": "改进字幕样式获取方法",
"v1.2.0": "引入大模型候选词决策和词义丰富处理链; 支持读取系统智能体配置; 添加智能体工具; 优化通知样式; 改进 UI",

View File

@@ -60,7 +60,7 @@ class LexiAnnot(_PluginBase):
# 插件图标
plugin_icon = "LexiAnnot.png"
# 插件版本
plugin_version = "1.2.3"
plugin_version = "1.2.4"
# 插件作者
plugin_author = "wumode"
# 作者主页

View File

@@ -509,10 +509,6 @@ Your goal is two-fold:
* **Do NOT include** simple high-frequency words, common fillers ('gonna', 'gotta'), onomatopoeia, or basic swear words.
-------------------------
You MUST return output strictly matching the provided Pydantic schema.
Return ONLY valid JSON.
**Here are the output format instructions you MUST follow strictly:**
{format_instructions}
""",
),
@@ -556,10 +552,6 @@ For each word (identified by `WORD_ID`), provide:
**Your judgment should be based strictly on the provided subtitle context. DO NOT fabricate context or forced explanation.**
-------------------------
You MUST return output strictly matching the provided Pydantic schema.
Return ONLY valid JSON.
**Here are the output format instructions you MUST follow strictly:**
{format_instructions}
""",
),

View File

@@ -1,10 +1,10 @@
import re
import uuid
from collections import Counter
from enum import Enum
from enum import Enum, StrEnum
from typing import Literal, Generator, Iterator
from pydantic import BaseModel, Field, RootModel, model_validator
from pydantic import BaseModel, Field, RootModel, model_validator, field_validator
from app.utils.singleton import Singleton
@@ -12,9 +12,8 @@ from app.utils.singleton import Singleton
Cefr = Literal["C2", "C1", "B2", "B1", "A2", "A1"]
class UniversalPos(str, Enum):
class UniversalPos(StrEnum):
"""Universal Part-of-Speech tags"""
ADJ = "ADJ" # Adjective
ADV = "ADV" # Adverb
INTJ = "INTJ" # Interjection
@@ -34,9 +33,8 @@ class UniversalPos(str, Enum):
X = "X" # Other/unknown
class LexicalFeatures(str, Enum):
class LexicalFeatures(StrEnum):
"""Lexical features for words."""
FORMAL = "formal"
INFORMAL = "informal"
SLANG = "slang"
@@ -333,6 +331,14 @@ class LlmWordEnrichment(BaseModel):
usage_context: str | None = Field(default=None, description="Usage or Cultural Context")
lexical_features: list[LexicalFeatures] = Field(default_factory=list, description="Lexical features")
@field_validator("lexical_features", mode="before")
@classmethod
def filter_invalid_lexical_features(cls, v):
    """Drop entries that are not recognised ``LexicalFeatures`` values.

    Registered with ``mode="before"``, so it receives the raw input for
    ``lexical_features`` and can discard unexpected entries instead of
    letting a single bad entry invalidate the whole model.

    :param v: Raw input supplied for ``lexical_features``.
    :return: A list holding only the entries that match a
        ``LexicalFeatures`` member value; any non-list input is returned
        unchanged and left to the regular field validation.
    """
    if not isinstance(v, list):
        return v
    valid_values = {f.value for f in LexicalFeatures}
    # Member values are strings, so only a string can match. Checking the
    # type first also keeps unhashable entries (dicts, nested lists) from
    # raising TypeError in the set lookup; such entries are simply dropped.
    return [item for item in v if isinstance(item, str) and item in valid_values]
class LlmEnrichmentResult(BaseModel):
enriched_words: list[LlmWordEnrichment] = Field(default_factory=list, description="List of enriched word data")