diff --git a/package.v2.json b/package.v2.json
index 3dca7b3..7262e07 100644
--- a/package.v2.json
+++ b/package.v2.json
@@ -470,11 +470,12 @@
"name": "IMDb源",
"description": "让探索,推荐和媒体识别支持IMDb数据源。",
"labels": "探索",
- "version": "1.6.5",
+ "version": "1.6.6",
"icon": "IMDb_IOS-OSX_App.png",
"author": "wumode",
"level": 1,
"history": {
+ "v1.6.6": "优化主页组件链接跳转",
"v1.6.5": "仪表盘组件支持图片缓存",
"v1.6.4": "为元数据增加背景图",
"v1.6.3": "优化媒体识别速度; 适配 Pydantic V2 (主程序版本需高于 2.8.1-1)",
@@ -549,11 +550,12 @@
"name": "美剧生词标注",
"description": "根据CEFR等级,为英语影视剧标注高级词汇。",
"labels": "英语",
- "version": "1.2.1",
+ "version": "1.2.2",
"icon": "LexiAnnot.png",
"author": "wumode",
"level": 1,
"history": {
+ "v1.2.2": "优化提示词",
"v1.2.1": "改进字幕样式获取方法",
"v1.2.0": "引入大模型候选词决策和词义丰富处理链; 支持读取系统智能体配置; 添加智能体工具; 优化通知样式; 改进 UI",
"v1.1.4": "优化字幕选择决策",
diff --git a/plugins.v2/imdbsource/__init__.py b/plugins.v2/imdbsource/__init__.py
index cf444d4..7a89fe3 100644
--- a/plugins.v2/imdbsource/__init__.py
+++ b/plugins.v2/imdbsource/__init__.py
@@ -34,7 +34,7 @@ class ImdbSource(_PluginBase):
# 插件图标
plugin_icon = "IMDb_IOS-OSX_App.png"
# 插件版本
- plugin_version = "1.6.5"
+ plugin_version = "1.6.6"
# 插件作者
plugin_author = "wumode"
# 作者主页
@@ -285,28 +285,19 @@ class ImdbSource(_PluginBase):
},
'content': [
{
- 'component': 'RouterLink',
+ 'component': 'h1',
'props': {
- 'to': mp_url,
- 'class': 'no-underline'
+ 'class': 'mb-1 text-white text-shadow font-extrabold text-2xl line-clamp-2 overflow-hidden text-ellipsis ...'
},
- 'content': [
- {
- 'component': 'h1',
- 'props': {
- 'class': 'mb-1 text-white text-shadow font-extrabold text-2xl line-clamp-2 overflow-hidden text-ellipsis ...'
- },
- 'html': f"{entry.name} {year}",
- },
- {
- 'component': 'span',
- 'props': {
- 'class': 'text-shadow line-clamp-2 overflow-hidden text-ellipsis ...'
- },
- 'html': imdb_title.plot_text,
- }
- ]
+ 'html': f"{entry.name} {year}",
},
+ {
+ 'component': 'span',
+ 'props': {
+ 'class': 'text-shadow line-clamp-2 overflow-hidden text-ellipsis ...'
+ },
+ 'html': imdb_title.plot_text,
+ }
]
}
]
@@ -392,7 +383,8 @@ class ImdbSource(_PluginBase):
{
'component': 'a',
'props': {
- 'href': f'#{mp_url}',
+ 'href': f"https://www.imdb.com/title/{entry.ttconst}",
+ 'target': '_blank',
'class': 'no-underline w-100',
'style': 'display: flex; justify-content: center;'
},
@@ -454,15 +446,17 @@ class ImdbSource(_PluginBase):
{
'component': 'a',
'props': {
- 'href': f"https://www.imdb.com/title/{entry.ttconst}",
- 'target': '_blank',
+ 'href': f'#{mp_url}',
'rel': 'noopener noreferrer',
'class': 'text-h4 font-weight-bold mb-2 d-flex text-white align-center',
},
'content': [
{
'component': 'span',
- 'html': f"{entry.name}"
+ 'html': f"{entry.name}",
+ 'props': {
+ 'class': 'line-clamp-2 overflow-hidden',
+ }
},
{
'component': 'v-icon',
diff --git a/plugins.v2/lexiannot/__init__.py b/plugins.v2/lexiannot/__init__.py
index 98791a2..512e130 100644
--- a/plugins.v2/lexiannot/__init__.py
+++ b/plugins.v2/lexiannot/__init__.py
@@ -3,7 +3,6 @@ import os
import json
import queue
import re
-import shutil
import subprocess
import sys
import threading
@@ -61,7 +60,7 @@ class LexiAnnot(_PluginBase):
# 插件图标
plugin_icon = "LexiAnnot.png"
# 插件版本
- plugin_version = "1.2.1"
+ plugin_version = "1.2.2"
# 插件作者
plugin_author = "wumode"
# 作者主页
@@ -163,10 +162,6 @@ class LexiAnnot(_PluginBase):
self._color_alpha = int(self._opacity) if self._opacity and len(self._opacity) else 0
if self._delete_data:
# 删除不再保存在数据库的数据
- self.del_data("cefr_lexicon")
- self.del_data("coca2k_lexicon")
- self.del_data("swear_words")
- self.del_data("lexicon_version")
self.delete_data()
self._delete_data = False
self._loaded = False
@@ -1064,15 +1059,6 @@ class LexiAnnot(_PluginBase):
logger.error(f"词典 {lexicon_path} 删除失败: {e}")
self._load_lexicon_from_local.cache_clear()
- # 删除虚拟环境
- venv_dir = data_path / "venv_genai"
- if os.path.exists(venv_dir):
- try:
- shutil.rmtree(venv_dir)
- logger.info(f"虚拟环境 {venv_dir} 已删除")
- except Exception as e:
- logger.error(f"虚拟环境 {venv_dir} 删除失败: {e}")
-
# 删除任务记录
with self._tasks_lock:
self._tasks = {}
@@ -1324,9 +1310,7 @@ class LexiAnnot(_PluginBase):
ffmpeg_path = self._ffmpeg_path if self._ffmpeg_path else "ffmpeg"
eng_mark = ["en", "en-US", "eng", "en-GB", "english", "en-AU"]
- embedded_subtitles = LexiAnnot._extract_subtitles_by_lang(
- path, eng_mark, ffmpeg_path
- )
+ embedded_subtitles = LexiAnnot._extract_subtitles_by_lang(path, eng_mark, ffmpeg_path)
if not embedded_subtitles:
return ProcessResult(
status=TaskStatus.CANCELED, message="未找到嵌入式英文文本字幕"
@@ -1345,22 +1329,14 @@ class LexiAnnot(_PluginBase):
logger.info(f"提取到 {len(embedded_subtitles)} 条英语文本字幕")
for embedded_subtitle in embedded_subtitles:
if self._shutdown_event.is_set():
- return ProcessResult(
- status=TaskStatus.CANCELED, message="任务已取消"
- )
- ass_subtitle = SSAFile.from_string(
- embedded_subtitle["subtitle"], format_="ass"
- )
+ return ProcessResult(status=TaskStatus.CANCELED, message="任务已取消")
+ ass_subtitle = SSAFile.from_string(embedded_subtitle["subtitle"], format_="ass")
if embedded_subtitle.get("codec_id") == "S_TEXT/UTF8":
ass_subtitle = LexiAnnot.set_srt_style(ass_subtitle)
ass_subtitle = self.__set_style(ass_subtitle)
- ass_subtitle, stat = self.process_subtitles(
- ass_subtitle, lexi, spacy_worker, mediainfo
- )
+ ass_subtitle, stat = self.process_subtitles(ass_subtitle, lexi, spacy_worker, mediainfo)
if self._shutdown_event.is_set():
- return ProcessResult(
- status=TaskStatus.CANCELED, message="任务已取消"
- )
+ return ProcessResult(status=TaskStatus.CANCELED, message="任务已取消")
if ass_subtitle:
try:
ass_subtitle.save(str(ass_file))
@@ -1810,7 +1786,7 @@ class LexiAnnot(_PluginBase):
@staticmethod
def _extract_subtitles_by_lang(
video_path: str, lang: str | list = "en", ffmpeg: str = "ffmpeg"
- ) -> Optional[List[Dict]]:
+ ) -> list[dict]:
"""
提取视频文件中的内嵌英文字幕,使用 MediaInfo 查找字幕流。
"""
@@ -1853,21 +1829,25 @@ class LexiAnnot(_PluginBase):
}
)
if subtitles:
- return subtitles
- else:
+ # remove outliers with abnormally short duration
+ if len(subtitles) > 1:
+ durations = [sub["duration"] for sub in subtitles if sub["duration"] > 0]
+ if durations:
+ avg_duration = sum(durations) / len(durations)
+ subtitles = [
+ sub for sub in subtitles if sub["duration"] >= avg_duration * 0.2
+ ]
+ if not subtitles:
logger.warn("未找到标记为英语的文本字幕流")
- return None
except FileNotFoundError:
logger.error(f"找不到视频文件 '{video_path}'")
- return None
except subprocess.CalledProcessError as e:
logger.error(f"错误:提取字幕失败。\n错误信息:{e}")
logger.error(f"FFmpeg 输出 (stderr):\n{e.stderr}")
- return None
except Exception as e:
logger.error(f"使用 MediaInfo 提取字幕时发生错误:{e}")
- return None
+ return subtitles
def _process_chain(
self,
@@ -1884,12 +1864,9 @@ class LexiAnnot(_PluginBase):
:param spacy_worker: spaCy 分词器
:returns: 处理后的字幕行列表
"""
- simple_vocabulary = set(
- filter(
- lambda x: x < self._annot_level, ["A1", "A2", "B1", "B2", "C1", "C2"]
- )
- )
-
+        CEFR_LEVELS = ["A1", "A2", "B1", "B2", "C1", "C2"]
+        simple_vocabulary = set(filter(lambda x: x < self._annot_level, CEFR_LEVELS))
+        learner_level = max(simple_vocabulary, default="A1")
model_temperature = float(self._model_temperature) if self._model_temperature else 0.3
logger.info("通过 spaCy 分词...")
for seg in segments:
@@ -1927,7 +1904,7 @@ class LexiAnnot(_PluginBase):
segments=segments,
shutdown_event=self._shutdown_event,
context_window=self._context_window,
- leaner_level=self._annot_level,
+            learner_level=learner_level,
media_context=mediainfo,
translate_sentences=self._sentence_translation
)
diff --git a/plugins.v2/lexiannot/pipeline.py b/plugins.v2/lexiannot/pipeline.py
index c328886..7ca1c23 100644
--- a/plugins.v2/lexiannot/pipeline.py
+++ b/plugins.v2/lexiannot/pipeline.py
@@ -406,6 +406,7 @@ def _context_process_chain(
[
f"- {word.text} (WORD_ID: {word.meta.word_id}, LEMMA: {word.lemma}, CEFR: {word.cefr}, POS: {word.pos})"
for seg in segment_list
+ if start <= seg.index <= end
for word in seg.candidate_words
]
),
@@ -468,7 +469,7 @@ def _context_process_chain(
)
built_word = _update_word_via_lexicon(built_word, lexi)
- if built_word.cefr and built_word.cefr < leaner_level:
+ if built_word.cefr and built_word.cefr <= leaner_level:
continue
seg.candidate_words.append(built_word)
@@ -477,26 +478,31 @@ def _context_process_chain(
(
"system",
"""You are an expert in linguistics and language learning. Your task is to analyze subtitle segments.
-Please perform the following tasks for an English learner at {leaner_level} CEFR level.
+Please perform the following tasks for a non-native English learner.
-**CRITICAL INSTRUCTION**: The learner is advanced. They already know common daily vocabulary.
-Your goal is to identify **only** content that helps them reach native-level proficiency.
+**CRITICAL INSTRUCTION**: The learner is at the {leaner_level} level.
+They are proficient in vocabulary at or below this level.
+Your goal is two-fold:
+1. **Learning**: Identify content challenging for their current level.
+2. **Comprehension**: Ensure they understand **specific or low-frequency vocabulary** crucial for the narrative, even if it is not "core" vocabulary.
1. **Review and Evaluate Candidate Words:**
- * **Goal**: Filter out simple words and correct any errors in lemma/POS/text.
+ * **Goal**: Filter out words that are easy, BUT **retain** rare or specific words needed for understanding.
* **Action**: Return feedback items **ONLY** for words that:
1. Should be **discarded** (too simple, trivial filler, profanity without cultural value). Set `should_keep` to `False`.
2. Need **correction** (wrong lemma, POS, or text boundary). Set `should_keep` to `True` and provide correct values.
* **Implicit Rule**: If a word is appropriate for the learner and has correct info, **DO NOT** include it in the output list.
- * **Keep criteria**: Keep simple words **ONLY IF** used in a non-literal, metaphorical, or idiomatic sense.
- * **Discard criteria**: Discard trivial conversational fillers ('gonna', 'wanna'), simple interjections, common profanity, and words below {leaner_level} level.
+ * **Keep criteria**:
+ * Keep simple words **ONLY IF** used in a non-literal, metaphorical, or idiomatic sense.
+      * **Specific/Concrete Vocabulary**: Keep low-frequency words (e.g., 'chamomile', 'cavernous' for B2) that are rare but essential for visualizing the scene or understanding the plot. **Do NOT discard these just because they are rare.**
+ * **Discard criteria**: Discard trivial conversational fillers ('gonna', 'wanna'), simple interjections, common profanity, and words well below {leaner_level} level (unless they fit the 'Keep criteria').
2. **Identify Missed Words:**
- * Identify any additional single words or phrases (typically 1-3 words) from the `context_text` that may be important for {leaner_level} learners. This specifically includes:
- * **Slang or informal expressions.**
- * **Internet terms or modern colloquialisms.**
- * **Words or phrases that require specific cultural background knowledge to understand.**
- * **Any other words or phrases that are challenging.**
+ * Identify any additional single words or phrases (typically 1-3 words) from the `context_text` that may be important for {leaner_level} learners or for **plot comprehension**.
+ * **Targets**:
+ * **Slang, idioms, or modern colloquialisms.**
+ * **Low-frequency words** (e.g., 'shimmer', 'rugged') missed by the algorithm.
+ * **Words requiring cultural background.**
* Avoid repeating words already listed in `candidate_words`.
* Must exist in the exact form in `context_text`.
* Provide lemma and POS.
@@ -690,7 +696,7 @@ def llm_process_chain(
segments: SegmentList,
shutdown_event: threading.Event,
context_window: int = 30,
- leaner_level: str = "C1",
+ learner_level: str = "C1",
media_context: Context | None = None,
translate_sentences: bool = False,
) -> SegmentList:
@@ -702,7 +708,7 @@ def llm_process_chain(
:param segments: 字幕片段
:param shutdown_event: 关闭事件
:param context_window: 上下文窗口大小
- :param leaner_level: 学习者的 CEFR 水平
+ :param learner_level: 学习者的 CEFR 水平
:param media_context: 媒体信息
:param translate_sentences: 是否翻译句子
:returns: 更新后的字幕片段列表
@@ -726,7 +732,7 @@ def llm_process_chain(
)
segments_list.extend(
_context_process_chain(
- lexi, llm, context, start, end, leaner_level, media_name, translate_sentences
+ lexi, llm, context, start, end, learner_level, media_name, translate_sentences
)
)