mirror of
https://github.com/jxxghp/MoviePilot-Plugins.git
synced 2026-03-27 10:05:57 +00:00
feat(LexiAnnot): Improve subtitle selection strategy
This commit is contained in:
@@ -533,11 +533,12 @@
|
||||
"name": "美剧生词标注",
|
||||
"description": "根据CEFR等级,为英语影视剧标注高级词汇。",
|
||||
"labels": "英语",
|
||||
"version": "1.1.3",
|
||||
"version": "1.1.4",
|
||||
"icon": "LexiAnnot.png",
|
||||
"author": "wumode",
|
||||
"level": 1,
|
||||
"history": {
|
||||
"v1.1.4": "优化字幕选择决策",
|
||||
"v1.1.3": "适配 Pydantic V2 (主程序版本需高于 2.8.1-1)",
|
||||
"v1.1.2": "使用子进程避免 spaCy 模型常驻内存",
|
||||
"v1.1.1": "添加任务页面; 改进 spaCy 模型加载逻辑",
|
||||
|
||||
@@ -13,7 +13,7 @@ from app.core.config import settings
|
||||
from app.core.event import eventmanager, Event
|
||||
from app.log import logger
|
||||
from app.schemas.types import EventType, NotificationType
|
||||
from app.scheduler import Scheduler
|
||||
from app.core.config import global_vars
|
||||
|
||||
from .api import ClashRuleProviderApi, apis
|
||||
from .base import _ClashRuleProviderBase
|
||||
@@ -92,11 +92,7 @@ class ClashRuleProvider(_ClashRuleProviderBase):
|
||||
self.state.ruleset_rules_manager.clear()
|
||||
|
||||
if ClashRuleProvider.event_loop is None:
|
||||
try:
|
||||
loop = asyncio.get_running_loop()
|
||||
except RuntimeError:
|
||||
loop = Scheduler().loop
|
||||
ClashRuleProvider.event_loop = loop
|
||||
ClashRuleProvider.event_loop = global_vars.loop
|
||||
self.scheduler = AsyncIOScheduler(timezone=settings.TZ, event_loop=ClashRuleProvider.event_loop)
|
||||
self.services = ClashRuleProviderService(self.__class__.__name__, self.config, self.state, self.store,
|
||||
self.scheduler)
|
||||
|
||||
@@ -83,7 +83,7 @@ class LexiAnnot(_PluginBase):
|
||||
# 插件图标
|
||||
plugin_icon = "LexiAnnot.png"
|
||||
# 插件版本
|
||||
plugin_version = "1.1.3"
|
||||
plugin_version = "1.1.4"
|
||||
# 插件作者
|
||||
plugin_author = "wumode"
|
||||
# 作者主页
|
||||
@@ -109,7 +109,7 @@ class LexiAnnot(_PluginBase):
|
||||
_context_window: int = 0
|
||||
_max_retries: int = 0
|
||||
_request_interval: int = 0
|
||||
_ffmpeg_path = ''
|
||||
_ffmpeg_path: str = 'ffmpeg'
|
||||
_english_only = False
|
||||
_when_file_trans = False
|
||||
_model_temperature = ''
|
||||
@@ -154,7 +154,7 @@ class LexiAnnot(_PluginBase):
|
||||
self._context_window = int(config.get("context_window") or 10)
|
||||
self._max_retries = int(config.get("max_retries") or 3)
|
||||
self._request_interval = int(config.get("request_interval") or 3)
|
||||
self._ffmpeg_path = config.get("ffmpeg_path")
|
||||
self._ffmpeg_path = config.get("ffmpeg_path") or 'ffmpeg'
|
||||
self._english_only = config.get("english_only")
|
||||
self._when_file_trans = config.get("when_file_trans")
|
||||
self._model_temperature = config.get("model_temperature") or '0.3'
|
||||
@@ -975,31 +975,23 @@ class LexiAnnot(_PluginBase):
|
||||
},
|
||||
'content': [
|
||||
{
|
||||
'component': 'VRow',
|
||||
'component': 'VCol',
|
||||
'props': {
|
||||
'class': 'd-none d-sm-block',
|
||||
'cols': 12,
|
||||
},
|
||||
'content': [
|
||||
{
|
||||
'component': 'VCol',
|
||||
'component': 'VDataTableVirtual',
|
||||
'props': {
|
||||
'cols': 12,
|
||||
},
|
||||
'content': [
|
||||
{
|
||||
'component': 'VDataTableVirtual',
|
||||
'props': {
|
||||
'class': 'text-sm',
|
||||
'headers': headers,
|
||||
'items': items,
|
||||
'height': '30rem',
|
||||
'density': 'compact',
|
||||
'fixed-header': True,
|
||||
'hide-no-data': True,
|
||||
'hover': True
|
||||
}
|
||||
}
|
||||
]
|
||||
'class': 'text-sm',
|
||||
'headers': headers,
|
||||
'items': items,
|
||||
'height': '30rem',
|
||||
'density': 'compact',
|
||||
'fixed-header': True,
|
||||
'hide-no-data': True,
|
||||
'hover': True
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1213,7 +1205,11 @@ class LexiAnnot(_PluginBase):
|
||||
embedded_subtitles = LexiAnnot._extract_subtitles_by_lang(path, eng_mark, ffmpeg_path)
|
||||
if not embedded_subtitles:
|
||||
return TaskStatus.CANCELED
|
||||
embedded_subtitles = sorted(embedded_subtitles, key=lambda track: 'SDH' in track['title'])
|
||||
# order factor = 0, if 'SDH' in track['title']
|
||||
# order factor = track['duration'], otherwise
|
||||
embedded_subtitles = sorted(embedded_subtitles,
|
||||
key=lambda track: track['duration']*(1-int('SDH' in track['title'])),
|
||||
reverse=True)
|
||||
ret_message = ''
|
||||
if embedded_subtitles:
|
||||
logger.info(f'提取到 {len(embedded_subtitles)} 条英语文本字幕')
|
||||
@@ -1705,7 +1701,8 @@ class LexiAnnot(_PluginBase):
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _extract_subtitles_by_lang(video_path: str, lang: str | list = 'en', ffmpeg: str = 'ffmpeg') -> Optional[List[Dict]]:
|
||||
def _extract_subtitles_by_lang(video_path: str, lang: str | list = 'en', ffmpeg: str = 'ffmpeg'
|
||||
) -> Optional[List[Dict]]:
|
||||
"""
|
||||
提取视频文件中的内嵌英文字幕,使用 MediaInfo 查找字幕流。
|
||||
"""
|
||||
@@ -1720,12 +1717,22 @@ class LexiAnnot(_PluginBase):
|
||||
try:
|
||||
media_info: pymediainfo.MediaInfo = pymediainfo.MediaInfo.parse(video_path)
|
||||
for track in media_info.tracks:
|
||||
if track.track_type == 'Text' and check_lang(track_lang=track.language) and track.codec_id in supported_codec:
|
||||
if (track.track_type == 'Text' and check_lang(track_lang=track.language)
|
||||
and track.codec_id in supported_codec):
|
||||
subtitle_stream_index = track.stream_identifier # MediaInfo 的 stream_id 从 1 开始,ffmpeg 从 0 开始
|
||||
subtitle = LexiAnnot.__extract_subtitle(video_path, subtitle_stream_index, ffmpeg)
|
||||
if hasattr(track, 'duration'):
|
||||
if isinstance(track.duration, str) and StringUtils.is_number(track.duration):
|
||||
duration = int(float(track.duration))
|
||||
elif isinstance(track.duration, int):
|
||||
duration = track.duration
|
||||
else:
|
||||
duration = 0
|
||||
else:
|
||||
duration = 0
|
||||
if subtitle:
|
||||
subtitles.append({'title': track.title or '', 'subtitle': subtitle, 'codec_id': track.codec_id,
|
||||
'stream_id': subtitle_stream_index})
|
||||
'stream_id': subtitle_stream_index, 'duration': duration})
|
||||
if subtitles:
|
||||
return subtitles
|
||||
else:
|
||||
@@ -1761,7 +1768,7 @@ class LexiAnnot(_PluginBase):
|
||||
)
|
||||
|
||||
if not response.success:
|
||||
logger.warning(f"Error in subprocess response: {response.message}")
|
||||
logger.warning(f"Error in response: {response.message}")
|
||||
return tasks.tasks
|
||||
|
||||
self._total_token_count += response.total_token_count
|
||||
@@ -1918,7 +1925,7 @@ Only complete the `Chinese` field. Do not include pinyin, explanations, or any a
|
||||
)
|
||||
i = 0
|
||||
dialog_trans_instruction = '''You are an expert translator. You will be given a list of dialogue translation tasks in JSON format. For each entry, provide the most appropriate translation in Simplified Chinese based on the context.
|
||||
Only complete the `Chinese` field. Do not include pinyin, explanations, or any additional information.'''
|
||||
Only complete the `Chinese` field. Do not include pinyin, explanations, or any additional information.'''
|
||||
while i < len(translation_tasks):
|
||||
if self._shutdown_event.is_set():
|
||||
return lines_to_process
|
||||
@@ -2044,4 +2051,8 @@ Only complete the `Chinese` field. Do not include pinyin, explanations, or any a
|
||||
if chinese and chinese[-1] in ['。', ',']:
|
||||
chinese = chinese[:-1]
|
||||
main_dialogue[line_data['index']].text = main_dialogue[line_data['index']].text + f"\\N{chinese}"
|
||||
|
||||
# 避免 Infuse 显示乱码
|
||||
unexplainable_line = pysubs2.SSAEvent(start=0, end=0, text=f"{{\\rAnnotation ZH}}{self.plugin_name}{{\\r}}")
|
||||
ass_file.insert(0, unexplainable_line)
|
||||
return ass_file
|
||||
|
||||
@@ -68,13 +68,14 @@ def translate(
|
||||
returns: GeminiResponse containing the results
|
||||
"""
|
||||
|
||||
client = genai.Client(api_key=api_key)
|
||||
|
||||
messages = []
|
||||
|
||||
response_schema = type(translation_tasks)
|
||||
|
||||
for attempt in range(1, max_retries + 1):
|
||||
try:
|
||||
client = genai.Client(api_key=api_key)
|
||||
response = client.models.generate_content(
|
||||
model=gemini_model,
|
||||
contents=translation_tasks.model_dump_json(),
|
||||
@@ -100,7 +101,7 @@ def translate(
|
||||
except Exception as e:
|
||||
messages.append(f"Attempt {attempt} failed: {str(e)}")
|
||||
if attempt < max_retries:
|
||||
time.sleep(retry_delay)
|
||||
time.sleep(attempt*retry_delay)
|
||||
|
||||
return GeminiResponse(
|
||||
tasks=[],
|
||||
|
||||
@@ -1 +1 @@
|
||||
eyI1MnB0LnNpdGUiOiBbIjUycHQuc2l0ZSJdLCAiYXVkaWVuY2VzLm1lIjogWyJ0LmF1ZGllbmNlcy5tZSIsICJ0cmFja2VyLmNpbmVmaWxlcy5pbmZvIl0sICJidHNjaG9vbC5jbHViIjogWyJwdC5idHNjaG9vbC5jbHViIl0sICJieXIucHQiOiBbInRyYWNrZXIuYnlyLnB0Il0sICJjYXJwdC5uZXQiOiBbInRyYWNrZXIuY2FycHQubmV0Il0sICJjcmFicHQudmlwIjogWyJjcmFicHQudmlwIl0sICJjc3B0LnRvcCI6IFsidHJhY2tlci5jc3B0LnRvcCIsICJ0cmFja2VyLmNzcHQuY2MiLCAidHJhY2tlci5jc3B0LmRhdGUiXSwgImRpc2NmYW4ubmV0IjogWyJkaXNjZmFuLnh5eiJdLCAiZWFzdGdhbWUub3JnIjogWyJwdC5lYXN0Z2FtZS5vcmciXSwgImV0OC5vcmciOiBbImV0OC5vcmciLCAidC5ldDgub3JnIl0sICJnYW1lZ2FtZXB0LmNvbSI6IFsid3d3LmdhbWVnYW1lcHQuY29tIl0sICJoZGFyZWEuY2x1YiI6IFsidHJhY2tlci5oZGFyZWEuY2x1YiJdLCAiaGRkb2xieS5jb20iOiBbInQuaGRkb2xieS5jb20iXSwgImhkZmFucy5vcmciOiBbImhkZmFucy5vcmciXSwgImhka3lsLmluIjogWyJ0cmFja2VyLmhka3lsLmluIl0sICJoZHRpbWUub3JnIjogWyJoZHRpbWUub3JnIl0sICJoaXRwdC5jb20iOiBbImhpdHB0LmNvbSJdLCAiaHVkYnQuaHVzdC5lZHUuY24iOiBbImh1ZGJ0Lmh1c3QuZWR1LmNuIl0sICJpY2MyMDIyLmNvbSI6IFsidHJhY2tlci5pY2MyMDIyLnh5eiJdLCAiaWxvbGljb24uY29tIjogWyJ0cmFja2VyLmlsb2xpY29uLmNjIl0sICJrZWVwZnJkcy5jb20iOiBbInRyYWNrZXIua2VlcGZyZHMuY29tIl0sICJtLXRlYW0uY2MiOiBbInRyYWNrZXIubS10ZWFtLmNjIiwgInRyYWNrZXIubS10ZWFtLmlvIl0sICJtb25pa2FkZXNpZ24udWsiOiBbInRyYWNrZXIubW9uaWthZGVzaWduLnVrIiwgImRhaWtpcmFpLm1vbmlrYWRlc2lnbi51ayIsICJhbmltZS1uby1pbmRleC5jb20iXSwgIm5pY2VwdC5uZXQiOiBbInd3dy5uaWNlcHQubmV0Il0sICJva3B0Lm5ldCI6IFsid3d3Lm9rcHQubmV0Il0sICJwdGhvbWUubmV0IjogWyJwdGhvbWUubmV0Il0sICJwdGxncy5vcmciOiBbInB0bC5ncyIsICJyZWxheTAxLnB0bC5ncyJdLCAicHRzYmFvLmNsdWIiOiBbInB0c2Jhby5jbHViIl0sICJwdHRpbWUub3JnIjogWyJ3d3cucHR0aW1lLm9yZyJdLCAicHR6b25lLnh5eiI6IFsicHR6b25lLnh5eiJdLCAicWluZ3dhcHQuY29tIjogWyJ0cmFja2VyLnFpbmd3YS5wcm8iLCAidHJhY2tlci5xaW5nd2FwdC5jb20iXSwgInJhaW5nZmgudG9wIjogWyJyYWluZ2ZoLnRvcCJdLCAicm91c2kuemlwIjogWyJoaXRwdC5jb20iXSwgInNwcmluZ3N1bmRheS5uZXQiOiBbIm9uNi5zcHJpbmdzdW5kYXkubmV0IiwgIm9uLnNwcmluZ3N1bmRheS5uZXQiXSwgInRqdXB0Lm9yZyI6IFsidHJhY2tlci1wdWJsaWMudGp1cHQub3JnIl0sICJ0b3RoZWdsb3J5LmltIjogWyJ0cmFja2VyLnRvdGhlZ2xvcnkuaW0iXSwgInUyLmRtaHkub3JnIjogWyJkYXlkcmVhbS5kbWh5LmJlc3QiXSwgInhpbmd5dW5nZS50b3AiOiBbInRyYWNrZXIueGluZ3l1bmdlLnRvcCIsICJ0cmFja2VyLnhpbmd5dW5nZS5zYnMiXSwgInptcHQuY2MiOiBbInptcHQuY2MiXSwgImhoYW5jbHViLnRvcCI6IFsidHJhY2tlci5oaGFuY2x1Yi50b3AiXSwgImhkY2l0eS5jaXR5IjogWyJzeW5jLmxlbml0ZXIub3JnIl19
|
||||
eyI1MnB0LnNpdGUiOiBbIjUycHQuc2l0ZSJdLCAiYXVkaWVuY2VzLm1lIjogWyJ0LmF1ZGllbmNlcy5tZSIsICJ0cmFja2VyLmNpbmVmaWxlcy5pbmZvIl0sICJidHNjaG9vbC5jbHViIjogWyJwdC5idHNjaG9vbC5jbHViIl0sICJieXIucHQiOiBbInRyYWNrZXIuYnlyLnB0Il0sICJjYXJwdC5uZXQiOiBbInRyYWNrZXIuY2FycHQubmV0Il0sICJjcmFicHQudmlwIjogWyJjcmFicHQudmlwIl0sICJjc3B0LnRvcCI6IFsidHJhY2tlci5jc3B0LnRvcCIsICJ0cmFja2VyLmNzcHQuY2MiLCAidHJhY2tlci5jc3B0LmRhdGUiXSwgImRpc2NmYW4ubmV0IjogWyJkaXNjZmFuLnh5eiJdLCAiZWFzdGdhbWUub3JnIjogWyJwdC5lYXN0Z2FtZS5vcmciXSwgImV0OC5vcmciOiBbImV0OC5vcmciLCAidC5ldDgub3JnIl0sICJnYW1lZ2FtZXB0LmNvbSI6IFsid3d3LmdhbWVnYW1lcHQuY29tIl0sICJoZGFyZWEuY2x1YiI6IFsidHJhY2tlci5oZGFyZWEuY2x1YiJdLCAiaGRkb2xieS5jb20iOiBbInQuaGRkb2xieS5jb20iXSwgImhkZmFucy5vcmciOiBbImhkZmFucy5vcmciXSwgImhka3lsLmluIjogWyJ0cmFja2VyLmhka3lsLmluIl0sICJoZHRpbWUub3JnIjogWyJoZHRpbWUub3JnIl0sICJoaXRwdC5jb20iOiBbImhpdHB0LmNvbSJdLCAiaHVkYnQuaHVzdC5lZHUuY24iOiBbImh1ZGJ0Lmh1c3QuZWR1LmNuIl0sICJpY2MyMDIyLmNvbSI6IFsidHJhY2tlci5pY2MyMDIyLnh5eiJdLCAiaWxvbGljb24uY29tIjogWyJ0cmFja2VyLmlsb2xpY29uLmNjIl0sICJrZWVwZnJkcy5jb20iOiBbInRyYWNrZXIua2VlcGZyZHMuY29tIl0sICJtLXRlYW0uY2MiOiBbInRyYWNrZXIubS10ZWFtLmNjIiwgInRyYWNrZXIubS10ZWFtLmlvIl0sICJtb25pa2FkZXNpZ24udWsiOiBbInRyYWNrZXIubW9uaWthZGVzaWduLnVrIiwgImRhaWtpcmFpLm1vbmlrYWRlc2lnbi51ayIsICJhbmltZS1uby1pbmRleC5jb20iXSwgIm5pY2VwdC5uZXQiOiBbInd3dy5uaWNlcHQubmV0Il0sICJva3B0Lm5ldCI6IFsid3d3Lm9rcHQubmV0Il0sICJwdGhvbWUubmV0IjogWyJwdGhvbWUubmV0Il0sICJwdGxncy5vcmciOiBbInB0bC5ncyIsICJyZWxheTAxLnB0bC5ncyJdLCAicHRzYmFvLmNsdWIiOiBbInB0c2Jhby5jbHViIl0sICJwdHRpbWUub3JnIjogWyJ3d3cucHR0aW1lLm9yZyJdLCAicHR6b25lLnh5eiI6IFsicHR6b25lLnh5eiJdLCAicWluZ3dhcHQuY29tIjogWyJ0cmFja2VyLnFpbmd3YS5wcm8iLCAidHJhY2tlci5xaW5nd2FwdC5jb20iLCAidHJhY2tlci5xaW5nd2FwdC5vcmciXSwgInJhaW5nZmgudG9wIjogWyJyYWluZ2ZoLnRvcCJdLCAicm91c2kuemlwIjogWyJoaXRwdC5jb20iXSwgInNwcmluZ3N1bmRheS5uZXQiOiBbIm9uNi5zcHJpbmdzdW5kYXkubmV0IiwgIm9uLnNwcmluZ3N1bmRheS5uZXQiXSwgInRqdXB0Lm9yZyI6IFsidHJhY2tlci1wdWJsaWMudGp1cHQub3JnIl0sICJ0b3RoZWdsb3J5LmltIjogWyJ0cmFja2VyLnRvdGhlZ2xvcnkuaW0iXSwgInUyLmRtaHkub3JnIjogWyJkYXlkcmVhbS5kbWh5LmJlc3QiXSwgInhpbmd5dW5nZS50b3AiOiBbInRyYWNrZXIueGluZ3l1bmdlLnRvcCIsICJ0cmFja2VyLnhpbmd5dW5nZS5zYnMiXSwgInptcHQuY2MiOiBbInptcHQuY2MiXSwgImhoYW5jbHViLnRvcCI6IFsidHJhY2tlci5oaGFuY2x1Yi50b3AiXSwgImhkY2l0eS5jaXR5IjogWyJzeW5jLmxlbml0ZXIub3JnIl19
|
||||
Reference in New Issue
Block a user