feat(AutoSubv2): add auto language detection and improve translation retry logic

This commit is contained in:
JavaZeroo
2025-09-07 12:04:49 +08:00
parent af3956d86f
commit 47bf56afe4
2 changed files with 90 additions and 40 deletions

View File

@@ -101,6 +101,7 @@ class AutoSubv2(_PluginBase):
_max_retries = None
_enable_merge = None
_enable_asr = None
_auto_detect_language = None
_huggingface_proxy = None
_faster_whisper_model_path = None
_faster_whisper_model = None
@@ -126,6 +127,7 @@ class AutoSubv2(_PluginBase):
self._faster_whisper_model_path = config.get('faster_whisper_model_path',
self.get_data_path() / "faster-whisper-models")
self._huggingface_proxy = config.get('proxy', True)
self._auto_detect_language = config.get('auto_detect_language', False)
self._translate_zh = config.get('translate_zh', False)
if self._translate_zh:
use_chatgpt = config.get('use_chatgpt', True)
@@ -407,16 +409,28 @@ class AutoSubv2(_PluginBase):
model = WhisperModel(
download_model(self._faster_whisper_model, local_files_only=False, cache_dir=cache_dir),
device="cpu", compute_type="int8", cpu_threads=psutil.cpu_count(logical=False))
segments, info = model.transcribe(audio_file,
language=lang if lang != 'auto' else None,
word_timestamps=True,
vad_filter=True,
temperature=0,
beam_size=5)
logger.info("Detected language '%s' with probability %f" % (info.language, info.language_probability))
try:
segments, info = model.transcribe(audio_file,
language=lang if lang != 'auto' else None,
word_timestamps=True,
vad_filter=True,
temperature=0,
beam_size=5)
logger.info("Detected language '%s' with probability %f" % (info.language, info.language_probability))
if lang == 'auto':
lang = info.language
if lang == 'auto':
lang = info.language
except ValueError as e:
if "max() iterable argument is empty" in str(e):
logger.info("音频文件中未检测到任何语言内容,生成空字幕文件以避免重复处理")
# 生成空的字幕文件,避免重复识别
self.__save_srt(f"{audio_file}.srt", [])
# 如果原本是auto检测设置一个默认语言
lang = 'und' if lang == 'auto' else lang
return True, lang
else:
raise e
subs = []
if lang in ['en', 'eng']:
@@ -481,9 +495,15 @@ class AutoSubv2(_PluginBase):
if not ret:
logger.info(f"字幕源偏好:{self._translate_preference} 获取音轨元数据失败")
return False, None, None
if not iso639.find(audio_lang) or not iso639.to_iso639_1(audio_lang):
# 如果开启了自动语言检测直接设置为auto跳过metadata的语言信息
if self._auto_detect_language:
logger.info("已开启自动语言检测将使用whisper模型自动识别语言")
audio_lang = 'auto'
elif not iso639.find(audio_lang) or not iso639.to_iso639_1(audio_lang):
logger.info(f"字幕源偏好:{self._translate_preference} 未从音轨元数据中获取到语言信息")
audio_lang = 'auto'
# 当字幕源偏好为origin_first时优先使用音轨语言
if self._translate_preference == "origin_first":
prefer_subtitle_langs = ['en', 'eng'] if audio_lang == 'auto' else [audio_lang,
@@ -570,7 +590,7 @@ class AutoSubv2(_PluginBase):
os.remove(f"{audio_file.name}.srt")
return ret, lang, Path(f"{subtitle_file}.{lang}.srt")
else:
logger.error(f"生成字幕失败")
logger.error("生成字幕失败")
return False, None, None
@staticmethod
@@ -810,8 +830,8 @@ class AutoSubv2(_PluginBase):
def __translate_to_zh(self, text: str, context: str = None) -> str:
    """Translate one piece of subtitle text to Simplified Chinese.

    Thin wrapper around OpenAi.translate_to_zh that first honours user
    cancellation.

    :param text: source subtitle text to translate
    :param context: optional surrounding-lines context for the model
    :return: result of OpenAi.translate_to_zh — a (success, text) pair,
             despite the ``-> str`` annotation kept for interface stability
    :raises UserInterruptException: when the user has cancelled the task
    """
    # Abort promptly if the user requested cancellation of the running task.
    if self._event.is_set():
        raise UserInterruptException("用户中断当前任务")
    # Retries (with exponential backoff) happen inside translate_to_zh,
    # bounded by the plugin-level _max_retries setting.
    return self._openai.translate_to_zh(text, context, max_retries=self._max_retries)
def __process_batch(self, all_subs: list, batch: list) -> list:
"""批量处理逻辑"""
@@ -839,20 +859,17 @@ class AutoSubv2(_PluginBase):
def __process_single(self, all_subs: List[srt.Subtitle], item: srt.Subtitle) -> srt.Subtitle:
    """单条处理逻辑 — translate one subtitle entry (line-level fallback path).

    Only a single attempt is made here: retry/backoff now lives inside
    __translate_to_zh / OpenAi.translate_to_zh.

    :param all_subs: full subtitle list, used to locate context around item
    :param item: subtitle entry whose content is rewritten in place
    :return: the same item, with the translation prepended on success or a
             failure marker prepended otherwise
    """
    idx = all_subs.index(item)
    # Build a context window around this line only when configured.
    context = self.__get_context(all_subs, [idx], is_batch=False) if self._context_window > 0 else None
    success, trans = self.__translate_to_zh(item.content, context)
    if success:
        # Bilingual output: translated text above the original line.
        item.content = f"{trans}\n{item.content}"
        self._stats['line_fallback'] += 1
        return item
    else:
        item.content = f"[翻译失败]\n{item.content}"
        return item
def __translate_zh_subtitle(self, source_lang: str, source_subtitle: str, dest_subtitle: str):
self._stats = {'total': 0, 'batch_success': 0, 'batch_fail': 0, 'line_fallback': 0}
@@ -1189,6 +1206,20 @@ class AutoSubv2(_PluginBase):
}
]
},
{
'component': 'VCol',
'props': {'cols': 12, 'md': 4, 'v-show': 'enable_asr'},
'content': [
{
'component': 'VSwitch',
'props': {
'model': 'auto_detect_language',
'label': '自动检测语言',
'hint': '使用whisper模型自动检测语言而非依赖视频元数据'
}
}
]
},
{
'component': 'VCol',
'props': {'cols': 12, 'md': 4, 'v-show': 'enable_asr'},
@@ -1206,10 +1237,15 @@ class AutoSubv2(_PluginBase):
}
}
]
},
}
]
},
{
'component': 'VRow',
'content': [
{
'component': 'VCol',
'props': {'cols': 12, 'md': 4, 'v-show': 'enable_asr'},
'props': {'cols': 12, 'md': 12, 'v-show': 'enable_asr'},
'content': [
{
'component': 'VSwitch',
@@ -1508,6 +1544,7 @@ class AutoSubv2(_PluginBase):
"translate_preference": "english_first",
"translate_zh": False,
"enable_asr": True,
"auto_detect_language": False,
"faster_whisper_model": "base",
"proxy": True,
"use_chatgpt": True,

View File

@@ -1,4 +1,5 @@
import time
import random
from typing import List, Union
import openai
@@ -108,11 +109,12 @@ class OpenAi:
if OpenAISessionCache.get(session_id):
OpenAISessionCache.delete(session_id)
def translate_to_zh(self, text: str, context: str = None):
def translate_to_zh(self, text: str, context: str = None, max_retries: int = 3):
"""
翻译为中文
:param text: 输入文本
:param context: 翻译上下文
:param max_retries: 最大重试次数
"""
system_prompt = """您是一位专业字幕翻译专家,请严格遵循以下规则:
1. 将原文精准翻译为简体中文,保持原文本意
@@ -121,14 +123,25 @@ class OpenAi:
4. 按行翻译待译内容。翻译结果不要包括上下文。
5. 输出内容必须仅包括译文。不要输出任何开场白,解释说明或总结"""
user_prompt = f"翻译上下文:\n{context}\n\n需要翻译的内容:\n{text}" if context else f"请翻译:\n{text}"
result = ""
try:
completion = self.__get_model(prompt=system_prompt,
message=user_prompt,
temperature=0.2,
top_p=0.9)
result = completion.choices[0].message.content.strip()
return True, result
except Exception as e:
print(f"{str(e)}{result}")
return False, f"{str(e)}{result}"
last_error = ""
for attempt in range(max_retries + 1):
try:
completion = self.__get_model(prompt=system_prompt,
message=user_prompt,
temperature=0.2,
top_p=0.9)
result = completion.choices[0].message.content.strip()
return True, result
except Exception as e:
last_error = str(e)
if attempt < max_retries:
# 使用指数退避和随机抖动,避免多个请求同时重试
base_delay = 2 ** attempt # 指数退避: 1s, 2s, 4s...
jitter = random.uniform(0.1, 0.9) # 随机抖动: 0.1-0.9秒
sleep_time = base_delay + jitter
print(f"翻译请求失败 (第{attempt + 1}次尝试){last_error}{sleep_time:.1f}秒后重试...")
time.sleep(sleep_time)
else:
print(f"翻译请求失败 (已重试{max_retries}次){last_error}")
return False, f"{last_error}"