diff --git a/package.v2.json b/package.v2.json index 0908faa..e8751dc 100644 --- a/package.v2.json +++ b/package.v2.json @@ -451,11 +451,12 @@ "name": "IMDb源", "description": "让探索,推荐和媒体识别支持IMDb数据源。", "labels": "探索", - "version": "1.5.7", + "version": "1.5.8", "icon": "IMDb_IOS-OSX_App.png", "author": "wumode", "level": 1, "history": { + "v1.5.8": "修改UA", "v1.5.7": "改进异常处理", "v1.5.6": "固定仪表盘组件海报比例; 修复 bug", "v1.5.5": "修复初始化错误", @@ -510,11 +511,12 @@ "name": "美剧生词标注", "description": "根据CEFR等级,为英语影视剧标注高级词汇。", "labels": "英语", - "version": "1.1.0", + "version": "1.1.1", "icon": "LexiAnnot.png", "author": "wumode", "level": 1, "history": { + "v1.1.1": "添加任务页面; 改进 spaCy 模型加载逻辑", "v1.1.0": "支持考试词汇标注; 优化分词处理; 修复错误", "v1.0.1": "合并连字符词; 避免ARM平台依赖问题", "v1.0": "新增LexiAnnot" diff --git a/plugins.v2/imdbsource/__init__.py b/plugins.v2/imdbsource/__init__.py index 0109683..8a098e0 100644 --- a/plugins.v2/imdbsource/__init__.py +++ b/plugins.v2/imdbsource/__init__.py @@ -29,7 +29,7 @@ class ImdbSource(_PluginBase): # 插件图标 plugin_icon = "IMDb_IOS-OSX_App.png" # 插件版本 - plugin_version = "1.5.7" + plugin_version = "1.5.8" # 插件作者 plugin_author = "wumode" # 作者主页 diff --git a/plugins.v2/imdbsource/imdbhelper.py b/plugins.v2/imdbsource/imdbhelper.py index 13bab0d..0bb0f89 100644 --- a/plugins.v2/imdbsource/imdbhelper.py +++ b/plugins.v2/imdbsource/imdbhelper.py @@ -1,11 +1,11 @@ import asyncio -import re -from json import JSONDecodeError -from typing import Optional, Any, Dict, Tuple, List, Union +import base64 from collections import OrderedDict from dataclasses import dataclass +from json import JSONDecodeError import json -import base64 +import re +from typing import Any, AsyncGenerator, Dict, Generator, List, Optional, Tuple, Union import httpx import requests @@ -21,7 +21,6 @@ from app.schemas.types import MediaType @dataclass(frozen=True) class SearchParams: - title_types: Optional[Tuple[str, ...]] = None genres: Optional[Tuple[str, ...]] = None sort_by: str = 'POPULARITY' @@ -45,7 +44,6 @@ class SearchState: class ImdbHelper: - all_title_types = ["tvSeries", "tvMiniSeries", "movie", "tvMovie", "musicVideo", "tvShort", "short", "tvEpisode", "tvSpecial"] interests = {'Action': {'Action': 'in0000001', 'Action Epic': 'in0000002', 'B-Action': 'in0000003', 'Car Action': 'in0000004', 'Disaster': 'in0000005', 'Gun Fu': 'in0000197', 'Kung Fu': 'in0000198', 'Martial Arts': 'in0000006', 'One-Person Army Action': 'in0000007', 'Samurai': 'in0000199', 'Superhero': 'in0000008', 'Sword & Sandal': 'in0000009', 'War': 'in0000010', 'War Epic': 'in0000011', 'Wuxia': 'in0000200'}, 'Adventure': {'Adventure': 'in0000012', 'Adventure Epic': 'in0000015', 'Desert Adventure': 'in0000013', 'Dinosaur Adventure': 'in0000014', 'Globetrotting Adventure': 'in0000016', 'Jungle Adventure': 'in0000017', 'Mountain Adventure': 'in0000018', 'Quest': 'in0000019', 'Road Trip': 'in0000020', 'Sea Adventure': 'in0000021', 'Swashbuckler': 'in0000022', 'Teen Adventure': 'in0000023', 'Urban Adventure': 'in0000024'}, 'Animation': {'Adult Animation': 'in0000025', 'Animation': 'in0000026', 'Computer Animation': 'in0000028', 'Hand-Drawn Animation': 'in0000029', 'Stop Motion Animation': 'in0000030'}, 'Anime': {'Anime': 'in0000027', 'Isekai': 'in0000201', 'Iyashikei': 'in0000202', 'Josei': 'in0000203', 'Mecha': 'in0000204', 'Seinen': 'in0000205', 'Shōjo': 'in0000207', 'Shōnen': 'in0000206', 'Slice of Life': 'in0000208'}, 'Comedy': {'Body Swap Comedy': 'in0000031', 'Buddy Comedy': 'in0000032', 'Buddy Cop': 'in0000033', 'Comedy': 'in0000034', 'Dark Comedy': 'in0000035', 'Farce': 'in0000036', 'High-Concept Comedy': 'in0000037', 'Mockumentary': 'in0000038', 'Parody': 'in0000039', 'Quirky Comedy': 'in0000040', 'Raunchy Comedy': 'in0000041', 'Satire': 'in0000042', 'Screwball Comedy': 'in0000043', 'Sitcom': 'in0000044', 'Sketch Comedy': 'in0000045', 'Slapstick': 'in0000046', 'Stand-Up': 'in0000047', 'Stoner Comedy': 'in0000048', 'Teen Comedy': 'in0000049'}, 'Crime': {'Caper': 'in0000050', 'Cop Drama': 'in0000051', 'Crime': 'in0000052', 'Drug Crime': 'in0000053', 'Film Noir': 'in0000054', 'Gangster': 'in0000055', 'Heist': 'in0000056', 'Police Procedural': 'in0000057', 'True Crime': 'in0000058'}, 'Documentary': {'Crime Documentary': 'in0000059', 'Documentary': 'in0000060', 'Docuseries': 'in0000061', 'Faith & Spirituality Documentary': 'in0000062', 'Food Documentary': 'in0000063', 'History Documentary': 'in0000064', 'Military Documentary': 'in0000065', 'Music Documentary': 'in0000066', 'Nature Documentary': 'in0000067', 'Political Documentary': 'in0000068', 'Science & Technology Documentary': 'in0000069', 'Sports Documentary': 'in0000070', 'Travel Documentary': 'in0000071'}, 'Drama': {'Biography': 'in0000072', 'Coming-of-Age': 'in0000073', 'Costume Drama': 'in0000074', 'Docudrama': 'in0000075', 'Drama': 'in0000076', 'Epic': 'in0000077', 'Financial Drama': 'in0000078', 'Historical Epic': 'in0000079', 'History': 'in0000080', 'Korean Drama': 'in0000209', 'Legal Drama': 'in0000081', 'Medical Drama': 'in0000082', 'Period Drama': 'in0000083', 'Political Drama': 'in0000084', 'Prison Drama': 'in0000085', 'Psychological Drama': 'in0000086', 'Showbiz Drama': 'in0000087', 'Soap Opera': 'in0000088', 'Teen Drama': 'in0000089', 'Telenovela': 'in0000210', 'Tragedy': 'in0000090', 'Workplace Drama': 'in0000091'}, 'Family': {'Animal Adventure': 'in0000092', 'Family': 'in0000093'}, 'Fantasy': {'Dark Fantasy': 'in0000095', 'Fairy Tale': 'in0000097', 'Fantasy': 'in0000098', 'Fantasy Epic': 'in0000096', 'Supernatural Fantasy': 'in0000099', 'Sword & Sorcery': 'in0000100', 'Teen Fantasy': 'in0000101'}, 'Game Show': {'Beauty Competition': 'in0000102', 'Cooking Competition': 'in0000103', 'Game Show': 'in0000105', 'Quiz Show': 'in0000104', 'Survival Competition': 'in0000106', 'Talent Competition': 'in0000107'}, 'Horror': {'B-Horror': 'in0000108', 'Body Horror': 'in0000109', 'Folk Horror': 'in0000110', 'Found Footage Horror': 'in0000111', 'Horror': 'in0000112', 'Monster Horror': 'in0000113', 'Psychological Horror': 'in0000114', 'Slasher Horror': 'in0000115', 'Splatter Horror': 'in0000116', 'Supernatural Horror': 'in0000117', 'Teen Horror': 'in0000118', 'Vampire Horror': 'in0000119', 'Werewolf Horror': 'in0000120', 'Witch Horror': 'in0000121', 'Zombie Horror': 'in0000122'}, 'Lifestyle': {'Beauty Makeover': 'in0000123', 'Cooking & Food': 'in0000124', 'Home Improvement': 'in0000125', 'Lifestyle': 'in0000126', 'News': 'in0000211', 'Talk Show': 'in0000127', 'Travel': 'in0000128'}, 'Music': {'Concert': 'in0000129', 'Music': 'in0000130'}, 'Musical': {'Classic Musical': 'in0000131', 'Jukebox Musical': 'in0000132', 'Musical': 'in0000133', 'Pop Musical': 'in0000134', 'Rock Musical': 'in0000135'}, 'Mystery': {'Bumbling Detective': 'in0000136', 'Cozy Mystery': 'in0000137', 'Hard-boiled Detective': 'in0000138', 'Mystery': 'in0000139', 'Suspense Mystery': 'in0000140', 'Whodunnit': 'in0000141'}, 'Reality TV': {'Business Reality TV': 'in0000142', 'Crime Reality TV': 'in0000143', 'Dating Reality TV': 'in0000144', 'Docusoap Reality TV': 'in0000145', 'Hidden Camera': 'in0000146', 'Paranormal Reality TV': 'in0000147', 'Reality TV': 'in0000148'}, 'Romance': {'Dark Romance': 'in0000149', 'Feel-Good Romance': 'in0000151', 'Romance': 'in0000152', 'Romantic Comedy': 'in0000153', 'Romantic Epic': 'in0000150', 'Steamy Romance': 'in0000154', 'Teen Romance': 'in0000155', 'Tragic Romance': 'in0000156'}, 'Sci-Fi': {'Alien Invasion': 'in0000157', 'Artificial Intelligence': 'in0000158', 'Cyberpunk': 'in0000159', 'Dystopian Sci-Fi': 'in0000160', 'Kaiju': 'in0000161', 'Sci-Fi': 'in0000162', 'Sci-Fi Epic': 'in0000163', 'Space Sci-Fi': 'in0000164', 'Steampunk': 'in0000165', 'Time Travel': 'in0000166'}, 'Seasonal': {'Holiday': 'in0000192', 'Holiday Animation': 'in0000193', 'Holiday Comedy': 'in0000194', 'Holiday Family': 'in0000195', 'Holiday Romance': 'in0000196'}, 'Short': {'Short': 'in0000212'}, 'Sport': {'Baseball': 'in0000167', 'Basketball': 'in0000168', 'Boxing': 'in0000169', 'Extreme Sport': 'in0000170', 'Football': 'in0000171', 'Motorsport': 'in0000172', 'Soccer': 'in0000173', 'Sport': 'in0000174', 'Water Sport': 'in0000175'}, 'Thriller': {'Conspiracy Thriller': 'in0000176', 'Cyber Thriller': 'in0000177', 'Erotic Thriller': 'in0000178', 'Giallo': 'in0000179', 'Legal Thriller': 'in0000180', 'Political Thriller': 'in0000181', 'Psychological Thriller': 'in0000182', 'Serial Killer': 'in0000183', 'Spy': 'in0000184', 'Survival': 'in0000185', 'Thriller': 'in0000186'}, 'Western': {'Classical Western': 'in0000187', 'Contemporary Western': 'in0000188', 'Spaghetti Western': 'in0000190', 'Western': 'in0000191', 'Western Epic': 'in0000189'}} @@ -69,7 +67,8 @@ class ImdbHelper: timeout=10, proxies=proxies, session=requests.Session()) - self._free_imdb_req = RequestUtils(accept_type="application/json", proxies=proxies, session=requests.Session()) + self._free_imdb_req = RequestUtils(ua=settings.NORMAL_USER_AGENT, accept_type="application/json", + proxies=proxies, session=requests.Session()) self._imdb_client = httpx.AsyncClient(timeout=10, proxy=proxy_url, headers=self._imdb_headers) self._async_imdb_req = AsyncRequestUtils( @@ -80,6 +79,7 @@ class ImdbHelper: self._free_api_client = httpx.AsyncClient(timeout=10, proxy=proxy_url) self._async_free_api_req = AsyncRequestUtils( + ua=settings.NORMAL_USER_AGENT, accept_type="application/json", client=self._free_api_client ) @@ -736,6 +736,31 @@ class ImdbHelper: return None return r + def episodes_generator(self, title_id: str, season: Optional[str] = None) -> Generator[dict, None, None]: + """ + A generator that fetches all episodes. + :param title_id: IMDb title ID in the format "tt1234567". + :param season: The season number to filter episodes by. + :return: Episodes. + """ + page_token = None + while True: + response = self.episodes( + title_id=title_id, + season=season, + page_size=50, + page_token=page_token + ) + if not response: + return + + for episode in response.get("episodes", []): + yield episode + + page_token = response.get("nextPageToken") + if not page_token: + break + def seasons(self, title_id: str) -> Optional[List[dict]]: """ Retrieve the seasons associated with a specific title. @@ -821,18 +846,15 @@ class ImdbHelper: return None seasons_dict = {season.get('season'): {**season, 'episode_count': 0, 'air_date': '0000-00-00'} for season in seasons} - page_token = None - while True: - episodes = self.episodes(title_id, page_size=50, page_token=page_token) or {} - for episode in episodes.get('episodes', []): - s = episode.get('season') - seasons_dict[s]['episode_count'] += 1 - if not seasons_dict[s].get('release_date'): - seasons_dict[s]['air_date'] = ImdbHelper.release_date_string(episode.get('releaseDate', {})) - seasons_dict[s]['release_date'] = episode.get('releaseDate') - page_token = episodes.get('nextPageToken') - if not page_token: - break + for episode in self.episodes_generator(title_id): + s = episode.get('season') + if s not in seasons_dict: + continue + seasons_dict[s]['episode_count'] += 1 + if not seasons_dict[s].get('release_date'): + seasons_dict[s]['air_date'] = ImdbHelper.release_date_string(episode.get('releaseDate', {})) + seasons_dict[s]['release_date'] = episode.get('releaseDate') + return seasons_dict def match_by(self, name: str, mtype: Optional[MediaType] = None, year: Optional[str] = None) -> Optional[dict]: @@ -1056,6 +1078,31 @@ class ImdbHelper: return None return r + async def async_episodes_generator(self, title_id: str, season: Optional[str] = None) -> AsyncGenerator[dict, None]: + """ + An asynchronous generator that continuously fetches all episodes. + :param title_id: IMDb title ID in the format "tt1234567". + :param season: The season number to filter episodes by. + :return: Episodes. + """ + page_token = None + while True: + response = await self.async_episodes( + title_id=title_id, + season=season, + page_size=50, + page_token=page_token + ) + if not response: + return + + for episode in response.get("episodes", []): + yield episode + + page_token = response.get("nextPageToken") + if not page_token: + break + async def async_seasons(self, title_id: str) -> Optional[List[dict]]: """ Retrieve the seasons associated with a specific title. @@ -1141,18 +1188,14 @@ class ImdbHelper: return None seasons_dict = {season.get('season'): {**season, 'episode_count': 0, 'air_date': '0000-00-00'} for season in seasons} - page_token = None - while True: - episodes = await self.async_episodes(title_id, page_size=50, page_token=page_token) or {} - for episode in episodes.get('episodes', []): - s = episode.get('season') - seasons_dict[s]['episode_count'] += 1 - if not seasons_dict[s].get('release_date'): - seasons_dict[s]['air_date'] = ImdbHelper.release_date_string(episode.get('releaseDate', {})) - seasons_dict[s]['release_date'] = episode.get('releaseDate') - page_token = episodes.get('nextPageToken') - if not page_token: - break + async for episode in self.async_episodes_generator(title_id): + s = episode.get('season') + if s not in seasons_dict: + continue + seasons_dict[s]['episode_count'] += 1 + if not seasons_dict[s].get('release_date'): + seasons_dict[s]['air_date'] = ImdbHelper.release_date_string(episode.get('releaseDate', {})) + seasons_dict[s]['release_date'] = episode.get('releaseDate') return seasons_dict async def async_match_by(self, name: str, mtype: Optional[MediaType] = None, year: Optional[str] = None diff --git a/plugins.v2/lexiannot/__init__.py b/plugins.v2/lexiannot/__init__.py index 92a34d6..770b38e 100644 --- a/plugins.v2/lexiannot/__init__.py +++ b/plugins.v2/lexiannot/__init__.py @@ -1,14 +1,17 @@ from collections import Counter +from datetime import datetime +from enum import Enum import os -import re -import sys import json +import queue +import re +import shutil import subprocess +import sys import time import threading -import queue -import shutil -from typing import Any, List, Dict, Tuple, Optional, Union, Type, TypeVar +from typing import Any, Dict, List, Tuple, Optional, Union, Type, TypeVar +import uuid import venv from pathlib import Path @@ -20,6 +23,7 @@ import spacy from spacy.tokenizer import Tokenizer from app.core.config import settings +from app.helper.directory import DirectoryHelper from app.log import logger from app.plugins import _PluginBase from app.core.cache import cached @@ -33,9 +37,48 @@ from app.schemas.types import EventType from app.core.context import MediaInfo from app.plugins.lexiannot.query_gemini import DialogueTranslationTask, VocabularyTranslationTask, Vocabulary, Context + T = TypeVar('T', VocabularyTranslationTask, DialogueTranslationTask) +class TaskStatus(Enum): + PENDING = "pending" + RUNNING = "running" + COMPLETED = "completed" + FAILED = "failed" + CANCELED = "canceled" + IGNORED = "ignored" + + +class Task: + + def __init__(self, video_path: str, + task_id: Optional[str] = None, + status: TaskStatus = TaskStatus.PENDING, + add_time: Optional[datetime] = None, + complete_time: Optional[datetime] = None, + tokens_used: int = 0): + self.task_id = task_id or str(uuid.uuid4()) + self.video_path = video_path + self.status: TaskStatus = status + self.add_time: Optional[datetime] = add_time + self.complete_time: Optional[datetime] = complete_time + self.tokens_used: int = tokens_used + + def __repr__(self): + return f"" + + def to_dict(self): + return { + "task_id": self.task_id, + "video_path": self.video_path, + "status": self.status.value, + "add_time": self.add_time.isoformat() if self.add_time else None, + "complete_time": self.complete_time.isoformat() if self.complete_time else None, + "tokens_used": self.tokens_used + } + + class LexiAnnot(_PluginBase): # 插件名称 plugin_name = "美剧生词标注" @@ -44,7 +87,7 @@ class LexiAnnot(_PluginBase): # 插件图标 plugin_icon = "LexiAnnot.png" # 插件版本 - plugin_version = "1.1.0" + plugin_version = "1.1.1" # 插件作者 plugin_author = "wumode" # 作者主页 @@ -81,13 +124,13 @@ class LexiAnnot(_PluginBase): _exam_tags: List[str] = [] _spacy_model: str = '' _delete_data: bool = False + _libraries: List[str] = [] # protected variables _lexicon_repo = 'https://raw.githubusercontent.com/wumode/LexiAnnot/' _worker_thread = None - _task_queue = None + _task_queue: queue.Queue[Task] = queue.Queue() _shutdown_event = None - _client = None _total_token_count = 0 _venv_python = None _query_gemini_script = '' @@ -95,11 +138,11 @@ class LexiAnnot(_PluginBase): _accent_color_rgb = None _color_alpha = 0 _loaded = False - _config_updating_lock: Optional[threading.Lock] = None + _config_updating_lock: threading.Lock = threading.Lock() + _tasks_lock: threading.RLock = threading.RLock() + _tasks: Dict[str, Task] = {} def init_plugin(self, config=None): - self._task_queue = queue.Queue() - self._config_updating_lock = threading.Lock() self.stop_service() if config: self._enabled = config.get("enabled") @@ -127,7 +170,10 @@ class LexiAnnot(_PluginBase): self._spacy_model = config.get("spacy_model") or 'en_core_web_sm' self._exam_tags = config.get("exam_tags") or [] self._delete_data = config.get("delete_data") or False + self._libraries = config.get("libraries") or [] + libraries = [library.name for library in DirectoryHelper().get_library_dirs()] + self._libraries = [library for library in self._libraries if library in libraries] self._accent_color_rgb = LexiAnnot.hex_to_rgb(self._accent_color) or (255, 255, 0) self._color_alpha = int(self._opacity) if self._opacity and len(self._opacity) else 0 if self._delete_data: @@ -139,6 +185,10 @@ class LexiAnnot(_PluginBase): self.delete_data() self._delete_data = False self._loaded = False + + tasks = self.load_tasks() + with self._tasks_lock: + self._tasks = tasks if self._enabled: self._query_gemini_script = str(settings.ROOT_PATH / "app" / "plugins" / "lexiannot" / "query_gemini.py") @@ -158,6 +208,8 @@ class LexiAnnot(_PluginBase): """ 拼装插件配置页面,需要返回两块数据:1、页面配置;2、数据结构 """ + library_options = [{'title': library.name,'value': library.name} + for library in DirectoryHelper().get_library_dirs()] return [ { 'component': 'VForm', @@ -742,6 +794,34 @@ class LexiAnnot(_PluginBase): } ] }, + { + 'component': 'VRow', + 'props': { + 'style': { + 'margin-top': '0px' + } + }, + 'content': [ + { + 'component': 'VCol', + 'props': { + 'cols': 12, + }, + 'content': [ + { + 'component': 'VSelect', + 'props': { + 'chips': True, + 'multiple': True, + 'model': 'libraries', + 'label': '监控入库', + 'items': library_options + } + } + ] + } + ] + }, { 'component': 'VRow', 'props': { @@ -835,14 +915,90 @@ class LexiAnnot(_PluginBase): "opacity": '0', "spacy_model": 'en_core_web_sm', "exam_tags": [], - "delete_data": False + "delete_data": False, + "libraries": [] } def get_api(self) -> List[Dict[str, Any]]: pass def get_page(self) -> List[dict]: - pass + headers = [ + {'title': '添加时间', 'key': 'add_time', 'sortable': True}, + {'title': '视频文件', 'key': 'video_path', 'sortable': True}, + {'title': '消耗 Tokens', 'key': 'tokens_used', 'sortable': True}, + {'title': '完成时间', 'key': 'complete_time', 'sortable': True}, + {'title': '任务状态', 'key': 'status', 'sortable': True}, + ] + items = [] + with self._tasks_lock: + sorted_tasks = sorted( + self._tasks.items(), + key=lambda x: x[1].add_time, + reverse=True + ) + + status_map = { + TaskStatus.PENDING: "等待中", + TaskStatus.RUNNING: "处理中", + TaskStatus.COMPLETED: "已完成", + TaskStatus.IGNORED: "已忽略", + TaskStatus.FAILED: "失败", + TaskStatus.CANCELED: "已取消" + } + + for task_id, task in sorted_tasks: + status_text = status_map.get(task.status, task.status) + item = { + 'task_id': task_id, + 'status': status_text, + 'video_path': task.video_path, + 'add_time': task.add_time.strftime("%Y-%m-%d %H:%M:%S") if task.add_time else '-', + 'tokens_used': task.tokens_used, + 'complete_time': task.complete_time.strftime("%Y-%m-%d %H:%M:%S") if task.complete_time else '-', + } + items.append(item) + return [ + { + 'component': 'VRow', + 'props': { + 'style': { + 'overflow': 'hidden', + } + }, + 'content': [ + { + 'component': 'VRow', + 'props': { + 'class': 'd-none d-sm-block', + }, + 'content': [ + { + 'component': 'VCol', + 'props': { + 'cols': 12, + }, + 'content': [ + { + 'component': 'VDataTableVirtual', + 'props': { + 'class': 'text-sm', + 'headers': headers, + 'items': items, + 'height': '30rem', + 'density': 'compact', + 'fixed-header': True, + 'hide-no-data': True, + 'hover': True + } + } + ] + } + ] + } + ] + } + ] @staticmethod def get_command() -> List[Dict[str, Any]]: @@ -877,6 +1033,7 @@ class LexiAnnot(_PluginBase): logger.debug("ℹ️ No running worker thread to stop.") def delete_data(self): + # 删除词典 data_path = self.get_data_path() lexicon_path = data_path / 'lexicon.json' try: @@ -886,7 +1043,9 @@ class LexiAnnot(_PluginBase): pass except Exception as e: logger.error(f"词典 {lexicon_path} 删除失败: {e}") + self.__load_lexicon_from_local.cache_clear() + # 删除虚拟环境 venv_dir = data_path / "venv_genai" if os.path.exists(venv_dir): try: @@ -895,12 +1054,49 @@ class LexiAnnot(_PluginBase): except Exception as e: logger.error(f"虚拟环境 {venv_dir} 删除失败: {e}") + # 删除任务记录 + with self._tasks_lock: + self._tasks = {} + self.save_tasks() + + def load_tasks(self) -> Dict[str, Task]: + raw_tasks = self.get_data('tasks') or {} + tasks = {} + for task_id, task_dict in raw_tasks.items(): + try: + task = Task( + video_path=task_dict['video_path'], + task_id=task_dict['task_id'], + status=TaskStatus(task_dict['status']), + add_time=datetime.fromisoformat(task_dict['add_time']) if task_dict['add_time'] else None, + complete_time=datetime.fromisoformat(task_dict['complete_time']) + if task_dict['complete_time'] else None, + tokens_used=task_dict['tokens_used'], + ) + tasks[task_id] = task + except Exception as e: + logger.error(f"加载任务失败:{e}") + return tasks + + def save_tasks(self): + with self._tasks_lock: + tasks_dict = {task_id: task.to_dict() for task_id, task in self._tasks.items()} + self.save_data("tasks", tasks_dict) + + def add_task(self, video_file: str): + task = Task(video_path=video_file, add_time=datetime.now()) + with self._tasks_lock: + self._tasks[task.task_id] = task + self._task_queue.put(task) + self.save_tasks() + logger.info(f"加入任务队列: {video_file}") + def add_media_file(self, path: str): """ 添加新任务 """ if not self._shutdown_event.is_set(): - self._task_queue.put(path) + self.add_task(path) else: raise RuntimeError("Plugin is shutting down. Cannot add new tasks.") @@ -930,6 +1126,8 @@ class LexiAnnot(_PluginBase): 'opacity': self._opacity, 'spacy_model': self._spacy_model, 'exam_tags': self._exam_tags, + 'delete_data': self._delete_data, + 'libraries': self._libraries }) def __process_tasks(self): @@ -953,29 +1151,44 @@ class LexiAnnot(_PluginBase): if not self._gemini_apikey: logger.warn(f"未提供GEMINI APIKEY") self._gemini_available = False + with self._tasks_lock: + for task_id, task in self._tasks.items(): + if task.status == 'pending': + self._task_queue.put(task) while not self._shutdown_event.is_set(): try: - task = self._task_queue.get(timeout=1) # 最多等待1秒 + task = self._task_queue.get(timeout=1) if task is None: continue - self.__process_file(task) + tokens = self._total_token_count + try: + task.status = TaskStatus.RUNNING + task.status = self.__process_file(task.video_path) + except Exception as e: + task.status = TaskStatus.FAILED + logger.error(f"处理 {task} 出错: {e}") + finally: + self._task_queue.task_done() + task.complete_time = datetime.now() + task.tokens_used = self._total_token_count - tokens + self.save_tasks() except queue.Empty: continue logger.debug("🛑 Worker received shutdown signal, exiting...") - def __process_file(self, path: str): + def __process_file(self, path: str) -> TaskStatus: """ 处理视频文件 """ if not self._loaded: - return + return TaskStatus.FAILED lexicon = self.__load_lexicon_from_local() if not lexicon: logger.error(f"字典加载失败") - return + return TaskStatus.FAILED try: # 为减少内存占用,只在处理时加载 spaCy 模型 - nlp = spacy.load(self._spacy_model) + nlp = LexiAnnot.__load_nlp(self._spacy_model) infixes = list(nlp.Defaults.infixes) infixes = [i for i in infixes if '-' not in i] # 使用修改后的正则表达式重新创建 tokenizer @@ -989,17 +1202,17 @@ class LexiAnnot(_PluginBase): ) except Exception as e: logger.error(f"spaCy 模型 {self._spacy_model} 加载失败: {e}") - return + return TaskStatus.FAILED video = Path(path) if video.suffix.lower() not in settings.RMT_MEDIAEXT: - return + return TaskStatus.CANCELED if not video.exists() or not video.is_file(): logger.warn(f"文件 {str(video)} 不存在, 跳过") - return + return TaskStatus.FAILED subtitle = video.with_suffix(".en.ass") if subtitle.exists(): logger.warn(f"字幕文件 ({subtitle}) 已存在, 跳过") - return + return TaskStatus.IGNORED logger.info(f"📂 Processing file: {path}") if self._send_notify: message = f"正在处理文件: {path}" @@ -1014,7 +1227,7 @@ class LexiAnnot(_PluginBase): logger.info(f'提取到 {len(embedded_subtitles)} 条英语文本字幕') for embedded_subtitle in embedded_subtitles: if self._shutdown_event.is_set(): - return + return TaskStatus.CANCELED ass_subtitle = pysubs2.SSAFile.from_string(embedded_subtitle['subtitle'], format_='ass') if embedded_subtitle.get('codec_id') == 'S_TEXT/UTF8': ass_subtitle = LexiAnnot.set_srt_style(ass_subtitle) @@ -1022,7 +1235,7 @@ class LexiAnnot(_PluginBase): ass_subtitle = self.process_subtitles(ass_subtitle, lexicon.get('cefr'), lexicon.get('coca20k'), lexicon.get('examinations'),lexicon.get('swear_words'), nlp) if self._shutdown_event.is_set(): - return + return TaskStatus.CANCELED if ass_subtitle: try: ass_subtitle.save(str(subtitle)) @@ -1044,6 +1257,8 @@ class LexiAnnot(_PluginBase): mtype=NotificationType.Plugin, text=f"{ret_message}") + return TaskStatus.COMPLETED + @cached(maxsize=1000, ttl=1800) def __load_lexicon_version(self) -> Optional[str]: logger.info(f"正在检查远程词典文件版本...") @@ -1053,6 +1268,7 @@ class LexiAnnot(_PluginBase): return None return version.strip() + @cached(maxsize=1, ttl=3600*6) def __load_lexicon_from_local(self) -> Optional[Dict[str, Any]]: data_path = self.get_data_path() lexicon = {} @@ -1067,6 +1283,11 @@ class LexiAnnot(_PluginBase): return None return lexicon + @staticmethod + @cached(maxsize=1, ttl=3600*6) + def __load_nlp(model: str) -> spacy.Language: + return spacy.load(model) + def __retrieve_lexicon_online(self, version: str) -> Optional[Dict[str, Any]]: logger.info('开始下载词典文件...') lexicon_files = ['cefr', 'coca20k', 'swear_words', 'examinations'] @@ -1094,7 +1315,8 @@ class LexiAnnot(_PluginBase): 测试插件数据加载 """ try: - nlp = spacy.load(self._spacy_model) + logger.info(f"加载 spaCy 模型 {self._spacy_model}...") + nlp = LexiAnnot.__load_nlp(self._spacy_model) except OSError: nlp = LexiAnnot.__load_spacy_model(self._spacy_model) lexicon = self.__load_lexicon_from_local() @@ -1112,13 +1334,14 @@ class LexiAnnot(_PluginBase): @staticmethod def __load_spacy_model(model_name: str): try: + logger.info(f"下载 spaCy 模型 {model_name}...") subprocess.run( [sys.executable, "-m", "spacy", "download", model_name], capture_output=True, text=True, check=True ) - nlp = spacy.load(model_name) + nlp = LexiAnnot.__load_nlp(model_name) logger.info(f"spaCy 模型 '{model_name}' 加载成功!") return nlp except subprocess.CalledProcessError as e: @@ -1143,9 +1366,20 @@ class LexiAnnot(_PluginBase): transfer_info: TransferInfo = event_info.get("transferinfo") if not transfer_info or not transfer_info.target_diritem or not transfer_info.target_diritem.path: return + + # 检查是否为选择的媒体库 + in_libraries = False + libraries = {library.name: library.library_path for library in DirectoryHelper().get_library_dirs()} + for library_name in self._libraries: + if Path(transfer_info.target_diritem.path).is_relative_to(Path(libraries[library_name])): + in_libraries = True + break + if not in_libraries: + return + mediainfo: MediaInfo = event_info.get("mediainfo") if self._english_only: - if mediainfo.original_language != 'en': + if mediainfo.original_language and mediainfo.original_language != 'en': logger.info(f"原始语言 ({mediainfo.original_language}) 不为英语, 跳过 {mediainfo.title}: ") return for new_path in transfer_info.file_list_new: @@ -1488,7 +1722,7 @@ class LexiAnnot(_PluginBase): subtitle_stream_index = track.stream_identifier # MediaInfo 的 stream_id 从 1 开始,ffmpeg 从 0 开始 subtitle = LexiAnnot.__extract_subtitle(video_path, subtitle_stream_index, ffmpeg) if subtitle: - subtitles.append({'title': track.title, 'subtitle': subtitle, 'codec_id': track.codec_id, + subtitles.append({'title': track.title or '', 'subtitle': subtitle, 'codec_id': track.codec_id, 'stream_id': subtitle_stream_index}) if subtitles: return subtitles @@ -1536,7 +1770,7 @@ class LexiAnnot(_PluginBase): temperature: float ) -> List[T]: input_dict = { - 'tasks': [task.dict() for task in tasks], # 保证是可序列化格式 + 'tasks': [task.dict() for task in tasks], 'params': { 'api_key': api_key, 'system_instruction': system_instruction, @@ -1570,6 +1804,7 @@ class LexiAnnot(_PluginBase): return tasks try: + self._total_token_count += response['data']['total_token_count'] or 0 return [task_type(**task_data) for task_data in response["data"]["tasks"]] except Exception as e: logger.warning(f"Failed to reconstruct tasks: {str(e)}")