diff --git a/plugins.v2/airecognizerenhancer/ARCHITECTURE.md b/plugins.v2/airecognizerenhancer/ARCHITECTURE.md index 314622b..76ac470 100644 --- a/plugins.v2/airecognizerenhancer/ARCHITECTURE.md +++ b/plugins.v2/airecognizerenhancer/ARCHITECTURE.md @@ -50,6 +50,36 @@ - `request_timeout` - `max_retries` - `save_failed_samples` +- `save_title_only_samples` +- `max_failed_samples` +- `auto_remove_applied_sample` +- `clear_failed_samples_once` + +## 新增数据面 + +### 可处理失败样本 + +用于承载低置信度、可继续分析和出队的样本数据,支持: + +- 摘要列表 +- 洞察汇总 +- 重放复查 +- 批量复查 +- 批量建议 +- 批量写入 +- 清空与按索引出队 + +### LLM 错误诊断记录 + +用于承载超时、网络错误、模型不可用等 LLM 调用失败信息,和可处理失败样本分开存储,避免噪音污染主样本池。 + +### 样本来源标注 + +失败样本与诊断记录都会保留轻量 provenance 标记,便于区分: + +- 路径样本 +- 仅标题样本 +- 来自哪个 source plugin ## 二期规划 @@ -78,6 +108,8 @@ - 已支持失败样本复查:按当前识别词和当前识别器重跑,并可自动把已修复样本出队 - 已支持失败样本批量复查:可批量重跑并按结果批量出队 - 已支持失败样本批量建议与批量写入:可批量生成建议并批量落库 +- 已支持 LLM 错误诊断记录独立存储,避免污染可处理样本池 +- 已支持样本来源标注,便于区分路径样本与仅标题样本 - 已支持低 token 精简摘要输出,适合作为智能体批处理入口 - 已支持识别词建议模型退化时自动切换到精确规则兜底,优先保证稳定落地 - 下一步重点会放在提示词打磨、失败样本回放和识别词建议质量提升 diff --git a/plugins.v2/airecognizerenhancer/README.md b/plugins.v2/airecognizerenhancer/README.md index e10d001..f3585c7 100644 --- a/plugins.v2/airecognizerenhancer/README.md +++ b/plugins.v2/airecognizerenhancer/README.md @@ -45,18 +45,22 @@ MoviePilot 原版智能体已经提供“整理失败后自动接管再试一次 - 用当前 LLM 结构化判断标题、年份、类型、季集 - 回写 `name / year / season / episode` - 交回 MoviePilot 原生链路继续二次识别 -- 保存低置信度失败样本 +- 保存低置信度失败样本(可处理) +- 保存 LLM 调用错误诊断记录(独立存储,不污染可处理样本池) +- 失败样本和 LLM 诊断记录附带来源标注(`sample_source_kind` / `sample_source_plugin`) +- 可配置是否保存仅标题样本(无真实文件路径),默认关闭以减少噪音 - 提供失败样本工作清单、洞察、重放、删除和清空能力 - 生成并应用 `CustomIdentifiers` 建议 +- 设置页提供“保存时清空失败样本(一次性)”开关,可在保存配置时顺手重置失败样本池 ## 主要接口 -- `GET /api/v1/plugin/AIRecognizerEnhancer/health` - - 查看插件状态、LLM 提供方、模型、阈值和超时配置 -- `POST /api/v1/plugin/AIRecognizerEnhancer/recognize` - - 对单个标题做一次本地结构化识别测试 +### 可处理失败样本接口 + +这些接口只返回因置信度不足或名称为空而落盘的识别失败记录,可用于生成识别词建议、复查和出队。 + - `GET /api/v1/plugin/AIRecognizerEnhancer/failed_samples` - - 查看最近保存的失败样本 + - 查看最近保存的可处理失败样本 
- `GET /api/v1/plugin/AIRecognizerEnhancer/sample_worklist` - 返回适合继续处理的失败样本摘要列表 - `GET /api/v1/plugin/AIRecognizerEnhancer/sample_insights` @@ -68,12 +72,23 @@ MoviePilot 原版智能体已经提供“整理失败后自动接管再试一次 - `POST /api/v1/plugin/AIRecognizerEnhancer/apply_suggested_identifier` - 把建议规则写入系统 `CustomIdentifiers` +### LLM 诊断错误接口 + +这些接口返回因 LLM 调用异常(如超时、网络错误、模型不可用)而产生的诊断记录。它们不参与识别词生成流程,仅供排查 LLM 问题使用。 + +- `GET /api/v1/plugin/AIRecognizerEnhancer/llm_errors` + - 查看 LLM 调用失败的诊断记录 +- `POST /api/v1/plugin/AIRecognizerEnhancer/clear_llm_errors` + - 清空 LLM 错误诊断记录 + 其余批量接口和清理接口可以按需要继续使用,详细路径以插件 `get_api()` 暴露结果为准。 ## 配置建议 - 先确认 MoviePilot 本身已经配置好可用的 LLM -- 建议保持“保存失败样本”开启 +- 建议保持“保存失败样本”开启 +- 默认情况下“保存仅标题样本”是关闭的,这可以减少没有真实文件路径的低价值噪音;如果你的使用场景以纯标题匹配为主,可以在设置中手动开启 +- 如果失败样本池已经积累了大量历史噪音,可在设置页勾选“一次性清空”后保存 - 如果你经常处理历史资源或网盘资源,建议定期查看: - `failed_samples` - `sample_worklist` @@ -81,9 +96,9 @@ MoviePilot 原版智能体已经提供“整理失败后自动接管再试一次 ## 已验证情况 -当前版本:`0.1.12` +当前版本:`0.1.13` -当前 Release:https://github.com/liuyuexi1987/MoviePilot-Plugins/releases/tag/v0.2.68 +当前 Release:https://github.com/liuyuexi1987/MoviePilot-Plugins/releases/tag/v0.2.73 这版已经验证过: diff --git a/plugins.v2/airecognizerenhancer/__init__.py b/plugins.v2/airecognizerenhancer/__init__.py index eee252e..33458c1 100644 --- a/plugins.v2/airecognizerenhancer/__init__.py +++ b/plugins.v2/airecognizerenhancer/__init__.py @@ -53,7 +53,7 @@ class AIRecognizerEnhancer(_PluginBase): plugin_name = "AI识别增强" plugin_desc = "直接复用 MoviePilot 当前 LLM 配置,在原生识别失败后做本地结构化识别兜底,并交回原生链路继续二次识别。" plugin_icon = "https://raw.githubusercontent.com/liuyuexi1987/MoviePilot-Plugins/main/icons/airecognizerenhancer.png" - plugin_version = "0.1.12" + plugin_version = "0.1.13" plugin_author = "liuyuexi1987" plugin_level = 1 author_url = "https://github.com/liuyuexi1987" @@ -67,8 +67,10 @@ class AIRecognizerEnhancer(_PluginBase): _request_timeout = 25 _max_retries = 2 _save_failed_samples = True + _save_title_only_samples = False _max_failed_samples = 200 _auto_remove_applied_sample = 
True + _clear_failed_samples_once = False _systemconfig: Optional[SystemConfigOper] = None def init_plugin(self, config: Optional[Dict[str, Any]] = None): @@ -79,10 +81,17 @@ class AIRecognizerEnhancer(_PluginBase): self._request_timeout = self._safe_int(config.get("request_timeout"), 25) self._max_retries = max(1, min(5, self._safe_int(config.get("max_retries"), 2))) self._save_failed_samples = bool(config.get("save_failed_samples", True)) + self._save_title_only_samples = bool(config.get("save_title_only_samples", False)) self._max_failed_samples = max(20, min(1000, self._safe_int(config.get("max_failed_samples"), 200))) self._auto_remove_applied_sample = bool(config.get("auto_remove_applied_sample", True)) + self._clear_failed_samples_once = bool(config.get("clear_failed_samples_once", False)) self._systemconfig = SystemConfigOper() self._register_events() + if self._clear_failed_samples_once: + cleared = self._clear_failed_samples() + self._clear_failed_samples_once = False + self.update_config(self._build_config({"clear_failed_samples_once": False})) + logger.info(f"[AI识别增强] 已按配置清空失败样本 {cleared} 条") def get_state(self) -> bool: return self._enabled @@ -117,11 +126,28 @@ class AIRecognizerEnhancer(_PluginBase): if header.lower().startswith("bearer "): return header.split(" ", 1)[1].strip() if body: - for key in ("apikey", "api_key"): + for key in ("apikey", "api_key", "token"): token = str(body.get(key) or "").strip() if token: return token - return str(request.query_params.get("apikey") or "").strip() + return str(request.query_params.get("apikey") or request.query_params.get("token") or "").strip() + + def _build_config(self, overrides: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + config = { + "enabled": self._enabled, + "debug": self._debug, + "confidence_threshold": self._confidence_threshold, + "request_timeout": self._request_timeout, + "max_retries": self._max_retries, + "save_failed_samples": self._save_failed_samples, + 
"save_title_only_samples": self._save_title_only_samples, + "max_failed_samples": self._max_failed_samples, + "auto_remove_applied_sample": self._auto_remove_applied_sample, + "clear_failed_samples_once": self._clear_failed_samples_once, + } + if overrides: + config.update(overrides) + return config def _check_api_access(self, request: Request, body: Optional[Dict[str, Any]] = None) -> Tuple[bool, str]: expected = str(getattr(settings, "API_TOKEN", "") or "").strip() @@ -174,6 +200,30 @@ class AIRecognizerEnhancer(_PluginBase): ) return str(title or "").strip(), str(path or "").strip() + @staticmethod + def _extract_provenance(event_data: Any) -> Dict[str, str]: + """Extract lightweight provenance metadata from event data for sample recording.""" + source_plugin = "" + if isinstance(event_data, dict): + source_plugin = str(event_data.get("source_plugin") or "").strip() + else: + source_plugin = str(getattr(event_data, "source_plugin", "") or "").strip() + + title = "" + path = "" + if isinstance(event_data, dict): + title = str(event_data.get("title") or event_data.get("name") or event_data.get("org_string") or "").strip() + path = str(event_data.get("path") or event_data.get("file_path") or event_data.get("org_string") or "").strip() + else: + title = str(getattr(event_data, "title", "") or getattr(event_data, "name", "") or getattr(event_data, "org_string", "") or "").strip() + path = str(getattr(event_data, "path", "") or getattr(event_data, "file_path", "") or getattr(event_data, "org_string", "") or "").strip() + + is_path_backed = bool(path) and path != title and ("/" in path or "\\" in path) + return { + "sample_source_kind": "path_backed" if is_path_backed else "title_only", + "sample_source_plugin": source_plugin, + } + def _build_meta_hint(self, raw_text: str) -> Dict[str, Any]: try: meta = MetaInfo(raw_text) @@ -221,6 +271,12 @@ class AIRecognizerEnhancer(_PluginBase): def _sample_path(self) -> Path: return self.get_data_path() / "failed_samples.jsonl" + 
def _llm_errors_path(self) -> Path: + return self.get_data_path() / "llm_errors.jsonl" + + def _failed_sample_cap(self) -> int: + return max(20, min(1000, self._safe_int(self._max_failed_samples, 200))) + @staticmethod def _sample_identity(payload: Dict[str, Any]) -> str: return json.dumps( @@ -236,7 +292,8 @@ class AIRecognizerEnhancer(_PluginBase): def _write_failed_samples(self, rows: List[Dict[str, Any]]) -> None: sample_path = self._sample_path() sample_path.parent.mkdir(parents=True, exist_ok=True) - trimmed = rows[-self._max_failed_samples:] + filtered = [row for row in rows if not str(row.get("reason") or "").startswith("llm_error:")] + trimmed = filtered[-self._failed_sample_cap():] with sample_path.open("w", encoding="utf-8") as f: for row in trimmed: f.write(json.dumps(row, ensure_ascii=False) + "\n") @@ -254,6 +311,69 @@ class AIRecognizerEnhancer(_PluginBase): except Exception as exc: logger.warning(f"[AI识别增强] 写入失败样本失败: {exc}") + def _record_llm_error(self, title: str, path: str, meta_hint: Dict[str, Any], error: Any, provenance: Optional[Dict[str, str]] = None) -> None: + try: + error_path = self._llm_errors_path() + error_path.parent.mkdir(parents=True, exist_ok=True) + provenance = provenance or {} + entry = { + "title": title, + "path": path, + "meta_hint": meta_hint, + "reason": f"llm_error:{error}", + "timestamp": __import__("datetime").datetime.now().isoformat(), + "sample_source_kind": provenance.get("sample_source_kind", "unknown"), + "sample_source_plugin": provenance.get("sample_source_plugin", ""), + } + existing = self._read_llm_errors(limit=1000) + existing.reverse() + new_identity = {"title": title, "path": path, "reason": entry["reason"]} + existing = [ + row for row in existing + if { + "title": row.get("title"), + "path": row.get("path"), + "reason": row.get("reason"), + } != new_identity + ] + existing.append(entry) + trimmed = existing[-self._failed_sample_cap():] + with error_path.open("w", encoding="utf-8") as f: + for row in 
trimmed: + f.write(json.dumps(row, ensure_ascii=False) + "\n") + except Exception as exc: + logger.warning(f"[AI识别增强] 写入 LLM 错误诊断记录失败: {exc}") + + def _read_llm_errors(self, limit: int = 20) -> List[Dict[str, Any]]: + error_path = self._llm_errors_path() + if not error_path.exists(): + return [] + rows: List[Dict[str, Any]] = [] + try: + with error_path.open("r", encoding="utf-8") as f: + for line in f: + line = line.strip() + if not line: + continue + try: + rows.append(json.loads(line)) + except Exception: + continue + except Exception as exc: + logger.warning(f"[AI识别增强] 读取 LLM 错误诊断记录失败: {exc}") + return [] + if limit > 0: + rows = rows[-limit:] + rows.reverse() + return rows + + def _clear_llm_errors(self) -> int: + rows = self._read_llm_errors(limit=10000) + error_path = self._llm_errors_path() + if error_path.exists(): + error_path.unlink() + return len(rows) + def _read_failed_samples(self, limit: int = 20) -> List[Dict[str, Any]]: sample_path = self._sample_path() if not sample_path.exists(): @@ -353,7 +473,7 @@ class AIRecognizerEnhancer(_PluginBase): sample_index: Optional[Any] = None, limit: int = 100, ) -> Tuple[Optional[int], Optional[Dict[str, Any]], str]: - samples = self._read_failed_samples(limit=max(1, min(limit, 200))) + samples = self._read_failed_samples(limit=max(1, min(limit, self._failed_sample_cap()))) if not samples: return None, None, "暂无失败样本" index = self._safe_int(sample_index, 0) @@ -369,9 +489,13 @@ class AIRecognizerEnhancer(_PluginBase): self, sample_indexes: Optional[List[Any]] = None, limit: int = 10, - pool_limit: int = 200, + pool_limit: int = 0, ) -> Tuple[List[int], List[Dict[str, Any]], str]: - current_samples = self._inject_sample_indices(self._read_failed_samples(limit=max(1, min(pool_limit, 1000)))) + if pool_limit <= 0: + pool_limit = self._failed_sample_cap() + current_samples = self._inject_sample_indices( + self._read_failed_samples(limit=max(1, min(pool_limit, self._failed_sample_cap()))) + ) if not current_samples: 
return [], [], "暂无失败样本" if isinstance(sample_indexes, list) and sample_indexes: @@ -414,6 +538,8 @@ class AIRecognizerEnhancer(_PluginBase): "title": sample.get("title"), "path": sample.get("path"), "reason": sample.get("reason"), + "sample_source_kind": sample.get("sample_source_kind", ""), + "sample_source_plugin": sample.get("sample_source_plugin", ""), "guess_name": guess.get("name"), "guess_confidence": self._safe_float(guess.get("confidence"), 0.0), "verified_title": verified.get("title"), @@ -551,7 +677,10 @@ class AIRecognizerEnhancer(_PluginBase): label = self._sample_display_name(summary) confidence = round(self._safe_float(summary.get("guess_confidence"), 0.0), 2) can_suggest = "可建议" if summary.get("can_auto_suggest") else "需人工" - lines.append(f"{summary.get('sample_index')}. {label} | 置信度 {confidence} | {can_suggest}") + source_tag = "有路径" if summary.get("sample_source_kind") == "path_backed" else "仅标题" + source_plugin = summary.get("sample_source_plugin") or "" + source_info = f" | {source_tag}" + (f" ({source_plugin})" if source_plugin else "") + lines.append(f"{summary.get('sample_index')}. 
{label} | 置信度 {confidence} | {can_suggest}{source_info}") lines.append("下一步:可直接调用批量建议或批量复查接口。") return "\n".join(lines) @@ -937,7 +1066,7 @@ AI 识别增强结果: selected_indexes, _, message = self._select_failed_sample_indexes( sample_indexes=body.get("sample_indexes"), limit=limit, - pool_limit=200, + pool_limit=self._failed_sample_cap(), ) if not selected_indexes: return {"success": False, "message": message} @@ -1006,7 +1135,7 @@ AI 识别增强结果: selected_indexes, _, message = self._select_failed_sample_indexes( sample_indexes=body.get("sample_indexes"), limit=limit, - pool_limit=200, + pool_limit=self._failed_sample_cap(), ) if not selected_indexes: return {"success": False, "message": message} @@ -1102,7 +1231,7 @@ AI 识别增强结果: selected_indexes, _, message = self._select_failed_sample_indexes( sample_indexes=body.get("sample_indexes"), limit=limit, - pool_limit=200, + pool_limit=self._failed_sample_cap(), ) if not selected_indexes: return {"success": False, "message": message} @@ -1356,40 +1485,49 @@ AI 识别增强结果: logger.warning(f"[AI识别增强] 二次校验失败: {exc}") return None - def _recognize(self, title: str, path: str = "", record_failed_sample: bool = True) -> Dict[str, Any]: + def _recognize( + self, title: str, path: str = "", record_failed_sample: bool = True, + provenance: Optional[Dict[str, str]] = None, + ) -> Dict[str, Any]: title = str(title or "").strip() path = str(path or "").strip() if not title and path: title = Path(path).name if not title: return {"success": False, "message": "标题为空"} + provenance = provenance or {} + sample_source_kind = provenance.get("sample_source_kind") + is_title_only = sample_source_kind == "title_only" if sample_source_kind else not path try: guess = self._invoke_llm(title, path) except Exception as exc: if record_failed_sample: - self._record_failed_sample( - { - "title": title, - "path": path, - "meta_hint": self._build_meta_hint(path or title), - "reason": f"llm_error:{exc}", - } - ) + if is_title_only and not self._save_title_only_samples: + 
if self._debug: + logger.info(f"[AI识别增强] 跳过保存仅标题 LLM 错误: {title} (save_title_only_samples=False)") + else: + self._record_llm_error(title, path, self._build_meta_hint(path or title), exc, provenance=provenance) return {"success": False, "message": f"LLM 调用失败: {exc}"} verified = self._verify_guess(title, path, guess) passed = bool(guess.name and guess.confidence >= self._confidence_threshold) if not passed and record_failed_sample: - self._record_failed_sample( - { - "title": title, - "path": path, - "meta_hint": self._build_meta_hint(path or title), - "guess": guess.model_dump(), - "verified_media_info": self._compact_verified_summary(verified), - "reason": "low_confidence_or_empty_name", - } - ) + if is_title_only and not self._save_title_only_samples: + if self._debug: + logger.info(f"[AI识别增强] 跳过保存仅标题样本: {title} (save_title_only_samples=False)") + else: + self._record_failed_sample( + { + "title": title, + "path": path, + "meta_hint": self._build_meta_hint(path or title), + "guess": guess.model_dump(), + "verified_media_info": self._compact_verified_summary(verified), + "reason": "low_confidence_or_empty_name", + "sample_source_kind": provenance.get("sample_source_kind", "unknown"), + "sample_source_plugin": provenance.get("sample_source_plugin", ""), + } + ) return { "success": passed, "message": "success" if passed else "识别结果置信度不足,已放弃注入", @@ -1404,7 +1542,8 @@ AI 识别增强结果: title, path = self._extract_title_path(event_data) if not title and not path: return - result = self._recognize(title=title, path=path) + provenance = self._extract_provenance(event_data) + result = self._recognize(title=title, path=path, provenance=provenance) if not result.get("success"): if self._debug: logger.info(f"[AI识别增强] 跳过注入: {title or path} - {result.get('message')}") @@ -1496,7 +1635,7 @@ AI 识别增强结果: if not ok: return {"success": False, "message": message} limit = self._safe_int(request.query_params.get("limit"), 50) - limit = max(1, min(limit, 200)) + limit = max(1, min(limit, 
self._failed_sample_cap())) top = self._safe_int(request.query_params.get("top"), 10) top = max(1, min(top, 20)) samples = self._inject_sample_indices(self._read_failed_samples(limit=limit)) @@ -1512,7 +1651,7 @@ AI 识别增强结果: return {"success": False, "message": message} limit = self._safe_int(request.query_params.get("limit"), 5) limit = max(1, min(limit, 20)) - samples = self._inject_sample_indices(self._read_failed_samples(limit=100)) + samples = self._inject_sample_indices(self._read_failed_samples(limit=self._failed_sample_cap())) return { "success": True, "data": { @@ -1558,6 +1697,34 @@ AI 识别增强结果: }, } + async def api_llm_errors(self, request: Request): + ok, message = self._check_api_access(request) + if not ok: + return {"success": False, "message": message} + limit = self._safe_int(request.query_params.get("limit"), 20) + limit = max(1, min(limit, 100)) + errors = self._read_llm_errors(limit=limit) + return { + "success": True, + "data": { + "count": len(errors), + "errors": errors, + }, + } + + async def api_clear_llm_errors(self, request: Request): + ok, message = self._check_api_access(request) + if not ok: + return {"success": False, "message": message} + cleared = self._clear_llm_errors() + return { + "success": True, + "message": "success", + "data": { + "cleared_count": cleared, + }, + } + async def api_remove_failed_sample(self, request: Request): body = await request.json() ok, message = self._check_api_access(request, body) @@ -1697,6 +1864,18 @@ AI 识别增强结果: "methods": ["POST"], "summary": "清空失败样本文件", }, + { + "path": "/llm_errors", + "endpoint": self.api_llm_errors, + "methods": ["GET"], + "summary": "查看 LLM 调用失败的诊断记录", + }, + { + "path": "/clear_llm_errors", + "endpoint": self.api_clear_llm_errors, + "methods": ["POST"], + "summary": "清空 LLM 错误诊断记录", + }, { "path": "/remove_failed_sample", "endpoint": self.api_remove_failed_sample, @@ -1731,7 +1910,8 @@ AI 识别增强结果: def get_page(self) -> List[dict]: llm_ready = bool(getattr(settings, "LLM_API_KEY", 
None)) - failed_samples_count = len(self._read_failed_samples(limit=200)) + failed_samples_count = len(self._read_failed_samples(limit=self._failed_sample_cap())) + llm_errors_count = len(self._read_llm_errors(limit=self._max_failed_samples)) custom_identifiers_count = len(self._get_custom_identifiers()) llm_provider = getattr(settings, "LLM_PROVIDER", "—") llm_model = getattr(settings, "LLM_MODEL", "—") @@ -1784,22 +1964,27 @@ AI 识别增强结果: "content": [ { "component": "VCol", - "props": {"cols": 12, "md": 3}, + "props": {"cols": 12, "sm": 6, "md": 2}, "content": [stat_card("当前状态", "已启用" if self._enabled else "未启用")], }, { "component": "VCol", - "props": {"cols": 12, "md": 3}, + "props": {"cols": 12, "sm": 6, "md": 2}, "content": [stat_card("LLM 可用", "是" if llm_ready else "否", f"{llm_provider} / {llm_model}")], }, { "component": "VCol", - "props": {"cols": 12, "md": 3}, - "content": [stat_card("失败样本", f"{failed_samples_count} 条", f"上限 {self._max_failed_samples} 条")], + "props": {"cols": 12, "sm": 6, "md": 3}, + "content": [stat_card("可处理失败样本", f"{failed_samples_count} 条", f"上限 {self._max_failed_samples} 条")], }, { "component": "VCol", - "props": {"cols": 12, "md": 3}, + "props": {"cols": 12, "sm": 6, "md": 2}, + "content": [stat_card("LLM 错误", f"{llm_errors_count} 条", "诊断记录")], + }, + { + "component": "VCol", + "props": {"cols": 12, "sm": 6, "md": 3}, "content": [stat_card("自定义识别词", f"{custom_identifiers_count} 条", "系统 CustomIdentifiers")], }, ], @@ -1810,34 +1995,7 @@ AI 识别增强结果: "content": [ { "component": "VCol", - "props": {"cols": 12, "md": 6}, - "content": [ - { - "component": "VCard", - "props": {"variant": "outlined", "class": "pa-4 h-100"}, - "content": [ - { - "component": "div", - "props": {"class": "text-subtitle-1 font-weight-bold mb-2"}, - "text": "识别兜底", - }, - { - "component": "div", - "props": {"class": "text-body-2 text-medium-emphasis"}, - "text": "在 Chain NameRecognize 阶段回写 name / year / season / episode,供 MoviePilot 继续原生二次识别。", - }, - { - 
"component": "div", - "props": {"class": "text-caption text-medium-emphasis mt-3"}, - "text": f"置信度阈值:{self._confidence_threshold};请求超时:{self._request_timeout} 秒", - }, - ], - } - ], - }, - { - "component": "VCol", - "props": {"cols": 12, "md": 6}, + "props": {"cols": 12, "md": 12}, "content": [ { "component": "VCard", @@ -1873,6 +2031,7 @@ AI 识别增强结果: return "vuetify", None def get_form(self) -> Tuple[List[dict], Dict[str, Any]]: + failed_samples_count = len(self._read_failed_samples(limit=self._failed_sample_cap())) form = [ { "component": "VForm", @@ -1896,6 +2055,25 @@ AI 识别增强结果: } ], }, + { + "component": "VRow", + "content": [ + { + "component": "VCol", + "props": {"cols": 12}, + "content": [ + { + "component": "VAlert", + "props": { + "type": "warning", + "variant": "tonal", + "text": f"当前累计 {failed_samples_count} 条失败样本。如需重置噪音数据,请勾选下方“一次性清空”开关后点击保存。该操作只清空失败样本,不会删除已写入的 CustomIdentifiers。", + }, + } + ], + } + ], + }, { "component": "VRow", "content": [ @@ -1929,6 +2107,19 @@ AI 识别增强结果: } ], }, + { + "component": "VCol", + "props": {"cols": 12, "md": 4}, + "content": [ + { + "component": "VSwitch", + "props": { + "model": "save_title_only_samples", + "label": "保存仅标题样本", + }, + } + ], + }, ], }, { @@ -2010,6 +2201,24 @@ AI 识别增强结果: } ], }, + { + "component": "VRow", + "content": [ + { + "component": "VCol", + "props": {"cols": 12}, + "content": [ + { + "component": "VSwitch", + "props": { + "model": "clear_failed_samples_once", + "label": "保存时清空失败样本(一次性)", + }, + } + ], + } + ], + }, { "component": "VRow", "content": [ @@ -2038,6 +2247,8 @@ AI 识别增强结果: "request_timeout": 25, "max_retries": 2, "save_failed_samples": True, + "save_title_only_samples": False, "max_failed_samples": 200, "auto_remove_applied_sample": True, + "clear_failed_samples_once": False, } diff --git a/plugins/airecognizerenhancer/ARCHITECTURE.md b/plugins/airecognizerenhancer/ARCHITECTURE.md index 314622b..76ac470 100644 --- a/plugins/airecognizerenhancer/ARCHITECTURE.md +++ 
b/plugins/airecognizerenhancer/ARCHITECTURE.md @@ -50,6 +50,36 @@ - `request_timeout` - `max_retries` - `save_failed_samples` +- `save_title_only_samples` +- `max_failed_samples` +- `auto_remove_applied_sample` +- `clear_failed_samples_once` + +## 新增数据面 + +### 可处理失败样本 + +用于承载低置信度、可继续分析和出队的样本数据,支持: + +- 摘要列表 +- 洞察汇总 +- 重放复查 +- 批量复查 +- 批量建议 +- 批量写入 +- 清空与按索引出队 + +### LLM 错误诊断记录 + +用于承载超时、网络错误、模型不可用等 LLM 调用失败信息,和可处理失败样本分开存储,避免噪音污染主样本池。 + +### 样本来源标注 + +失败样本与诊断记录都会保留轻量 provenance 标记,便于区分: + +- 路径样本 +- 仅标题样本 +- 来自哪个 source plugin ## 二期规划 @@ -78,6 +108,8 @@ - 已支持失败样本复查:按当前识别词和当前识别器重跑,并可自动把已修复样本出队 - 已支持失败样本批量复查:可批量重跑并按结果批量出队 - 已支持失败样本批量建议与批量写入:可批量生成建议并批量落库 +- 已支持 LLM 错误诊断记录独立存储,避免污染可处理样本池 +- 已支持样本来源标注,便于区分路径样本与仅标题样本 - 已支持低 token 精简摘要输出,适合作为智能体批处理入口 - 已支持识别词建议模型退化时自动切换到精确规则兜底,优先保证稳定落地 - 下一步重点会放在提示词打磨、失败样本回放和识别词建议质量提升 diff --git a/plugins/airecognizerenhancer/README.md b/plugins/airecognizerenhancer/README.md index e10d001..f3585c7 100644 --- a/plugins/airecognizerenhancer/README.md +++ b/plugins/airecognizerenhancer/README.md @@ -45,18 +45,22 @@ MoviePilot 原版智能体已经提供“整理失败后自动接管再试一次 - 用当前 LLM 结构化判断标题、年份、类型、季集 - 回写 `name / year / season / episode` - 交回 MoviePilot 原生链路继续二次识别 -- 保存低置信度失败样本 +- 保存低置信度失败样本(可处理) +- 保存 LLM 调用错误诊断记录(独立存储,不污染可处理样本池) +- 失败样本和 LLM 诊断记录附带来源标注(`sample_source_kind` / `sample_source_plugin`) +- 可配置是否保存仅标题样本(无真实文件路径),默认关闭以减少噪音 - 提供失败样本工作清单、洞察、重放、删除和清空能力 - 生成并应用 `CustomIdentifiers` 建议 +- 设置页提供“保存时清空失败样本(一次性)”开关,可在保存配置时顺手重置失败样本池 ## 主要接口 -- `GET /api/v1/plugin/AIRecognizerEnhancer/health` - - 查看插件状态、LLM 提供方、模型、阈值和超时配置 -- `POST /api/v1/plugin/AIRecognizerEnhancer/recognize` - - 对单个标题做一次本地结构化识别测试 +### 可处理失败样本接口 + +这些接口只返回因置信度不足或名称为空而落盘的识别失败记录,可用于生成识别词建议、复查和出队。 + - `GET /api/v1/plugin/AIRecognizerEnhancer/failed_samples` - - 查看最近保存的失败样本 + - 查看最近保存的可处理失败样本 - `GET /api/v1/plugin/AIRecognizerEnhancer/sample_worklist` - 返回适合继续处理的失败样本摘要列表 - `GET /api/v1/plugin/AIRecognizerEnhancer/sample_insights` @@ -68,12 +72,23 @@ MoviePilot 原版智能体已经提供“整理失败后自动接管再试一次 - `POST 
/api/v1/plugin/AIRecognizerEnhancer/apply_suggested_identifier` - 把建议规则写入系统 `CustomIdentifiers` +### LLM 诊断错误接口 + +这些接口返回因 LLM 调用异常(如超时、网络错误、模型不可用)而产生的诊断记录。它们不参与识别词生成流程,仅供排查 LLM 问题使用。 + +- `GET /api/v1/plugin/AIRecognizerEnhancer/llm_errors` + - 查看 LLM 调用失败的诊断记录 +- `POST /api/v1/plugin/AIRecognizerEnhancer/clear_llm_errors` + - 清空 LLM 错误诊断记录 + 其余批量接口和清理接口可以按需要继续使用,详细路径以插件 `get_api()` 暴露结果为准。 ## 配置建议 - 先确认 MoviePilot 本身已经配置好可用的 LLM -- 建议保持“保存失败样本”开启 +- 建议保持“保存失败样本”开启 +- 默认情况下“保存仅标题样本”是关闭的,这可以减少没有真实文件路径的低价值噪音;如果你的使用场景以纯标题匹配为主,可以在设置中手动开启 +- 如果失败样本池已经积累了大量历史噪音,可在设置页勾选“一次性清空”后保存 - 如果你经常处理历史资源或网盘资源,建议定期查看: - `failed_samples` - `sample_worklist` @@ -81,9 +96,9 @@ MoviePilot 原版智能体已经提供“整理失败后自动接管再试一次 ## 已验证情况 -当前版本:`0.1.12` +当前版本:`0.1.13` -当前 Release:https://github.com/liuyuexi1987/MoviePilot-Plugins/releases/tag/v0.2.68 +当前 Release:https://github.com/liuyuexi1987/MoviePilot-Plugins/releases/tag/v0.2.73 这版已经验证过: diff --git a/plugins/airecognizerenhancer/__init__.py b/plugins/airecognizerenhancer/__init__.py index eee252e..33458c1 100644 --- a/plugins/airecognizerenhancer/__init__.py +++ b/plugins/airecognizerenhancer/__init__.py @@ -53,7 +53,7 @@ class AIRecognizerEnhancer(_PluginBase): plugin_name = "AI识别增强" plugin_desc = "直接复用 MoviePilot 当前 LLM 配置,在原生识别失败后做本地结构化识别兜底,并交回原生链路继续二次识别。" plugin_icon = "https://raw.githubusercontent.com/liuyuexi1987/MoviePilot-Plugins/main/icons/airecognizerenhancer.png" - plugin_version = "0.1.12" + plugin_version = "0.1.13" plugin_author = "liuyuexi1987" plugin_level = 1 author_url = "https://github.com/liuyuexi1987" @@ -67,8 +67,10 @@ class AIRecognizerEnhancer(_PluginBase): _request_timeout = 25 _max_retries = 2 _save_failed_samples = True + _save_title_only_samples = False _max_failed_samples = 200 _auto_remove_applied_sample = True + _clear_failed_samples_once = False _systemconfig: Optional[SystemConfigOper] = None def init_plugin(self, config: Optional[Dict[str, Any]] = None): @@ -79,10 +81,17 @@ class AIRecognizerEnhancer(_PluginBase): 
self._request_timeout = self._safe_int(config.get("request_timeout"), 25) self._max_retries = max(1, min(5, self._safe_int(config.get("max_retries"), 2))) self._save_failed_samples = bool(config.get("save_failed_samples", True)) + self._save_title_only_samples = bool(config.get("save_title_only_samples", False)) self._max_failed_samples = max(20, min(1000, self._safe_int(config.get("max_failed_samples"), 200))) self._auto_remove_applied_sample = bool(config.get("auto_remove_applied_sample", True)) + self._clear_failed_samples_once = bool(config.get("clear_failed_samples_once", False)) self._systemconfig = SystemConfigOper() self._register_events() + if self._clear_failed_samples_once: + cleared = self._clear_failed_samples() + self._clear_failed_samples_once = False + self.update_config(self._build_config({"clear_failed_samples_once": False})) + logger.info(f"[AI识别增强] 已按配置清空失败样本 {cleared} 条") def get_state(self) -> bool: return self._enabled @@ -117,11 +126,28 @@ class AIRecognizerEnhancer(_PluginBase): if header.lower().startswith("bearer "): return header.split(" ", 1)[1].strip() if body: - for key in ("apikey", "api_key"): + for key in ("apikey", "api_key", "token"): token = str(body.get(key) or "").strip() if token: return token - return str(request.query_params.get("apikey") or "").strip() + return str(request.query_params.get("apikey") or request.query_params.get("token") or "").strip() + + def _build_config(self, overrides: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + config = { + "enabled": self._enabled, + "debug": self._debug, + "confidence_threshold": self._confidence_threshold, + "request_timeout": self._request_timeout, + "max_retries": self._max_retries, + "save_failed_samples": self._save_failed_samples, + "save_title_only_samples": self._save_title_only_samples, + "max_failed_samples": self._max_failed_samples, + "auto_remove_applied_sample": self._auto_remove_applied_sample, + "clear_failed_samples_once": self._clear_failed_samples_once, + 
} + if overrides: + config.update(overrides) + return config def _check_api_access(self, request: Request, body: Optional[Dict[str, Any]] = None) -> Tuple[bool, str]: expected = str(getattr(settings, "API_TOKEN", "") or "").strip() @@ -174,6 +200,30 @@ class AIRecognizerEnhancer(_PluginBase): ) return str(title or "").strip(), str(path or "").strip() + @staticmethod + def _extract_provenance(event_data: Any) -> Dict[str, str]: + """Extract lightweight provenance metadata from event data for sample recording.""" + source_plugin = "" + if isinstance(event_data, dict): + source_plugin = str(event_data.get("source_plugin") or "").strip() + else: + source_plugin = str(getattr(event_data, "source_plugin", "") or "").strip() + + title = "" + path = "" + if isinstance(event_data, dict): + title = str(event_data.get("title") or event_data.get("name") or event_data.get("org_string") or "").strip() + path = str(event_data.get("path") or event_data.get("file_path") or event_data.get("org_string") or "").strip() + else: + title = str(getattr(event_data, "title", "") or getattr(event_data, "name", "") or getattr(event_data, "org_string", "") or "").strip() + path = str(getattr(event_data, "path", "") or getattr(event_data, "file_path", "") or getattr(event_data, "org_string", "") or "").strip() + + is_path_backed = bool(path) and path != title and ("/" in path or "\\" in path) + return { + "sample_source_kind": "path_backed" if is_path_backed else "title_only", + "sample_source_plugin": source_plugin, + } + def _build_meta_hint(self, raw_text: str) -> Dict[str, Any]: try: meta = MetaInfo(raw_text) @@ -221,6 +271,12 @@ class AIRecognizerEnhancer(_PluginBase): def _sample_path(self) -> Path: return self.get_data_path() / "failed_samples.jsonl" + def _llm_errors_path(self) -> Path: + return self.get_data_path() / "llm_errors.jsonl" + + def _failed_sample_cap(self) -> int: + return max(20, min(1000, self._safe_int(self._max_failed_samples, 200))) + @staticmethod def 
_sample_identity(payload: Dict[str, Any]) -> str: return json.dumps( @@ -236,7 +292,8 @@ class AIRecognizerEnhancer(_PluginBase): def _write_failed_samples(self, rows: List[Dict[str, Any]]) -> None: sample_path = self._sample_path() sample_path.parent.mkdir(parents=True, exist_ok=True) - trimmed = rows[-self._max_failed_samples:] + filtered = [row for row in rows if not str(row.get("reason") or "").startswith("llm_error:")] + trimmed = filtered[-self._failed_sample_cap():] with sample_path.open("w", encoding="utf-8") as f: for row in trimmed: f.write(json.dumps(row, ensure_ascii=False) + "\n") @@ -254,6 +311,69 @@ class AIRecognizerEnhancer(_PluginBase): except Exception as exc: logger.warning(f"[AI识别增强] 写入失败样本失败: {exc}") + def _record_llm_error(self, title: str, path: str, meta_hint: Dict[str, Any], error: Any, provenance: Optional[Dict[str, str]] = None) -> None: + try: + error_path = self._llm_errors_path() + error_path.parent.mkdir(parents=True, exist_ok=True) + provenance = provenance or {} + entry = { + "title": title, + "path": path, + "meta_hint": meta_hint, + "reason": f"llm_error:{error}", + "timestamp": __import__("datetime").datetime.now().isoformat(), + "sample_source_kind": provenance.get("sample_source_kind", "unknown"), + "sample_source_plugin": provenance.get("sample_source_plugin", ""), + } + existing = self._read_llm_errors(limit=1000) + existing.reverse() + new_identity = {"title": title, "path": path, "reason": entry["reason"]} + existing = [ + row for row in existing + if { + "title": row.get("title"), + "path": row.get("path"), + "reason": row.get("reason"), + } != new_identity + ] + existing.append(entry) + trimmed = existing[-self._failed_sample_cap():] + with error_path.open("w", encoding="utf-8") as f: + for row in trimmed: + f.write(json.dumps(row, ensure_ascii=False) + "\n") + except Exception as exc: + logger.warning(f"[AI识别增强] 写入 LLM 错误诊断记录失败: {exc}") + + def _read_llm_errors(self, limit: int = 20) -> List[Dict[str, Any]]: + 
error_path = self._llm_errors_path() + if not error_path.exists(): + return [] + rows: List[Dict[str, Any]] = [] + try: + with error_path.open("r", encoding="utf-8") as f: + for line in f: + line = line.strip() + if not line: + continue + try: + rows.append(json.loads(line)) + except Exception: + continue + except Exception as exc: + logger.warning(f"[AI识别增强] 读取 LLM 错误诊断记录失败: {exc}") + return [] + if limit > 0: + rows = rows[-limit:] + rows.reverse() + return rows + + def _clear_llm_errors(self) -> int: + rows = self._read_llm_errors(limit=10000) + error_path = self._llm_errors_path() + if error_path.exists(): + error_path.unlink() + return len(rows) + def _read_failed_samples(self, limit: int = 20) -> List[Dict[str, Any]]: sample_path = self._sample_path() if not sample_path.exists(): @@ -353,7 +473,7 @@ class AIRecognizerEnhancer(_PluginBase): sample_index: Optional[Any] = None, limit: int = 100, ) -> Tuple[Optional[int], Optional[Dict[str, Any]], str]: - samples = self._read_failed_samples(limit=max(1, min(limit, 200))) + samples = self._read_failed_samples(limit=max(1, min(limit, self._failed_sample_cap()))) if not samples: return None, None, "暂无失败样本" index = self._safe_int(sample_index, 0) @@ -369,9 +489,13 @@ class AIRecognizerEnhancer(_PluginBase): self, sample_indexes: Optional[List[Any]] = None, limit: int = 10, - pool_limit: int = 200, + pool_limit: int = 0, ) -> Tuple[List[int], List[Dict[str, Any]], str]: - current_samples = self._inject_sample_indices(self._read_failed_samples(limit=max(1, min(pool_limit, 1000)))) + if pool_limit <= 0: + pool_limit = self._failed_sample_cap() + current_samples = self._inject_sample_indices( + self._read_failed_samples(limit=max(1, min(pool_limit, self._failed_sample_cap()))) + ) if not current_samples: return [], [], "暂无失败样本" if isinstance(sample_indexes, list) and sample_indexes: @@ -414,6 +538,8 @@ class AIRecognizerEnhancer(_PluginBase): "title": sample.get("title"), "path": sample.get("path"), "reason": 
sample.get("reason"), + "sample_source_kind": sample.get("sample_source_kind", ""), + "sample_source_plugin": sample.get("sample_source_plugin", ""), "guess_name": guess.get("name"), "guess_confidence": self._safe_float(guess.get("confidence"), 0.0), "verified_title": verified.get("title"), @@ -551,7 +677,10 @@ class AIRecognizerEnhancer(_PluginBase): label = self._sample_display_name(summary) confidence = round(self._safe_float(summary.get("guess_confidence"), 0.0), 2) can_suggest = "可建议" if summary.get("can_auto_suggest") else "需人工" - lines.append(f"{summary.get('sample_index')}. {label} | 置信度 {confidence} | {can_suggest}") + source_tag = "有路径" if summary.get("sample_source_kind") == "path_backed" else "仅标题" + source_plugin = summary.get("sample_source_plugin") or "" + source_info = f" | {source_tag}" + (f" ({source_plugin})" if source_plugin else "") + lines.append(f"{summary.get('sample_index')}. {label} | 置信度 {confidence} | {can_suggest}{source_info}") lines.append("下一步:可直接调用批量建议或批量复查接口。") return "\n".join(lines) @@ -937,7 +1066,7 @@ AI 识别增强结果: selected_indexes, _, message = self._select_failed_sample_indexes( sample_indexes=body.get("sample_indexes"), limit=limit, - pool_limit=200, + pool_limit=self._failed_sample_cap(), ) if not selected_indexes: return {"success": False, "message": message} @@ -1006,7 +1135,7 @@ AI 识别增强结果: selected_indexes, _, message = self._select_failed_sample_indexes( sample_indexes=body.get("sample_indexes"), limit=limit, - pool_limit=200, + pool_limit=self._failed_sample_cap(), ) if not selected_indexes: return {"success": False, "message": message} @@ -1102,7 +1231,7 @@ AI 识别增强结果: selected_indexes, _, message = self._select_failed_sample_indexes( sample_indexes=body.get("sample_indexes"), limit=limit, - pool_limit=200, + pool_limit=self._failed_sample_cap(), ) if not selected_indexes: return {"success": False, "message": message} @@ -1356,40 +1485,49 @@ AI 识别增强结果: logger.warning(f"[AI识别增强] 二次校验失败: {exc}") return None - def 
_recognize(self, title: str, path: str = "", record_failed_sample: bool = True) -> Dict[str, Any]: + def _recognize( + self, title: str, path: str = "", record_failed_sample: bool = True, + provenance: Optional[Dict[str, str]] = None, + ) -> Dict[str, Any]: title = str(title or "").strip() path = str(path or "").strip() if not title and path: title = Path(path).name if not title: return {"success": False, "message": "标题为空"} + provenance = provenance or {} + sample_source_kind = provenance.get("sample_source_kind") + is_title_only = sample_source_kind == "title_only" if sample_source_kind else not path try: guess = self._invoke_llm(title, path) except Exception as exc: if record_failed_sample: - self._record_failed_sample( - { - "title": title, - "path": path, - "meta_hint": self._build_meta_hint(path or title), - "reason": f"llm_error:{exc}", - } - ) + if is_title_only and not self._save_title_only_samples: + if self._debug: + logger.info(f"[AI识别增强] 跳过保存仅标题 LLM 错误: {title} (save_title_only_samples=False)") + else: + self._record_llm_error(title, path, self._build_meta_hint(path or title), exc, provenance=provenance) return {"success": False, "message": f"LLM 调用失败: {exc}"} verified = self._verify_guess(title, path, guess) passed = bool(guess.name and guess.confidence >= self._confidence_threshold) if not passed and record_failed_sample: - self._record_failed_sample( - { - "title": title, - "path": path, - "meta_hint": self._build_meta_hint(path or title), - "guess": guess.model_dump(), - "verified_media_info": self._compact_verified_summary(verified), - "reason": "low_confidence_or_empty_name", - } - ) + if is_title_only and not self._save_title_only_samples: + if self._debug: + logger.info(f"[AI识别增强] 跳过保存仅标题样本: {title} (save_title_only_samples=False)") + else: + self._record_failed_sample( + { + "title": title, + "path": path, + "meta_hint": self._build_meta_hint(path or title), + "guess": guess.model_dump(), + "verified_media_info": 
self._compact_verified_summary(verified), + "reason": "low_confidence_or_empty_name", + "sample_source_kind": provenance.get("sample_source_kind", "unknown"), + "sample_source_plugin": provenance.get("sample_source_plugin", ""), + } + ) return { "success": passed, "message": "success" if passed else "识别结果置信度不足,已放弃注入", @@ -1404,7 +1542,8 @@ AI 识别增强结果: title, path = self._extract_title_path(event_data) if not title and not path: return - result = self._recognize(title=title, path=path) + provenance = self._extract_provenance(event_data) + result = self._recognize(title=title, path=path, provenance=provenance) if not result.get("success"): if self._debug: logger.info(f"[AI识别增强] 跳过注入: {title or path} - {result.get('message')}") @@ -1496,7 +1635,7 @@ AI 识别增强结果: if not ok: return {"success": False, "message": message} limit = self._safe_int(request.query_params.get("limit"), 50) - limit = max(1, min(limit, 200)) + limit = max(1, min(limit, self._failed_sample_cap())) top = self._safe_int(request.query_params.get("top"), 10) top = max(1, min(top, 20)) samples = self._inject_sample_indices(self._read_failed_samples(limit=limit)) @@ -1512,7 +1651,7 @@ AI 识别增强结果: return {"success": False, "message": message} limit = self._safe_int(request.query_params.get("limit"), 5) limit = max(1, min(limit, 20)) - samples = self._inject_sample_indices(self._read_failed_samples(limit=100)) + samples = self._inject_sample_indices(self._read_failed_samples(limit=self._failed_sample_cap())) return { "success": True, "data": { @@ -1558,6 +1697,34 @@ AI 识别增强结果: }, } + async def api_llm_errors(self, request: Request): + ok, message = self._check_api_access(request) + if not ok: + return {"success": False, "message": message} + limit = self._safe_int(request.query_params.get("limit"), 20) + limit = max(1, min(limit, 100)) + errors = self._read_llm_errors(limit=limit) + return { + "success": True, + "data": { + "count": len(errors), + "errors": errors, + }, + } + + async def 
api_clear_llm_errors(self, request: Request): + ok, message = self._check_api_access(request) + if not ok: + return {"success": False, "message": message} + cleared = self._clear_llm_errors() + return { + "success": True, + "message": "success", + "data": { + "cleared_count": cleared, + }, + } + async def api_remove_failed_sample(self, request: Request): body = await request.json() ok, message = self._check_api_access(request, body) @@ -1697,6 +1864,18 @@ AI 识别增强结果: "methods": ["POST"], "summary": "清空失败样本文件", }, + { + "path": "/llm_errors", + "endpoint": self.api_llm_errors, + "methods": ["GET"], + "summary": "查看 LLM 调用失败的诊断记录", + }, + { + "path": "/clear_llm_errors", + "endpoint": self.api_clear_llm_errors, + "methods": ["POST"], + "summary": "清空 LLM 错误诊断记录", + }, { "path": "/remove_failed_sample", "endpoint": self.api_remove_failed_sample, @@ -1731,7 +1910,8 @@ AI 识别增强结果: def get_page(self) -> List[dict]: llm_ready = bool(getattr(settings, "LLM_API_KEY", None)) - failed_samples_count = len(self._read_failed_samples(limit=200)) + failed_samples_count = len(self._read_failed_samples(limit=self._failed_sample_cap())) + llm_errors_count = len(self._read_llm_errors(limit=self._max_failed_samples)) custom_identifiers_count = len(self._get_custom_identifiers()) llm_provider = getattr(settings, "LLM_PROVIDER", "—") llm_model = getattr(settings, "LLM_MODEL", "—") @@ -1784,22 +1964,27 @@ AI 识别增强结果: "content": [ { "component": "VCol", - "props": {"cols": 12, "md": 3}, + "props": {"cols": 12, "sm": 6, "md": 2}, "content": [stat_card("当前状态", "已启用" if self._enabled else "未启用")], }, { "component": "VCol", - "props": {"cols": 12, "md": 3}, + "props": {"cols": 12, "sm": 6, "md": 2}, "content": [stat_card("LLM 可用", "是" if llm_ready else "否", f"{llm_provider} / {llm_model}")], }, { "component": "VCol", - "props": {"cols": 12, "md": 3}, - "content": [stat_card("失败样本", f"{failed_samples_count} 条", f"上限 {self._max_failed_samples} 条")], + "props": {"cols": 12, "sm": 6, "md": 3}, + 
"content": [stat_card("可处理失败样本", f"{failed_samples_count} 条", f"上限 {self._max_failed_samples} 条")], }, { "component": "VCol", - "props": {"cols": 12, "md": 3}, + "props": {"cols": 12, "sm": 6, "md": 2}, + "content": [stat_card("LLM 错误", f"{llm_errors_count} 条", "诊断记录")], + }, + { + "component": "VCol", + "props": {"cols": 12, "sm": 6, "md": 3}, "content": [stat_card("自定义识别词", f"{custom_identifiers_count} 条", "系统 CustomIdentifiers")], }, ], @@ -1810,34 +1995,7 @@ AI 识别增强结果: "content": [ { "component": "VCol", - "props": {"cols": 12, "md": 6}, - "content": [ - { - "component": "VCard", - "props": {"variant": "outlined", "class": "pa-4 h-100"}, - "content": [ - { - "component": "div", - "props": {"class": "text-subtitle-1 font-weight-bold mb-2"}, - "text": "识别兜底", - }, - { - "component": "div", - "props": {"class": "text-body-2 text-medium-emphasis"}, - "text": "在 Chain NameRecognize 阶段回写 name / year / season / episode,供 MoviePilot 继续原生二次识别。", - }, - { - "component": "div", - "props": {"class": "text-caption text-medium-emphasis mt-3"}, - "text": f"置信度阈值:{self._confidence_threshold};请求超时:{self._request_timeout} 秒", - }, - ], - } - ], - }, - { - "component": "VCol", - "props": {"cols": 12, "md": 6}, + "props": {"cols": 12, "md": 12}, "content": [ { "component": "VCard", @@ -1873,6 +2031,7 @@ AI 识别增强结果: return "vuetify", None def get_form(self) -> Tuple[List[dict], Dict[str, Any]]: + failed_samples_count = len(self._read_failed_samples(limit=self._failed_sample_cap())) form = [ { "component": "VForm", @@ -1896,6 +2055,25 @@ AI 识别增强结果: } ], }, + { + "component": "VRow", + "content": [ + { + "component": "VCol", + "props": {"cols": 12}, + "content": [ + { + "component": "VAlert", + "props": { + "type": "warning", + "variant": "tonal", + "text": f"当前累计 {failed_samples_count} 条失败样本。如需重置噪音数据,请勾选下方“一次性清空”开关后点击保存。该操作只清空失败样本,不会删除已写入的 CustomIdentifiers。", + }, + } + ], + } + ], + }, { "component": "VRow", "content": [ @@ -1929,6 +2107,19 @@ AI 识别增强结果: } ], }, + { + "component": 
"VCol", + "props": {"cols": 12, "md": 4}, + "content": [ + { + "component": "VSwitch", + "props": { + "model": "save_title_only_samples", + "label": "保存仅标题样本", + }, + } + ], + }, ], }, { @@ -2010,6 +2201,24 @@ AI 识别增强结果: } ], }, + { + "component": "VRow", + "content": [ + { + "component": "VCol", + "props": {"cols": 12}, + "content": [ + { + "component": "VSwitch", + "props": { + "model": "clear_failed_samples_once", + "label": "保存时清空失败样本(一次性)", + }, + } + ], + } + ], + }, { "component": "VRow", "content": [ @@ -2038,6 +2247,8 @@ AI 识别增强结果: "request_timeout": 25, "max_retries": 2, "save_failed_samples": True, + "save_title_only_samples": False, "max_failed_samples": 200, "auto_remove_applied_sample": True, + "clear_failed_samples_once": False, }