Merge pull request #993 from honue/main

2026-03-27 10:05:57 +00:00 · 2026-02-25 15:13:11 +08:00
parent 1ae826cf14 944919fc34
commit 437b2b05d4
2 changed files with 64 additions and 20 deletions
--- a/package.json
+++ b/package.json
@@ -679,12 +679,13 @@
    "name": "共享识别词",
    "description": "从Github、Etherpad等远程文件中获取共享识别词并应用。",
    "labels": "识别",
-    "version": "2.3",
+    "version": "2.4",
    "icon": "words.png",
    "author": "honue",
    "level": 1,
    "v2": true,
    "history": {
+      "v2.4": "支持 JSON 格式远程识别词集合订阅",
      "v2.3": "更换默认共享识别词地址"
    }
  },
--- a/plugins/remoteidentifiers/init.py
+++ b/plugins/remoteidentifiers/init.py
@@ -1,3 +1,4 @@
+import json
 from typing import List, Tuple, Dict, Any

 import datetime
@@ -23,7 +24,7 @@ class RemoteIdentifiers(_PluginBase):
    # 插件图标
    plugin_icon = "words.png"
    # 插件版本
-    plugin_version = "2.3"
+    plugin_version = "2.4"
    # 插件作者
    plugin_author = "honue"
    # 作者主页
@@ -74,25 +75,17 @@ class RemoteIdentifiers(_PluginBase):
    def get_file_content(self, file_urls: list) -> List[str]:
        ret: List[str] = ['#========以下识别词由 RemoteIdentifiers 插件添加========#']
        for file_url in file_urls:
-            # https://movie-pilot.org/etherpad/p/MoviePilot_TV_Words
-            if file_url.count("etherpad") != 0 and file_url.count("export") == 0:
-                real_url = file_url + "/export/txt"
+            file_url = file_url.strip()
+            if not file_url:
+                continue
+            if file_url.lower().endswith(".json"):
+                mapping = self.__get_remote_mapping(file_url=file_url)
+                for words_name, words_url in mapping.items():
+                    identifiers = self.__get_remote_identifiers(words_url=words_url, words_name=words_name)
+                    ret += identifiers
            else:
-                real_url = file_url
-            response = RequestUtils(proxies=settings.PROXY,
-                                    headers=settings.GITHUB_HEADERS if real_url.count("github") else None,
-                                    timeout=15).get_res(real_url)
-            if not response:
-                raise Exception(f"文件 {real_url} 下载失败！")
-            elif response.status_code != 200:
-                raise Exception(f"下载文件 {real_url} 失败：{response.status_code} - {response.reason}")
-            text = response.content.decode('utf-8')
-            if text.find("doctype html") > 0:
-                raise Exception(f"下载文件 {real_url} 失败：{response.status_code} - {response.reason}")
-            if "try again later" in text:
-                raise Exception(f"下载文件 {real_url} 失败：{text}")
-            identifiers: List[str] = text.split('\n')
-            ret += identifiers
+                identifiers = self.__get_remote_identifiers(words_url=file_url)
+                ret += identifiers
        # flitter 过滤空行
        if self._flitter:
            filtered_ret = []
@@ -103,6 +96,56 @@ class RemoteIdentifiers(_PluginBase):
        logger.info(f"获取到远端识别词{len(ret) - 1}条: {ret[1:]}")
        return ret

+    def __get_real_url(self, words_url: str) -> str:
+        # https://movie-pilot.org/etherpad/p/MoviePilot_TV_Words
+        if words_url.count("etherpad") != 0 and words_url.count("export") == 0:
+            return words_url + "/export/txt"
+        return words_url
+
+    def __get_response_text(self, url: str) -> str:
+        response = RequestUtils(
+            proxies=settings.PROXY,
+            headers=settings.GITHUB_HEADERS if url.count("github") else None,
+            timeout=15
+        ).get_res(url)
+        if not response:
+            raise Exception(f"文件 {url} 下载失败！")
+        if response.status_code != 200:
+            raise Exception(f"下载文件 {url} 失败：{response.status_code} - {response.reason}")
+        text = response.content.decode('utf-8')
+        if text.find("doctype html") > 0:
+            raise Exception(f"下载文件 {url} 失败：{response.status_code} - {response.reason}")
+        if "try again later" in text:
+            raise Exception(f"下载文件 {url} 失败：{text}")
+        return text
+
+    def __get_remote_identifiers(self, words_url: str, words_name: str = None) -> List[str]:
+        real_url = self.__get_real_url(words_url=words_url)
+        text = self.__get_response_text(url=real_url)
+        identifiers = text.split('\n')
+        if words_name:
+            logger.info(f"词表[{words_name}]获取成功，地址：{real_url}，识别词数量：{len(identifiers)}")
+        return identifiers
+
+    def __get_remote_mapping(self, file_url: str) -> Dict[str, str]:
+        real_url = self.__get_real_url(words_url=file_url)
+        text = self.__get_response_text(url=real_url)
+        try:
+            mapping = json.loads(text)
+        except json.JSONDecodeError as e:
+            raise Exception(f"订阅文件 {real_url} 不是合法JSON：{str(e)}")
+        if not isinstance(mapping, dict):
+            raise Exception(f"订阅文件 {real_url} 格式错误：必须为对象，格式为 词表名 -> 词表地址")
+        normalized_mapping: Dict[str, str] = {}
+        for words_name, words_url in mapping.items():
+            if not isinstance(words_name, str):
+                raise Exception(f"订阅文件 {real_url} 格式错误：词表名必须是字符串")
+            if not isinstance(words_url, str) or not words_url.strip():
+                raise Exception(f"订阅文件 {real_url} 格式错误：词表[{words_name}]地址必须是非空字符串")
+            normalized_mapping[words_name] = words_url.strip()
+        logger.info(f"订阅文件[{real_url}]解析成功，共 {len(normalized_mapping)} 个词表")
+        return normalized_mapping
+
    def __task(self):
        words: List[str] = self.systemconfig.get(SystemConfigKey.CustomIdentifiers) or []
        file_urls: list = self._file_urls.split('\n') if self._file_urls else []