diff --git a/app/modules/themoviedb/tmdbv3api/tmdb.py b/app/modules/themoviedb/tmdbv3api/tmdb.py index 93948ec4..10534118 100644 --- a/app/modules/themoviedb/tmdbv3api/tmdb.py +++ b/app/modules/themoviedb/tmdbv3api/tmdb.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- import asyncio +import gzip +import json as jsonlib import logging import time from copy import deepcopy @@ -19,6 +21,7 @@ logger = logging.getLogger(__name__) class TMDb(object): _RESPONSE_SNAPSHOT_MARKER = "__mp_tmdb_response_snapshot__" + _JSON_DECODE_FAILED = object() def __init__(self, session=None, language=None): self._api_key = settings.TMDB_API_KEY @@ -164,9 +167,62 @@ class TMDb(object): return response.json() except (ValueError, UnicodeDecodeError) as err: # httpx.Response.json() 在响应体是压缩字节或错误编码时会直接抛 UnicodeDecodeError, - # 这里统一收敛成 TMDbException,避免上层把脏响应当作未捕获异常。 + # 先尝试兼容未被客户端解压的 gzip JSON,仍失败时再收敛成 TMDbException。 + json_data = cls._decode_compressed_response_json(response) + if json_data is not cls._JSON_DECODE_FAILED: + return json_data raise TMDbException(cls._build_invalid_json_message(response)) from err + @classmethod + def _decode_compressed_response_json(cls, response): + """ + 尝试解析未被HTTP客户端自动解压的压缩JSON响应。 + """ + response_content = getattr(response, "content", b"") or b"" + if isinstance(response_content, str): + response_content = response_content.encode("utf-8") + if not isinstance(response_content, (bytes, bytearray)): + return cls._JSON_DECODE_FAILED + + content_bytes = bytes(response_content) + content_encoding = cls._get_header_value( + getattr(response, "headers", {}) or {}, + "Content-Encoding", + ) or "" + encodings = { + encoding.strip().lower() + for encoding in str(content_encoding).split(",") + if encoding.strip() + } + if "gzip" not in encodings and not content_bytes.startswith(b"\x1f\x8b"): + return cls._JSON_DECODE_FAILED + + try: + return jsonlib.loads(gzip.decompress(content_bytes)) + except (OSError, EOFError, ValueError, UnicodeDecodeError): + return cls._JSON_DECODE_FAILED + + @staticmethod + def _get_header_value(headers, name): + """ + 从不同响应头对象中按大小写兼容读取指定响应头。 + """ + try: + value = headers.get(name) + except AttributeError: + return None + if value is not None: + return value + + lower_name = name.lower() + try: + for header_name, header_value in headers.items(): + if str(header_name).lower() == lower_name: + return header_value + except AttributeError: + return None + return None + @staticmethod def _build_invalid_json_message(response): """ @@ -174,10 +230,7 @@ class TMDb(object): """ status_code = getattr(response, "status_code", None) headers = getattr(response, "headers", {}) or {} - try: - content_type = headers.get("content-type") or headers.get("Content-Type") - except AttributeError: - content_type = None + content_type = TMDb._get_header_value(headers, "Content-Type") try: response_text = getattr(response, "text", "") or "" @@ -194,7 +247,7 @@ class TMDb(object): message_parts.append(f"HTTP状态码:{status_code}") if content_type: message_parts.append(f"Content-Type:{content_type}") - content_encoding = headers.get("content-encoding") or headers.get("Content-Encoding") + content_encoding = TMDb._get_header_value(headers, "Content-Encoding") if content_encoding: message_parts.append(f"Content-Encoding:{content_encoding}") if response_text: diff --git a/tests/test_tmdb_response_cache.py b/tests/test_tmdb_response_cache.py index b71c66a8..0a36343e 100644 --- a/tests/test_tmdb_response_cache.py +++ b/tests/test_tmdb_response_cache.py @@ -1,5 +1,7 @@ import asyncio +import gzip import importlib.util +import json import pickle import sys from contextlib import asynccontextmanager, contextmanager @@ -144,13 +146,14 @@ class _UnicodeDecodeErrorResponse: 模拟 httpx.Response.json() 直接抛 UnicodeDecodeError 的异常响应。 """ - def __init__(self): + def __init__(self, content: bytes = b"\x8b"): """ 初始化一个带有压缩响应特征的伪响应对象。 """ self.headers = {"Content-Type": "application/json", "Content-Encoding": "gzip"} self.status_code = 200 self.text = "" + self.content = content def json(self): """ @@ -159,6 +162,18 @@ class _UnicodeDecodeErrorResponse: raise UnicodeDecodeError("utf-8", b"\x8b", 1, 2, "invalid start byte") +class _GzipJsonResponse(_UnicodeDecodeErrorResponse): + """ + 模拟响应对象带着尚未解压的 gzip JSON 字节。 + """ + + def __init__(self, payload): + """ + 将JSON载荷压缩成 gzip 字节,复现代理返回原始压缩体的情况。 + """ + super().__init__(gzip.compress(json.dumps(payload).encode("utf-8"))) + + class TmdbResponseCacheTest(TestCase): def test_request_returns_pickleable_snapshot(self): tmdb = TMDb() @@ -217,6 +232,20 @@ class TmdbResponseCacheTest(TestCase): with self.assertRaisesRegex(TMDbException, "不是有效JSON.*Content-Encoding:gzip"): TMDb.request.__wrapped__(tmdb, "GET", "https://example.com", None, None) + def test_request_decodes_raw_gzip_json_response(self): + """ + 客户端未自动解压 gzip JSON 时,应手动解压后正常进入响应快照。 + """ + tmdb = TMDb() + tmdb._req.get_res = lambda *args, **kwargs: _GzipJsonResponse( + {"page": 1, "results": [{"id": 100}]} + ) + + result = TMDb.request.__wrapped__(tmdb, "GET", "https://example.com", None, None) + + self.assertTrue(result[TMDb._RESPONSE_SNAPSHOT_MARKER]) + self.assertEqual(result["json"]["results"], [{"id": 100}]) + def test_get_response_json_rejects_invalid_live_response(self): """ 未缓存的实时响应解析失败时也应输出统一诊断信息。