fix: decode raw gzip tmdb responses

This commit is contained in:
jxxghp
2026-05-20 10:44:01 +08:00
parent b07c47551c
commit c3e4e1a764
2 changed files with 89 additions and 7 deletions

View File

@@ -1,6 +1,8 @@
# -*- coding: utf-8 -*-
import asyncio
import gzip
import json as jsonlib
import logging
import time
from copy import deepcopy
@@ -19,6 +21,7 @@ logger = logging.getLogger(__name__)
class TMDb(object):
_RESPONSE_SNAPSHOT_MARKER = "__mp_tmdb_response_snapshot__"
_JSON_DECODE_FAILED = object()
def __init__(self, session=None, language=None):
self._api_key = settings.TMDB_API_KEY
@@ -164,9 +167,62 @@ class TMDb(object):
return response.json()
except (ValueError, UnicodeDecodeError) as err:
# httpx.Response.json() 在响应体是压缩字节或错误编码时会直接抛 UnicodeDecodeError
# 这里统一收敛成 TMDbException,避免上层把脏响应当作未捕获异常
# 先尝试兼容未被客户端解压的 gzip JSON,仍失败时再收敛成 TMDbException。
json_data = cls._decode_compressed_response_json(response)
if json_data is not cls._JSON_DECODE_FAILED:
return json_data
raise TMDbException(cls._build_invalid_json_message(response)) from err
@classmethod
def _decode_compressed_response_json(cls, response):
"""
尝试解析未被HTTP客户端自动解压的压缩JSON响应。
"""
response_content = getattr(response, "content", b"") or b""
if isinstance(response_content, str):
response_content = response_content.encode("utf-8")
if not isinstance(response_content, (bytes, bytearray)):
return cls._JSON_DECODE_FAILED
content_bytes = bytes(response_content)
content_encoding = cls._get_header_value(
getattr(response, "headers", {}) or {},
"Content-Encoding",
) or ""
encodings = {
encoding.strip().lower()
for encoding in str(content_encoding).split(",")
if encoding.strip()
}
if "gzip" not in encodings and not content_bytes.startswith(b"\x1f\x8b"):
return cls._JSON_DECODE_FAILED
try:
return jsonlib.loads(gzip.decompress(content_bytes))
except (OSError, EOFError, ValueError, UnicodeDecodeError):
return cls._JSON_DECODE_FAILED
@staticmethod
def _get_header_value(headers, name):
"""
从不同响应头对象中按大小写兼容读取指定响应头。
"""
try:
value = headers.get(name)
except AttributeError:
return None
if value is not None:
return value
lower_name = name.lower()
try:
for header_name, header_value in headers.items():
if str(header_name).lower() == lower_name:
return header_value
except AttributeError:
return None
return None
@staticmethod
def _build_invalid_json_message(response):
"""
@@ -174,10 +230,7 @@ class TMDb(object):
"""
status_code = getattr(response, "status_code", None)
headers = getattr(response, "headers", {}) or {}
try:
content_type = headers.get("content-type") or headers.get("Content-Type")
except AttributeError:
content_type = None
content_type = TMDb._get_header_value(headers, "Content-Type")
try:
response_text = getattr(response, "text", "") or ""
@@ -194,7 +247,7 @@ class TMDb(object):
message_parts.append(f"HTTP状态码{status_code}")
if content_type:
message_parts.append(f"Content-Type{content_type}")
content_encoding = headers.get("content-encoding") or headers.get("Content-Encoding")
content_encoding = TMDb._get_header_value(headers, "Content-Encoding")
if content_encoding:
message_parts.append(f"Content-Encoding{content_encoding}")
if response_text:

View File

@@ -1,5 +1,7 @@
import asyncio
import gzip
import importlib.util
import json
import pickle
import sys
from contextlib import asynccontextmanager, contextmanager
@@ -144,13 +146,14 @@ class _UnicodeDecodeErrorResponse:
模拟 httpx.Response.json() 直接抛 UnicodeDecodeError 的异常响应。
"""
def __init__(self):
def __init__(self, content: bytes = b"\x8b"):
"""
初始化一个带有压缩响应特征的伪响应对象。
"""
self.headers = {"Content-Type": "application/json", "Content-Encoding": "gzip"}
self.status_code = 200
self.text = ""
self.content = content
def json(self):
"""
@@ -159,6 +162,18 @@ class _UnicodeDecodeErrorResponse:
raise UnicodeDecodeError("utf-8", b"\x8b", 1, 2, "invalid start byte")
class _GzipJsonResponse(_UnicodeDecodeErrorResponse):
    """
    Fake response carrying gzip-compressed JSON bytes that were never decompressed.
    """

    def __init__(self, payload):
        """
        Serialize ``payload`` to JSON and gzip it, reproducing the case where
        a proxy hands back the raw compressed body.
        """
        serialized = json.dumps(payload).encode("utf-8")
        compressed_body = gzip.compress(serialized)
        super().__init__(compressed_body)
class TmdbResponseCacheTest(TestCase):
def test_request_returns_pickleable_snapshot(self):
tmdb = TMDb()
@@ -217,6 +232,20 @@ class TmdbResponseCacheTest(TestCase):
with self.assertRaisesRegex(TMDbException, "不是有效JSON.*Content-Encodinggzip"):
TMDb.request.__wrapped__(tmdb, "GET", "https://example.com", None, None)
def test_request_decodes_raw_gzip_json_response(self):
    """
    When the client did not decompress a gzip JSON body, the request should
    decompress it manually and still produce a normal response snapshot.
    """

    def fake_get_res(*args, **kwargs):
        # A fresh fake response per call, carrying raw gzip bytes.
        return _GzipJsonResponse({"page": 1, "results": [{"id": 100}]})

    client = TMDb()
    client._req.get_res = fake_get_res

    snapshot = TMDb.request.__wrapped__(client, "GET", "https://example.com", None, None)

    self.assertTrue(snapshot[TMDb._RESPONSE_SNAPSHOT_MARKER])
    self.assertEqual(snapshot["json"]["results"], [{"id": 100}])
def test_get_response_json_rejects_invalid_live_response(self):
"""
未缓存的实时响应解析失败时也应输出统一诊断信息。