fix: handle tmdb gzip json responses

This commit is contained in:
jxxghp
2026-05-20 16:54:01 +08:00
parent e00aa42f94
commit 47c4e84fdd
2 changed files with 73 additions and 6 deletions

View File

@@ -22,6 +22,7 @@ logger = logging.getLogger(__name__)
class TMDb(object):
_RESPONSE_SNAPSHOT_MARKER = "__mp_tmdb_response_snapshot__"
_JSON_DECODE_FAILED = object()
_MAX_GZIP_DECODE_DEPTH = 3
def __init__(self, session=None, language=None):
self._api_key = settings.TMDB_API_KEY
@@ -37,14 +38,27 @@ class TMDb(object):
if not self._session:
self._session = requests.Session()
self._req = RequestUtils(ua=settings.NORMAL_USER_AGENT, session=self._session, proxies=self.proxies)
request_headers = self._build_request_headers()
self._req = RequestUtils(headers=request_headers, session=self._session, proxies=self.proxies)
self._async_req = AsyncRequestUtils(ua=settings.NORMAL_USER_AGENT, proxies=self.proxies)
self._async_req = AsyncRequestUtils(headers=request_headers, proxies=self.proxies)
self._remaining = 40
self._reset = None
self._timeout = 15
@staticmethod
def _build_request_headers():
    """
    Build the default headers sent with TMDB JSON requests.

    The headers ask for an uncompressed JSON body; the stated intent is to
    keep small JSON responses from being compressed again by a proxy.

    :return: a new dict mapping header names to values
    """
    # Pairs are listed in the order the headers should appear in the dict.
    header_pairs = (
        ("User-Agent", settings.NORMAL_USER_AGENT),
        ("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"),
        ("Accept", "application/json"),
        # "identity" tells the server not to apply any content encoding.
        ("Accept-Encoding", "identity"),
    )
    return dict(header_pairs)
@property
def page(self):
return self._page
@@ -197,10 +211,27 @@ class TMDb(object):
if "gzip" not in encodings and not content_bytes.startswith(b"\x1f\x8b"):
return cls._JSON_DECODE_FAILED
try:
return jsonlib.loads(gzip.decompress(content_bytes))
except (OSError, EOFError, ValueError, UnicodeDecodeError):
return cls._JSON_DECODE_FAILED
for json_payload in cls._iter_gzip_decoded_payloads(content_bytes):
try:
return jsonlib.loads(json_payload)
except (ValueError, UnicodeDecodeError):
continue
return cls._JSON_DECODE_FAILED
@classmethod
def _iter_gzip_decoded_payloads(cls, content_bytes: bytes):
"""
逐层解开gzip响应体兼容客户端或代理只解压了部分层级的情况。
"""
current_payload = content_bytes
for _ in range(cls._MAX_GZIP_DECODE_DEPTH):
if not current_payload.startswith(b"\x1f\x8b"):
return
try:
current_payload = gzip.decompress(current_payload)
except (OSError, EOFError):
return
yield current_payload
@staticmethod
def _get_header_value(headers, name):

View File

@@ -174,7 +174,29 @@ class _GzipJsonResponse(_UnicodeDecodeErrorResponse):
super().__init__(gzip.compress(json.dumps(payload).encode("utf-8")))
class _DoubleGzipJsonResponse(_UnicodeDecodeErrorResponse):
    """
    Fake response whose body is JSON that has been gzip-compressed twice,
    simulating repeated compression by a proxy or an upstream server.
    """

    def __init__(self, payload):
        """
        Serialize ``payload`` to UTF-8 JSON and gzip it two times.

        This reproduces the case where the client automatically removes only
        the outer gzip layer.
        """
        body = json.dumps(payload).encode("utf-8")
        # Apply the inner layer first, then the outer one.
        for _ in range(2):
            body = gzip.compress(body)
        super().__init__(body)
class TmdbResponseCacheTest(TestCase):
def test_build_request_headers_disables_response_compression(self):
    """
    TMDB requests should not opt in to compressed JSON, to reduce
    compatibility problems with proxies that keep the gzip response header.
    """
    built_headers = TMDb._build_request_headers()
    expected_values = (
        ("Accept", "application/json"),
        ("Accept-Encoding", "identity"),
    )
    for header_name, expected in expected_values:
        self.assertEqual(built_headers[header_name], expected)
def test_request_returns_pickleable_snapshot(self):
tmdb = TMDb()
response = _FakeResponse(
@@ -252,6 +274,20 @@ class TmdbResponseCacheTest(TestCase):
self.assertTrue(result[TMDb._RESPONSE_SNAPSHOT_MARKER])
self.assertEqual(result["json"]["results"], [{"id": 100}])
def test_request_decodes_nested_gzip_json_response(self):
    """
    When the response body is still gzip bytes, it should be decompressed
    layer by layer until parseable JSON is obtained.
    """

    def fake_get_res(*args, **kwargs):
        # Every call returns a fresh doubly-compressed fake response.
        return _DoubleGzipJsonResponse({"page": 1, "results": [{"id": 101}]})

    client = TMDb()
    client._req.get_res = fake_get_res

    # Call the undecorated function via ``__wrapped__``.
    undecorated_request = TMDb.request.__wrapped__
    snapshot = undecorated_request(client, "GET", "https://example.com", None, None)

    self.assertTrue(snapshot[TMDb._RESPONSE_SNAPSHOT_MARKER])
    self.assertEqual(snapshot["json"]["results"], [{"id": 101}])
def test_get_response_json_rejects_invalid_live_response(self):
"""
未缓存的实时响应解析失败时也应输出统一诊断信息。