From 47c4e84fdd7b1766f1a80da0ae8fd8b5949d37c5 Mon Sep 17 00:00:00 2001
From: jxxghp
Date: Wed, 20 May 2026 16:54:01 +0800
Subject: [PATCH] fix: handle tmdb gzip json responses

---
 app/modules/themoviedb/tmdbv3api/tmdb.py | 43 ++++++++++++++++++++----
 tests/test_tmdb_response_cache.py        | 36 ++++++++++++++++++++
 2 files changed, 73 insertions(+), 6 deletions(-)

diff --git a/app/modules/themoviedb/tmdbv3api/tmdb.py b/app/modules/themoviedb/tmdbv3api/tmdb.py
index 3e93de4f..52bea513 100644
--- a/app/modules/themoviedb/tmdbv3api/tmdb.py
+++ b/app/modules/themoviedb/tmdbv3api/tmdb.py
@@ -22,6 +22,7 @@ logger = logging.getLogger(__name__)
 class TMDb(object):
     _RESPONSE_SNAPSHOT_MARKER = "__mp_tmdb_response_snapshot__"
     _JSON_DECODE_FAILED = object()
+    _MAX_GZIP_DECODE_DEPTH = 3
 
     def __init__(self, session=None, language=None):
         self._api_key = settings.TMDB_API_KEY
@@ -37,14 +38,27 @@ class TMDb(object):
         if not self._session:
             self._session = requests.Session()
 
-        self._req = RequestUtils(ua=settings.NORMAL_USER_AGENT, session=self._session, proxies=self.proxies)
+        request_headers = self._build_request_headers()
+        self._req = RequestUtils(headers=request_headers, session=self._session, proxies=self.proxies)
 
-        self._async_req = AsyncRequestUtils(ua=settings.NORMAL_USER_AGENT, proxies=self.proxies)
+        self._async_req = AsyncRequestUtils(headers=request_headers, proxies=self.proxies)
 
         self._remaining = 40
         self._reset = None
         self._timeout = 15
 
+    @staticmethod
+    def _build_request_headers():
+        """
+        构造TMDB JSON请求头,避免小体积JSON被代理重复压缩。
+        """
+        return {
+            "User-Agent": settings.NORMAL_USER_AGENT,
+            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
+            "Accept": "application/json",
+            "Accept-Encoding": "identity",
+        }
+
     @property
     def page(self):
         return self._page
@@ -197,10 +211,27 @@ class TMDb(object):
         if "gzip" not in encodings and not content_bytes.startswith(b"\x1f\x8b"):
             return cls._JSON_DECODE_FAILED
 
-        try:
-            return jsonlib.loads(gzip.decompress(content_bytes))
-        except (OSError, EOFError, ValueError, UnicodeDecodeError):
-            return cls._JSON_DECODE_FAILED
+        for json_payload in cls._iter_gzip_decoded_payloads(content_bytes):
+            try:
+                return jsonlib.loads(json_payload)
+            except (ValueError, UnicodeDecodeError):
+                continue
+        return cls._JSON_DECODE_FAILED
+
+    @classmethod
+    def _iter_gzip_decoded_payloads(cls, content_bytes: bytes):
+        """
+        逐层解开gzip响应体,兼容客户端或代理只解压了部分层级的情况。
+        """
+        current_payload = content_bytes
+        for _ in range(cls._MAX_GZIP_DECODE_DEPTH):
+            if not current_payload.startswith(b"\x1f\x8b"):
+                return
+            try:
+                current_payload = gzip.decompress(current_payload)
+            except (OSError, EOFError):
+                return
+            yield current_payload
 
     @staticmethod
     def _get_header_value(headers, name):
diff --git a/tests/test_tmdb_response_cache.py b/tests/test_tmdb_response_cache.py
index 3421be74..ce573dd8 100644
--- a/tests/test_tmdb_response_cache.py
+++ b/tests/test_tmdb_response_cache.py
@@ -174,7 +174,29 @@ class _GzipJsonResponse(_UnicodeDecodeErrorResponse):
         super().__init__(gzip.compress(json.dumps(payload).encode("utf-8")))
 
 
+class _DoubleGzipJsonResponse(_UnicodeDecodeErrorResponse):
+    """
+    模拟代理或上游重复gzip压缩后的JSON响应。
+    """
+
+    def __init__(self, payload):
+        """
+        将JSON载荷压缩两次,复现客户端只自动解开外层gzip的情况。
+        """
+        inner_payload = gzip.compress(json.dumps(payload).encode("utf-8"))
+        super().__init__(gzip.compress(inner_payload))
+
+
 class TmdbResponseCacheTest(TestCase):
+    def test_build_request_headers_disables_response_compression(self):
+        """
+        TMDB请求应避免主动接受压缩JSON,减少代理保留gzip响应头的兼容问题。
+        """
+        headers = TMDb._build_request_headers()
+
+        self.assertEqual(headers["Accept"], "application/json")
+        self.assertEqual(headers["Accept-Encoding"], "identity")
+
     def test_request_returns_pickleable_snapshot(self):
         tmdb = TMDb()
         response = _FakeResponse(
@@ -252,6 +274,20 @@ class TmdbResponseCacheTest(TestCase):
         self.assertTrue(result[TMDb._RESPONSE_SNAPSHOT_MARKER])
         self.assertEqual(result["json"]["results"], [{"id": 100}])
 
+    def test_request_decodes_nested_gzip_json_response(self):
+        """
+        响应体仍是gzip字节时,应逐层解压直到得到可解析的JSON。
+        """
+        tmdb = TMDb()
+        tmdb._req.get_res = lambda *args, **kwargs: _DoubleGzipJsonResponse(
+            {"page": 1, "results": [{"id": 101}]}
+        )
+
+        result = TMDb.request.__wrapped__(tmdb, "GET", "https://example.com", None, None)
+
+        self.assertTrue(result[TMDb._RESPONSE_SNAPSHOT_MARKER])
+        self.assertEqual(result["json"]["results"], [{"id": 101}])
+
     def test_get_response_json_rejects_invalid_live_response(self):
         """
         未缓存的实时响应解析失败时也应输出统一诊断信息。