fix: handle invalid tmdb json responses

This commit is contained in:
jxxghp
2026-05-20 09:05:18 +08:00
parent 5d02550874
commit 07f51c5d94
2 changed files with 185 additions and 5 deletions

View File

@@ -130,11 +130,16 @@ class TMDb(object):
@classmethod
def _snapshot_response(cls, response):
    """
    Build a cacheable snapshot of a TMDB response.

    The body is decoded and its top-level structure is validated before the
    snapshot is assembled, so a response that fails either step raises
    instead of producing a snapshot.

    :param response: response object exposing ``headers`` and ``json()``
    :return: dict holding the snapshot marker, a plain-dict copy of the
        response headers and the decoded JSON payload
    :raises TMDbException: when the body is not valid JSON or its top-level
        value is neither an object nor an array
    """
    json_data = cls._decode_response_json(response)
    cls._validate_json_response(json_data)
    # Redis cannot reliably serialize requests/httpx response objects, so the
    # cache keeps only the data the current flow actually uses.
    return {
        cls._RESPONSE_SNAPSHOT_MARKER: True,
        "headers": dict(response.headers.items()),
        # Reuse the payload decoded above; the body must not be parsed a
        # second time through the unguarded response.json() call.
        "json": json_data,
    }
@classmethod
@@ -148,7 +153,78 @@ class TMDb(object):
if isinstance(response, dict) and response.get(cls._RESPONSE_SNAPSHOT_MARKER):
# 调用方会补充 media_type 等字段,缓存快照必须隔离这些原地修改。
return deepcopy(response.get("json"))
return response.json()
return cls._decode_response_json(response)
@classmethod
def _decode_response_json(cls, response):
    """
    Decode the JSON body of a TMDB response.

    A body that cannot be parsed (for example an empty response or a proxy
    error page) is reported as a TMDbException carrying a diagnostic
    message, with the original decoding error attached as its cause.

    :param response: response object exposing ``json()``
    :return: the decoded JSON value
    :raises TMDbException: when ``response.json()`` raises ValueError
    """
    try:
        decoded = response.json()
    except ValueError as decode_error:
        diagnostic = cls._build_invalid_json_message(response)
        raise TMDbException(diagnostic) from decode_error
    return decoded
@staticmethod
def _build_invalid_json_message(response):
    """
    Build a diagnostic message for a response whose body is not valid JSON.

    The message carries the HTTP status code, the Content-Type header and a
    preview of the body (at most 200 characters after stripping), so logs
    keep more than the bare JSON decoding error text.

    :param response: response-like object; ``status_code``, ``headers`` and
        ``text`` are read defensively and any of them may be missing
    :return: the diagnostic parts joined with "," into a single string
    """
    status_code = getattr(response, "status_code", None)
    headers = getattr(response, "headers", {}) or {}
    try:
        content_type = headers.get("content-type") or headers.get("Content-Type")
        if not content_type:
            # A plain dict is case-sensitive, so accept any other casing of
            # the header name as well.
            content_type = next(
                (
                    value
                    for name, value in headers.items()
                    if isinstance(name, str)
                    and name.lower() == "content-type"
                    and value
                ),
                None,
            )
    except AttributeError:
        # The headers attribute is not mapping-like.
        content_type = None

    try:
        response_text = getattr(response, "text", "") or ""
    except Exception as err:  # pragma: no cover - defensive against broken response objects
        response_text = f"<读取响应内容失败:{err!r}>"
    if not isinstance(response_text, str):
        response_text = repr(response_text)
    response_text = response_text.strip()
    if len(response_text) > 200:
        response_text = f"{response_text[:200]}..."

    message_parts = ["TheMovieDb 返回数据不是有效JSON"]
    if status_code is not None:
        message_parts.append(f"HTTP状态码{status_code}")
    if content_type:
        message_parts.append(f"Content-Type{content_type}")
    if response_text:
        message_parts.append(f"响应内容:{response_text!r}")
    else:
        message_parts.append("响应内容为空")
    # Separate the parts so they do not run together in the log line.
    return ",".join(message_parts)
@staticmethod
def _validate_json_response(json_data):
    """
    Check that the decoded TMDB payload is a JSON object or array.

    Any other top-level value is rejected so it is not parsed further as if
    it were a dictionary. The error message includes the actual type name
    and a repr preview limited to 200 characters.

    :param json_data: decoded JSON value to check
    :return: None when the value is a dict or a list
    :raises TMDbException: for every other top-level value
    """
    if not isinstance(json_data, (dict, list)):
        preview = repr(json_data)
        preview = preview if len(preview) <= 200 else f"{preview[:200]}..."
        actual_type = type(json_data).__name__
        raise TMDbException(
            "TheMovieDb 返回数据格式异常期望JSON对象或数组"
            f"实际为{actual_type},内容:{preview}"
        )
@staticmethod
def _get_json_key(json_data, key):
    """
    Read one field from a TMDB object response.

    A non-dict payload raises TMDbException here rather than failing with
    AttributeError on the ``get`` call.

    :param json_data: decoded JSON value, expected to be a dict
    :param key: name of the field to read
    :return: the value stored under ``key``, or None when the key is absent
    :raises TMDbException: when ``json_data`` is not a dict
    """
    if isinstance(json_data, dict):
        return json_data.get(key)
    actual_type = type(json_data).__name__
    raise TMDbException(
        "TheMovieDb 返回数据格式异常:"
        f"期望JSON对象包含字段 {key!r},实际为{actual_type}"
    )
def cache_clear(self):
    """
    Call ``cache_clear()`` on ``self.request`` and return its result.
    """
    request_callable = self.request
    return request_callable.cache_clear()
@@ -190,6 +266,12 @@ class TMDb(object):
return 0
def _process_json_response(self, json_data, is_async=False):
"""
从TMDB对象响应中记录分页信息数组响应没有分页字段直接跳过。
"""
if not isinstance(json_data, dict):
return
if "page" in json_data:
self._page = json_data["page"]
@@ -201,6 +283,12 @@ class TMDb(object):
@staticmethod
def _handle_errors(json_data):
"""
将TMDB标准错误字段转换为统一异常非对象响应由结构校验提前处理。
"""
if not isinstance(json_data, dict):
return
if "errors" in json_data:
raise TMDbException(json_data["errors"])
@@ -228,11 +316,12 @@ class TMDb(object):
return self._request_obj(action, params, False, method, data, json, key)
json_data = self._get_response_json(req)
self._validate_json_response(json_data)
self._process_json_response(json_data, is_async=False)
self._handle_errors(json_data)
if key:
return json_data.get(key)
return self._get_json_key(json_data, key)
return json_data
async def _async_request_obj(self, action, params="", call_cached=True,
@@ -256,11 +345,12 @@ class TMDb(object):
return await self._async_request_obj(action, params, False, method, data, json, key)
json_data = self._get_response_json(req)
self._validate_json_response(json_data)
self._process_json_response(json_data, is_async=True)
self._handle_errors(json_data)
if key:
return json_data.get(key)
return self._get_json_key(json_data, key)
return json_data
def close(self):

View File

@@ -124,12 +124,15 @@ def _load_tmdb_class():
# Bind whatever _load_tmdb_class() returns so the tests can refer to it as TMDb.
TMDb = _load_tmdb_class()
# Read TMDbException from the exceptions module entry in sys.modules.
# NOTE(review): presumably that entry is registered by _load_tmdb_class(),
# whose body is not visible here - confirm.
TMDbException = sys.modules["app.modules.themoviedb.tmdbv3api.exceptions"].TMDbException
class _FakeResponse:
def __init__(self, payload: dict, headers: dict):
def __init__(self, payload, headers: dict, status_code: int = 200, text: str = ""):
self._payload = payload
self.headers = headers
self.status_code = status_code
self.text = text
self._lock = RLock()
def json(self):
@@ -152,6 +155,56 @@ class TmdbResponseCacheTest(TestCase):
self.assertEqual(result["headers"]["X-RateLimit-Remaining"], "39")
pickle.dumps(result)
def test_request_rejects_scalar_json_response(self):
    """
    A scalar JSON response must not enter the TMDB response cache, so that
    later object-style parsing does not crash.
    """
    client = TMDb()
    scalar_response = _FakeResponse(payload="upstream error", headers={})

    def _return_scalar_response(*args, **kwargs):
        """
        Stand in for the HTTP layer and hand back the scalar response.
        """
        return scalar_response

    client._req.get_res = _return_scalar_response
    with self.assertRaisesRegex(TMDbException, "返回数据格式异常"):
        TMDb.request.__wrapped__(client, "GET", "https://example.com", None, None)
def test_request_rejects_invalid_json_response(self):
    """
    A non-JSON response must be converted into TMDbException so callers can
    handle it the same way as a connection failure.
    """

    class _BadGatewayResponse:
        headers = {"Content-Type": "text/html"}
        status_code = 502
        text = "<html>bad gateway</html>"

        def json(self):
            """
            Simulate an upstream body that cannot be parsed as JSON.
            """
            raise ValueError("invalid json")

    def _return_bad_gateway(*args, **kwargs):
        """
        Stand in for the HTTP layer; a fresh response is built per call.
        """
        return _BadGatewayResponse()

    client = TMDb()
    client._req.get_res = _return_bad_gateway
    expected_pattern = "不是有效JSON.*HTTP状态码502.*bad gateway"
    with self.assertRaisesRegex(TMDbException, expected_pattern):
        TMDb.request.__wrapped__(client, "GET", "https://example.com", None, None)
def test_get_response_json_rejects_invalid_live_response(self):
    """
    A live (non-cached) response that fails to parse must also produce the
    unified diagnostic message.
    """

    class _EmptyBodyResponse:
        headers = {}
        status_code = 200
        text = ""

        def json(self):
            """
            Simulate HTTP 200 with an empty response body.
            """
            raise ValueError("empty")

    live_response = _EmptyBodyResponse()
    expected_pattern = "不是有效JSON.*响应内容为空"
    with self.assertRaisesRegex(TMDbException, expected_pattern):
        TMDb._get_response_json(live_response)
def test_async_request_returns_pickleable_snapshot(self):
tmdb = TMDb()
response = _FakeResponse(
@@ -229,3 +282,40 @@ class TmdbResponseCacheTest(TestCase):
self.assertEqual(second_results[0]["media_type"], "movie")
self.assertIsNot(first_results, second_results)
self.assertIsNot(first_results[0], second_results[0])
def test_request_obj_rejects_scalar_snapshot_before_key_lookup(self):
    """
    A scalar snapshot left in an old cache must not trigger AttributeError
    when the ``results`` field is read.
    """
    rate_limit_headers = {
        "x-ratelimit-remaining": "39",
        "x-ratelimit-reset": "1234567890",
    }
    stale_snapshot = {
        TMDb._RESPONSE_SNAPSHOT_MARKER: True,
        "headers": rate_limit_headers,
        "json": "upstream error",
    }

    def _return_stale_snapshot(*args, **kwargs):
        """
        Stand in for the request call and return the scalar snapshot.
        """
        return stale_snapshot

    client = TMDb()
    client.request = _return_stale_snapshot
    with self.assertRaisesRegex(TMDbException, "返回数据格式异常"):
        client._request_obj("/search/movie", key="results")
def test_async_request_obj_rejects_scalar_snapshot_before_key_lookup(self):
    """
    The async object request must also raise the unified TMDB exception when
    it reads an old scalar snapshot.
    """
    rate_limit_headers = {
        "x-ratelimit-remaining": "39",
        "x-ratelimit-reset": "1234567890",
    }
    stale_snapshot = {
        TMDb._RESPONSE_SNAPSHOT_MARKER: True,
        "headers": rate_limit_headers,
        "json": "upstream error",
    }

    async def _return_stale_snapshot(*args, **kwargs):
        """
        Simulate an async request that hits the cached abnormal snapshot.
        """
        return stale_snapshot

    client = TMDb()
    client.async_request = _return_stale_snapshot
    with self.assertRaisesRegex(TMDbException, "返回数据格式异常"):
        pending_call = client._async_request_obj("/search/movie", key="results")
        asyncio.run(pending_call)