diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index 413da7dd..378b864b 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -335,7 +335,7 @@ def _check_ogc_requests(endpoint: str = "daily", req_type: str = "queryables"): def _error_body(resp: requests.Response): """ - Provide more informative error messages based on the response status. + Build an informative error message from an HTTP response. Parameters ---------- @@ -345,10 +345,19 @@ def _error_body(resp: requests.Response): Returns ------- str - The extracted error message. For status code 429, returns the 'message' - field from the JSON error object. For status code 403, returns a - predefined message indicating possible reasons for denial. For other - status codes, returns the raw response text. + An error message string assembled per status code: + + * **429** — predefined message describing the rate-limit and pointing + at the API-token path; the response body is not consulted. + * **403** — predefined message describing the most common cause + (query exceeding server limits); the response body is not + consulted. + * **other statuses** — attempts ``resp.json()`` and renders + ``": . ."`` from the JSON error + envelope. If the body is not JSON (e.g. an HTML 502 from a + gateway), falls back to ``": . "`` with + the first 200 characters of ``resp.text``; an empty body + degrades to ``": ."``. """ status = resp.status_code if status == 429: @@ -361,7 +370,14 @@ def _error_body(resp: requests.Response): "403: Query request denied. Possible reasons include " "query exceeding server limits." ) - j_txt = resp.json() + try: + j_txt = resp.json() + except ValueError: + snippet = (resp.text or "").strip()[:200] + reason = resp.reason or "Error" + if snippet: + return f"{status}: {reason}. {snippet}" + return f"{status}: {reason}." return ( f"{status}: {j_txt.get('code', 'Unknown type')}. " f"{j_txt.get('description', 'No description provided')}." diff --git a/tests/waterdata_utils_test.py b/tests/waterdata_utils_test.py index 8151ab37..f472000e 100644 --- a/tests/waterdata_utils_test.py +++ b/tests/waterdata_utils_test.py @@ -5,6 +5,7 @@ from dataretrieval.waterdata.utils import ( _arrange_cols, + _error_body, _format_api_dates, _get_args, _handle_stats_nesting, @@ -221,3 +222,55 @@ def test_format_api_dates_open_ended_range_with_none(): """A None / NaN endpoint becomes '..' in the output range.""" assert _format_api_dates(["2024-01-01", None], date=True) == "2024-01-01/.." assert _format_api_dates([None, "2024-01-01"], date=True) == "../2024-01-01" + + +def _make_response(status, body, reason=None, content_type="text/html"): + resp = requests.Response() + resp.status_code = status + resp.reason = reason + resp._content = body.encode("utf-8") + resp.headers["Content-Type"] = content_type + return resp + + +def test_error_body_handles_non_json_html_response(): + """A non-JSON 502 HTML body must be summarized, not raise JSONDecodeError.""" + html = ( + "\r\n502 Bad Gateway" + "

502 Bad Gateway


" + "
openresty
" + ) + resp = _make_response(502, html, reason="Bad Gateway") + msg = _error_body(resp) + assert "502" in msg + assert "Bad Gateway" in msg + + +def test_error_body_handles_empty_response_body(): + """An empty error body returns a status/reason message without crashing.""" + resp = _make_response(500, "", reason="Internal Server Error") + msg = _error_body(resp) + assert msg == "500: Internal Server Error." + + +def test_error_body_truncates_long_non_json_body(): + """Non-JSON bodies are truncated to 200 chars to keep the message readable.""" + body = ("x" * 200) + "Y" + ("z" * 299) + resp = _make_response(502, body, reason="Bad Gateway") + msg = _error_body(resp) + assert "x" * 200 in msg + assert (("x" * 200) + "Y") not in msg + + +def test_error_body_still_parses_well_formed_json(): + """JSON error bodies continue to render code/description fields.""" + resp = _make_response( + 400, + '{"code": "BadRequest", "description": "missing parameter"}', + reason="Bad Request", + content_type="application/json", + ) + msg = _error_body(resp) + assert "400" in msg + assert "BadRequest" in msg + assert "missing parameter" in msg