From a43da95381805e9de1850239da2ce507a3ef77e0 Mon Sep 17 00:00:00 2001 From: SAY-5 Date: Tue, 12 May 2026 00:18:48 -0700 Subject: [PATCH 1/3] fix: handle non-JSON error response bodies in _error_body (#242) Signed-off-by: SAY-5 --- dataretrieval/waterdata/utils.py | 9 +++++- tests/waterdata_utils_test.py | 53 ++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index 413da7dd..c1094ae4 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -361,7 +361,14 @@ def _error_body(resp: requests.Response): "403: Query request denied. Possible reasons include " "query exceeding server limits." ) - j_txt = resp.json() + try: + j_txt = resp.json() + except ValueError: + snippet = (resp.text or "").strip()[:200] + reason = resp.reason or "Error" + if snippet: + return f"{status}: {reason}. {snippet}" + return f"{status}: {reason}." return ( f"{status}: {j_txt.get('code', 'Unknown type')}. " f"{j_txt.get('description', 'No description provided')}." diff --git a/tests/waterdata_utils_test.py b/tests/waterdata_utils_test.py index 8151ab37..87475875 100644 --- a/tests/waterdata_utils_test.py +++ b/tests/waterdata_utils_test.py @@ -5,6 +5,7 @@ from dataretrieval.waterdata.utils import ( _arrange_cols, + _error_body, _format_api_dates, _get_args, _handle_stats_nesting, @@ -221,3 +222,55 @@ def test_format_api_dates_open_ended_range_with_none(): """A None / NaN endpoint becomes '..' in the output range.""" assert _format_api_dates(["2024-01-01", None], date=True) == "2024-01-01/.." assert _format_api_dates([None, "2024-01-01"], date=True) == "../2024-01-01" + + +def _make_response(status, body, reason=None, content_type="text/html"): + resp = requests.Response() + resp.status_code = status + resp.reason = reason + resp._content = body.encode("utf-8") + resp.headers["Content-Type"] = content_type + return resp + + +def test_error_body_handles_non_json_html_response(): + """A non-JSON 502 HTML body must be summarized, not raise JSONDecodeError.""" + html = ( + "\r\n502 Bad Gateway" + "

502 Bad Gateway


" + "
openresty
" + ) + resp = _make_response(502, html, reason="Bad Gateway") + msg = _error_body(resp) + assert "502" in msg + assert "Bad Gateway" in msg + + +def test_error_body_handles_empty_response_body(): + """An empty error body returns a status/reason message without crashing.""" + resp = _make_response(500, "", reason="Internal Server Error") + msg = _error_body(resp) + assert msg == "500: Internal Server Error." + + +def test_error_body_truncates_long_non_json_body(): + """Non-JSON bodies are truncated to 200 chars to keep the message readable.""" + body = "x" * 500 + resp = _make_response(502, body, reason="Bad Gateway") + msg = _error_body(resp) + assert len(msg) <= 64 + 200 # status/reason prefix + 200-char snippet + assert "x" * 200 in msg + + +def test_error_body_still_parses_well_formed_json(): + """JSON error bodies continue to render code/description fields.""" + resp = _make_response( + 400, + '{"code": "BadRequest", "description": "missing parameter"}', + reason="Bad Request", + content_type="application/json", + ) + msg = _error_body(resp) + assert "400" in msg + assert "BadRequest" in msg + assert "missing parameter" in msg From 3ef950535628431b0c475cb1c69e7e490923c58b Mon Sep 17 00:00:00 2001 From: Timothy Hodson <34148978+thodson-usgs@users.noreply.github.com> Date: Tue, 12 May 2026 08:41:43 -0500 Subject: [PATCH 2/3] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- tests/waterdata_utils_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/waterdata_utils_test.py b/tests/waterdata_utils_test.py index 87475875..f472000e 100644 --- a/tests/waterdata_utils_test.py +++ b/tests/waterdata_utils_test.py @@ -255,11 +255,11 @@ def test_error_body_handles_empty_response_body(): def test_error_body_truncates_long_non_json_body(): """Non-JSON bodies are truncated to 200 chars to keep the message readable.""" - body = "x" * 500 + body = ("x" * 200) + "Y" + ("z" * 299) resp = _make_response(502, body, reason="Bad Gateway") msg = _error_body(resp) - assert len(msg) <= 64 + 200 # status/reason prefix + 200-char snippet assert "x" * 200 in msg + assert (("x" * 200) + "Y") not in msg def test_error_body_still_parses_well_formed_json(): From 0aa5a55fbf881bea3ee467546b09b834f0696de8 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Tue, 12 May 2026 08:43:41 -0500 Subject: [PATCH 3/3] Accurate docstring for _error_body The existing docstring described behavior that never matched the code: it said 429 returned a JSON `message` field (it returns a hardcoded string) and that other statuses returned "raw response text" (they return code/description from the JSON envelope, or a status/reason + snippet for non-JSON bodies). Spell out the three branches (429 hardcoded, 403 hardcoded, JSON-with-non-JSON-fallback) so the docstring matches what the function actually returns. Per Copilot review on PR #274. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/waterdata/utils.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py index c1094ae4..378b864b 100644 --- a/dataretrieval/waterdata/utils.py +++ b/dataretrieval/waterdata/utils.py @@ -335,7 +335,7 @@ def _check_ogc_requests(endpoint: str = "daily", req_type: str = "queryables"): def _error_body(resp: requests.Response): """ - Provide more informative error messages based on the response status. + Build an informative error message from an HTTP response. Parameters ---------- @@ -345,10 +345,19 @@ def _error_body(resp: requests.Response): Returns ------- str - The extracted error message. For status code 429, returns the 'message' - field from the JSON error object. For status code 403, returns a - predefined message indicating possible reasons for denial. For other - status codes, returns the raw response text. + An error message string assembled per status code: + + * **429** — predefined message describing the rate-limit and pointing + at the API-token path; the response body is not consulted. + * **403** — predefined message describing the most common cause + (query exceeding server limits); the response body is not + consulted. + * **other statuses** — attempts ``resp.json()`` and renders + ``": . ."`` from the JSON error + envelope. If the body is not JSON (e.g. an HTML 502 from a + gateway), falls back to ``": . "`` with + the first 200 characters of ``resp.text``; an empty body + degrades to ``": ."``. """ status = resp.status_code if status == 429: