From 7fe098a72ed9bd1a1841ff9031b443832ccd4a7c Mon Sep 17 00:00:00 2001 From: Zawwarsami16 Date: Thu, 14 May 2026 03:41:33 -0400 Subject: [PATCH 1/3] fix(sse): preserve trailing newlines + use spec-correct line splitting in format_sse_event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit splitlines() drops trailing empty strings and treats 8 extra characters (\v, \f, \x1c-\x1e, \x85, U+2028, U+2029) as line breaks. SSE only recognizes \n, \r\n, and \r per the spec, and trailing empty data lines are part of the payload — silently dropping them corrupts the stream. Both the data: and the comment branch were affected. Adds 8 unit tests covering trailing-newline preservation, CRLF/CR normalization, and the splitlines() quirks (U+2028, vertical tab) staying inside the payload. Closes #15500 --- fastapi/sse.py | 14 +++++++++-- tests/test_sse.py | 64 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 75 insertions(+), 3 deletions(-) diff --git a/fastapi/sse.py b/fastapi/sse.py index 901d824964942..a55c7acb522e7 100644 --- a/fastapi/sse.py +++ b/fastapi/sse.py @@ -143,6 +143,16 @@ def _check_data_exclusive(self) -> "ServerSentEvent": return self +def _split_sse_lines(value: str) -> list[str]: + # SSE recognizes only `\n`, `\r\n`, and `\r` as line terminators + # (https://html.spec.whatwg.org/multipage/server-sent-events.html). + # `str.splitlines()` is wrong on two counts: it treats 8 extra characters + # (`\v`, `\f`, `\x1c`-`\x1e`, `\x85`, U+2028, U+2029) as line breaks, and + # it drops a trailing empty string, so e.g. `"hello\n"` would emit only + # one `data:` line instead of two. + return value.replace("\r\n", "\n").replace("\r", "\n").split("\n") + + def format_sse_event( *, data_str: Annotated[ @@ -193,14 +203,14 @@ def format_sse_event( lines: list[str] = [] if comment is not None: - for line in comment.splitlines(): + for line in _split_sse_lines(comment): lines.append(f": {line}") if event is not None: lines.append(f"event: {event}") if data_str is not None: - for line in data_str.splitlines(): + for line in _split_sse_lines(data_str): lines.append(f"data: {line}") if id is not None: diff --git a/tests/test_sse.py b/tests/test_sse.py index 6dfec61838ae6..23361e989fa12 100644 --- a/tests/test_sse.py +++ b/tests/test_sse.py @@ -6,7 +6,7 @@ import pytest from fastapi import APIRouter, FastAPI from fastapi.responses import EventSourceResponse -from fastapi.sse import ServerSentEvent +from fastapi.sse import ServerSentEvent, format_sse_event from fastapi.testclient import TestClient from pydantic import BaseModel @@ -316,3 +316,65 @@ def test_no_keepalive_when_fast(client: TestClient): assert response.status_code == 200 # KEEPALIVE_COMMENT is ": ping\n\n". assert ": ping\n" not in response.text + + +# format_sse_event line-splitting tests +# +# These cover the splitlines() footgun: it drops trailing empty strings and +# treats 8 extra characters as line breaks (vertical tab, form feed, FS/GS/RS, +# NEL, LINE SEPARATOR, PARAGRAPH SEPARATOR). SSE only recognizes \n, \r\n, \r. + + +def test_format_sse_event_preserves_trailing_newline(): + # "Hello\n" should produce TWO data lines: "Hello" and "" (the trailing + # empty line). Pre-fix, splitlines() ate the trailing empty string. + assert format_sse_event(data_str="Hello\n") == b"data: Hello\ndata: \n\n" + + +def test_format_sse_event_preserves_trailing_double_newline(): + assert ( + format_sse_event(data_str="Hello\n\n") + == b"data: Hello\ndata: \ndata: \n\n" + ) + + +def test_format_sse_event_single_newline_data(): + assert format_sse_event(data_str="\n") == b"data: \ndata: \n\n" + + +def test_format_sse_event_crlf_normalizes_to_lf(): + # \r\n is a valid SSE line terminator and should be normalized to \n + # for output, producing the same two data lines as \n input would. + assert ( + format_sse_event(data_str="Hello\r\nWorld") + == b"data: Hello\ndata: World\n\n" + ) + + +def test_format_sse_event_bare_cr_treated_as_line_break(): + # Lone \r is also a valid SSE line terminator per the spec. + assert ( + format_sse_event(data_str="Hello\rWorld") + == b"data: Hello\ndata: World\n\n" + ) + + +def test_format_sse_event_unicode_line_separator_not_split(): + # U+2028 LINE SEPARATOR is treated as a line break by str.splitlines() + # but is NOT a line terminator in the SSE spec. It must stay inside the + # data payload, not be promoted to a new "data:" line. + assert ( + format_sse_event(data_str="A
B") == "data: A
B\n\n".encode() + ) + + +def test_format_sse_event_vertical_tab_not_split(): + # \v is treated as a line break by splitlines() but not by SSE. + assert ( + format_sse_event(data_str="A\vB") == b"data: A\x0bB\n\n" + ) + + +def test_format_sse_event_comment_preserves_trailing_newline(): + # Same bug existed in the comment branch. + assert format_sse_event(comment="hi\n") == b": hi\n: \n\n" From 29112d6d2b8f26b4a476c49731808834818cdd00 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci-lite[bot]" <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com> Date: Thu, 14 May 2026 07:43:20 +0000 Subject: [PATCH 2/3] =?UTF-8?q?=F0=9F=8E=A8=20Auto=20format?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_sse.py | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/tests/test_sse.py b/tests/test_sse.py index 23361e989fa12..11dfa4cd35aa4 100644 --- a/tests/test_sse.py +++ b/tests/test_sse.py @@ -332,10 +332,7 @@ def test_format_sse_event_preserves_trailing_newline(): def test_format_sse_event_preserves_trailing_double_newline(): - assert ( - format_sse_event(data_str="Hello\n\n") - == b"data: Hello\ndata: \ndata: \n\n" - ) + assert format_sse_event(data_str="Hello\n\n") == b"data: Hello\ndata: \ndata: \n\n" def test_format_sse_event_single_newline_data(): @@ -346,33 +343,25 @@ def test_format_sse_event_crlf_normalizes_to_lf(): # \r\n is a valid SSE line terminator and should be normalized to \n # for output, producing the same two data lines as \n input would. assert ( - format_sse_event(data_str="Hello\r\nWorld") - == b"data: Hello\ndata: World\n\n" + format_sse_event(data_str="Hello\r\nWorld") == b"data: Hello\ndata: World\n\n" ) def test_format_sse_event_bare_cr_treated_as_line_break(): # Lone \r is also a valid SSE line terminator per the spec. - assert ( - format_sse_event(data_str="Hello\rWorld") - == b"data: Hello\ndata: World\n\n" - ) + assert format_sse_event(data_str="Hello\rWorld") == b"data: Hello\ndata: World\n\n" def test_format_sse_event_unicode_line_separator_not_split(): # U+2028 LINE SEPARATOR is treated as a line break by str.splitlines() # but is NOT a line terminator in the SSE spec. It must stay inside the # data payload, not be promoted to a new "data:" line. - assert ( - format_sse_event(data_str="A
B") == "data: A
B\n\n".encode() - ) + assert format_sse_event(data_str="A
B") == "data: A
B\n\n".encode() def test_format_sse_event_vertical_tab_not_split(): # \v is treated as a line break by splitlines() but not by SSE. - assert ( - format_sse_event(data_str="A\vB") == b"data: A\x0bB\n\n" - ) + assert format_sse_event(data_str="A\vB") == b"data: A\x0bB\n\n" def test_format_sse_event_comment_preserves_trailing_newline(): From 866e577031a22a8bbca6f0d122fc129164725565 Mon Sep 17 00:00:00 2001 From: Zawwar Sami Date: Wed, 27 May 2026 19:31:11 +0000 Subject: [PATCH 3/3] address review: shorten comment, parametrize tests, add empty-data case --- fastapi/sse.py | 8 ++---- tests/test_sse.py | 70 ++++++++++++++++------------------------------- 2 files changed, 25 insertions(+), 53 deletions(-) diff --git a/fastapi/sse.py b/fastapi/sse.py index a55c7acb522e7..4259248e679d9 100644 --- a/fastapi/sse.py +++ b/fastapi/sse.py @@ -144,12 +144,8 @@ def _check_data_exclusive(self) -> "ServerSentEvent": def _split_sse_lines(value: str) -> list[str]: - # SSE recognizes only `\n`, `\r\n`, and `\r` as line terminators - # (https://html.spec.whatwg.org/multipage/server-sent-events.html). - # `str.splitlines()` is wrong on two counts: it treats 8 extra characters - # (`\v`, `\f`, `\x1c`-`\x1e`, `\x85`, U+2028, U+2029) as line breaks, and - # it drops a trailing empty string, so e.g. `"hello\n"` would emit only - # one `data:` line instead of two. + # Split on SSE-spec line terminators only (\n, \r\n, \r), preserving + # trailing empty strings. return value.replace("\r\n", "\n").replace("\r", "\n").split("\n") diff --git a/tests/test_sse.py b/tests/test_sse.py index 11dfa4cd35aa4..39ab9c42433a6 100644 --- a/tests/test_sse.py +++ b/tests/test_sse.py @@ -318,52 +318,28 @@ def test_no_keepalive_when_fast(client: TestClient): assert ": ping\n" not in response.text -# format_sse_event line-splitting tests -# -# These cover the splitlines() footgun: it drops trailing empty strings and -# treats 8 extra characters as line breaks (vertical tab, form feed, FS/GS/RS, -# NEL, LINE SEPARATOR, PARAGRAPH SEPARATOR). SSE only recognizes \n, \r\n, \r. - - -def test_format_sse_event_preserves_trailing_newline(): - # "Hello\n" should produce TWO data lines: "Hello" and "" (the trailing - # empty line). Pre-fix, splitlines() ate the trailing empty string. - assert format_sse_event(data_str="Hello\n") == b"data: Hello\ndata: \n\n" - - -def test_format_sse_event_preserves_trailing_double_newline(): - assert format_sse_event(data_str="Hello\n\n") == b"data: Hello\ndata: \ndata: \n\n" - - -def test_format_sse_event_single_newline_data(): - assert format_sse_event(data_str="\n") == b"data: \ndata: \n\n" - - -def test_format_sse_event_crlf_normalizes_to_lf(): - # \r\n is a valid SSE line terminator and should be normalized to \n - # for output, producing the same two data lines as \n input would. - assert ( - format_sse_event(data_str="Hello\r\nWorld") == b"data: Hello\ndata: World\n\n" - ) - - -def test_format_sse_event_bare_cr_treated_as_line_break(): - # Lone \r is also a valid SSE line terminator per the spec. - assert format_sse_event(data_str="Hello\rWorld") == b"data: Hello\ndata: World\n\n" - - -def test_format_sse_event_unicode_line_separator_not_split(): - # U+2028 LINE SEPARATOR is treated as a line break by str.splitlines() - # but is NOT a line terminator in the SSE spec. It must stay inside the - # data payload, not be promoted to a new "data:" line. - assert format_sse_event(data_str="A
B") == "data: A
B\n\n".encode() - - -def test_format_sse_event_vertical_tab_not_split(): - # \v is treated as a line break by splitlines() but not by SSE. - assert format_sse_event(data_str="A\vB") == b"data: A\x0bB\n\n" +@pytest.mark.parametrize( + ("data", "expected_result"), + [ + ("Hello\n", b"data: Hello\ndata: \n\n"), + ("Hello\n\n", b"data: Hello\ndata: \ndata: \n\n"), + ("\n", b"data: \ndata: \n\n"), + ("Hello\r\nWorld", b"data: Hello\ndata: World\n\n"), + ("Hello\rWorld", b"data: Hello\ndata: World\n\n"), + ("A\u2028B", "data: A\u2028B\n\n".encode()), + ("A\vB", b"data: A\x0bB\n\n"), + ], +) +def test_format_sse_event_splitlines_behavior_in_data( + data: str, expected_result: bytes +) -> None: + assert format_sse_event(data_str=data) == expected_result + + +def test_format_sse_event_splitlines_behavior_in_comment(): + assert format_sse_event(comment="hi\n") == b": hi\n: \n\n" -def test_format_sse_event_comment_preserves_trailing_newline(): - # Same bug existed in the comment branch. - assert format_sse_event(comment="hi\n") == b": hi\n: \n\n" +def test_format_sse_event_keeps_empty_data_line(): + payload = format_sse_event(data_str="") + assert payload == b"data: \n\n"