From 4d2ea5117f0694ba30584fb4eba5c93c53bdf8e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Branimir=20Karad=C5=BEi=C4=87?=
Date: Fri, 12 Jun 2026 20:08:17 -0700
Subject: [PATCH] TextDecoder: accept all WHATWG utf-8 encoding labels

The TextDecoder constructor only accepted the exact labels
"utf-8"/"UTF-8" and threw for every other spelling. Per the WHATWG
Encoding Standard, an encoding label is matched after stripping
leading/trailing ASCII whitespace and ASCII-lowercasing, and several
labels ("utf8", "unicode-1-1-utf-8", "unicode11utf8", "unicode20utf8",
"x-unicode20utf8") all map to UTF-8.

Consumers such as the Babylon.js glTF/Draco loader construct
`new TextDecoder("utf8")`; the throw aborted decoding mid-load and (in
Babylon Native) left the loader in a state that drove a native
out-of-bounds write, observed as non-deterministic heap corruption on
the Draco validation tests.

Normalize the label per spec and accept all UTF-8 labels. Adds
regression tests for "utf8", case/whitespace variants, the other
aliases, and a still-rejected unsupported encoding.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 Polyfills/TextDecoder/Source/TextDecoder.cpp | 34 +++++++++++++++++++++++++++++++++-
 Tests/UnitTests/Scripts/tests.ts             | 24 ++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/Polyfills/TextDecoder/Source/TextDecoder.cpp b/Polyfills/TextDecoder/Source/TextDecoder.cpp
index 10e1d00b..56085432 100644
--- a/Polyfills/TextDecoder/Source/TextDecoder.cpp
+++ b/Polyfills/TextDecoder/Source/TextDecoder.cpp
@@ -33,7 +33,39 @@ namespace
             if (info.Length() > 0 && info[0].IsString())
             {
                 auto encoding = info[0].As<Napi::String>().Utf8Value();
-                if (encoding != "utf-8" && encoding != "UTF-8")
+
+                // Normalize per the WHATWG Encoding Standard "get an encoding" algorithm:
+                // strip leading/trailing ASCII whitespace and lowercase before matching the
+                // label. Several labels (e.g. "utf8", "unicode-1-1-utf-8") all map to UTF-8;
+                // callers such as the glTF/Draco loader pass "utf8".
+                const auto isAsciiWhitespace = [](char c) {
+                    return c == '\t' || c == '\n' || c == '\f' || c == '\r' || c == ' ';
+                };
+                size_t begin = 0;
+                size_t end = encoding.size();
+                while (begin < end && isAsciiWhitespace(encoding[begin]))
+                {
+                    ++begin;
+                }
+                while (end > begin && isAsciiWhitespace(encoding[end - 1]))
+                {
+                    --end;
+                }
+                std::string label = encoding.substr(begin, end - begin);
+                for (auto& c : label)
+                {
+                    if (c >= 'A' && c <= 'Z')
+                    {
+                        c = static_cast<char>(c - 'A' + 'a');
+                    }
+                }
+
+                if (label != "utf-8" &&
+                    label != "utf8" &&
+                    label != "unicode-1-1-utf-8" &&
+                    label != "unicode11utf8" &&
+                    label != "unicode20utf8" &&
+                    label != "x-unicode20utf8")
                 {
                     throw Napi::Error::New(Env(), "TextDecoder: unsupported encoding '" + encoding + "', only 'utf-8' is supported");
                 }
diff --git a/Tests/UnitTests/Scripts/tests.ts b/Tests/UnitTests/Scripts/tests.ts
index e0647092..344a41cc 100644
--- a/Tests/UnitTests/Scripts/tests.ts
+++ b/Tests/UnitTests/Scripts/tests.ts
@@ -1490,6 +1490,30 @@ describe("TextDecoder", function () {
         expect(result).to.equal("H\0i");
         expect(result.length).to.equal(3);
     });
+
+    it("should accept the WHATWG 'utf8' label (no hyphen)", function () {
+        const decoder = new TextDecoder("utf8");
+        const result = decoder.decode(new Uint8Array([72, 105])); // "Hi"
+        expect(result).to.equal("Hi");
+    });
+
+    it("should accept utf-8 labels case-insensitively and with surrounding whitespace", function () {
+        for (const label of ["UTF-8", "UTF8", " utf-8 ", "\tUtf8\n"]) {
+            const decoder = new TextDecoder(label);
+            expect(decoder.decode(new Uint8Array([79, 75]))).to.equal("OK");
+        }
+    });
+
+    it("should accept the other WHATWG utf-8 aliases", function () {
+        for (const label of ["unicode-1-1-utf-8", "unicode11utf8", "unicode20utf8", "x-unicode20utf8"]) {
+            const decoder = new TextDecoder(label);
+            expect(decoder.decode(new Uint8Array([79, 75]))).to.equal("OK");
+        }
+    });
+
+    it("should still throw for a genuinely unsupported encoding", function () {
+        expect(() => new TextDecoder("utf-16")).to.throw();
+    });
 });
 
 describe("TextEncoder", function () {