diff --git a/Polyfills/TextDecoder/Source/TextDecoder.cpp b/Polyfills/TextDecoder/Source/TextDecoder.cpp
index 10e1d00b..56085432 100644
--- a/Polyfills/TextDecoder/Source/TextDecoder.cpp
+++ b/Polyfills/TextDecoder/Source/TextDecoder.cpp
@@ -33,7 +33,39 @@ namespace
             if (info.Length() > 0 && info[0].IsString())
             {
                 auto encoding = info[0].As<Napi::String>().Utf8Value();
-                if (encoding != "utf-8" && encoding != "UTF-8")
+
+                // Normalize per the WHATWG Encoding Standard "get an encoding" algorithm:
+                // strip leading/trailing ASCII whitespace and lowercase before matching the
+                // label. Several labels (e.g. "utf8", "unicode-1-1-utf-8") all map to UTF-8;
+                // callers such as the glTF/Draco loader pass "utf8".
+                const auto isAsciiWhitespace = [](char c) {
+                    return c == '\t' || c == '\n' || c == '\f' || c == '\r' || c == ' ';
+                };
+                size_t begin = 0;
+                size_t end = encoding.size();
+                while (begin < end && isAsciiWhitespace(encoding[begin]))
+                {
+                    ++begin;
+                }
+                while (end > begin && isAsciiWhitespace(encoding[end - 1]))
+                {
+                    --end;
+                }
+                std::string label = encoding.substr(begin, end - begin);
+                for (auto& c : label)
+                {
+                    if (c >= 'A' && c <= 'Z')
+                    {
+                        c = static_cast<char>(c - 'A' + 'a');
+                    }
+                }
+
+                if (label != "utf-8" &&
+                    label != "utf8" &&
+                    label != "unicode-1-1-utf-8" &&
+                    label != "unicode11utf8" &&
+                    label != "unicode20utf8" &&
+                    label != "x-unicode20utf8")
                 {
                     throw Napi::Error::New(Env(), "TextDecoder: unsupported encoding '" + encoding + "', only 'utf-8' is supported");
                 }
diff --git a/Tests/UnitTests/Scripts/tests.ts b/Tests/UnitTests/Scripts/tests.ts
index e0647092..344a41cc 100644
--- a/Tests/UnitTests/Scripts/tests.ts
+++ b/Tests/UnitTests/Scripts/tests.ts
@@ -1490,6 +1490,30 @@ describe("TextDecoder", function () {
         expect(result).to.equal("H\0i");
         expect(result.length).to.equal(3);
     });
+
+    it("should accept the WHATWG 'utf8' label (no hyphen)", function () {
+        const decoder = new TextDecoder("utf8");
+        const result = decoder.decode(new Uint8Array([72, 105])); // "Hi"
+        expect(result).to.equal("Hi");
+    });
+
+    it("should accept utf-8 labels case-insensitively and with surrounding whitespace", function () {
+        for (const label of ["UTF-8", "UTF8", " utf-8 ", "\tUtf8\n"]) {
+            const decoder = new TextDecoder(label);
+            expect(decoder.decode(new Uint8Array([79, 75]))).to.equal("OK");
+        }
+    });
+
+    it("should accept the other WHATWG utf-8 aliases", function () {
+        for (const label of ["unicode-1-1-utf-8", "unicode11utf8", "unicode20utf8", "x-unicode20utf8"]) {
+            const decoder = new TextDecoder(label);
+            expect(decoder.decode(new Uint8Array([79, 75]))).to.equal("OK");
+        }
+    });
+
+    it("should still throw for a genuinely unsupported encoding", function () {
+        expect(() => new TextDecoder("utf-16")).to.throw();
+    });
 });
 
 describe("TextEncoder", function () {