From 6d7bd0827801ee3d9058bec0ccdb61aea11b34b4 Mon Sep 17 00:00:00 2001 From: Arturo Bernal Date: Thu, 16 Apr 2026 08:47:01 +0200 Subject: [PATCH] HTTPCLIENT-2418 - Fix default charset handling in SimpleBody for JSON content. Use UTF-8 instead of US-ASCII when no charset parameter is present. --- .../http/async/methods/SimpleBody.java | 15 ++-- .../http/async/methods/SimpleBodyTest.java | 77 +++++++++++++++++++ 2 files changed, 84 insertions(+), 8 deletions(-) create mode 100644 httpclient5/src/test/java/org/apache/hc/client5/http/async/methods/SimpleBodyTest.java diff --git a/httpclient5/src/main/java/org/apache/hc/client5/http/async/methods/SimpleBody.java b/httpclient5/src/main/java/org/apache/hc/client5/http/async/methods/SimpleBody.java index d78abc89cd..c1d635086a 100644 --- a/httpclient5/src/main/java/org/apache/hc/client5/http/async/methods/SimpleBody.java +++ b/httpclient5/src/main/java/org/apache/hc/client5/http/async/methods/SimpleBody.java @@ -55,8 +55,8 @@ static SimpleBody create(final String body, final ContentType contentType) { if (body.length() > 2048) { return new SimpleBody(null, body, contentType); } - final Charset charset = (contentType != null ? contentType : ContentType.DEFAULT_TEXT).getCharset(); - final byte[] bytes = body.getBytes(charset != null ? charset : StandardCharsets.US_ASCII); + final Charset charset = (contentType != null ? contentType : ContentType.DEFAULT_TEXT).getCharset(StandardCharsets.UTF_8); + final byte[] bytes = body.getBytes(charset); return new SimpleBody(bytes, null, contentType); } @@ -83,8 +83,8 @@ public byte[] getBodyBytes() { if (bodyAsBytes != null) { return bodyAsBytes; } else if (bodyAsText != null) { - final Charset charset = (contentType != null ? contentType : ContentType.DEFAULT_TEXT).getCharset(); - return bodyAsText.getBytes(charset != null ? charset : StandardCharsets.US_ASCII); + final Charset charset = (contentType != null ? 
contentType : ContentType.DEFAULT_TEXT).getCharset(StandardCharsets.UTF_8); + return bodyAsText.getBytes(charset); } else { return null; } @@ -97,8 +97,8 @@ public byte[] getBodyBytes() { */ public String getBodyText() { if (bodyAsBytes != null) { - final Charset charset = (contentType != null ? contentType : ContentType.DEFAULT_TEXT).getCharset(); - return new String(bodyAsBytes, charset != null ? charset : StandardCharsets.US_ASCII); + final Charset charset = (contentType != null ? contentType : ContentType.DEFAULT_TEXT).getCharset(StandardCharsets.UTF_8); + return new String(bodyAsBytes, charset); } return bodyAsText; } @@ -127,5 +127,4 @@ public String toString() { ", content type=" + contentType + "}"; } -} - +} \ No newline at end of file diff --git a/httpclient5/src/test/java/org/apache/hc/client5/http/async/methods/SimpleBodyTest.java b/httpclient5/src/test/java/org/apache/hc/client5/http/async/methods/SimpleBodyTest.java new file mode 100644 index 0000000000..1a7b494e78 --- /dev/null +++ b/httpclient5/src/test/java/org/apache/hc/client5/http/async/methods/SimpleBodyTest.java @@ -0,0 +1,77 @@ +/* + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + * + */ +package org.apache.hc.client5.http.async.methods; + +import java.nio.charset.StandardCharsets; + +import org.apache.hc.core5.http.ContentType; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +class TestSimpleBody { + + @Test + void testGetBodyTextUsesUtf8ForJsonWithoutCharsetParameter() { + final String message = "{\"msg\": \"Test emoji 👋\"}"; + final SimpleBody body = SimpleBody.create( + message.getBytes(StandardCharsets.UTF_8), + ContentType.parse("application/json")); + + Assertions.assertEquals(message, body.getBodyText()); + } + + @Test + void testGetBodyBytesUsesUtf8ForJsonWithoutCharsetParameter() { + final String message = "{\"msg\": \"Test emoji 👋\"}"; + final SimpleBody body = SimpleBody.create( + message, + ContentType.parse("application/json")); + + Assertions.assertArrayEquals(message.getBytes(StandardCharsets.UTF_8), body.getBodyBytes()); + } + + @Test + void testGetBodyTextUsesUtf8ForProblemJsonWithoutCharsetParameter() { + final String message = "{\"title\": \"Bad request 👋\"}"; + final SimpleBody body = SimpleBody.create( + message.getBytes(StandardCharsets.UTF_8), + ContentType.parse("application/problem+json")); + + Assertions.assertEquals(message, body.getBodyText()); + } + + @Test + void testExplicitCharsetStillWins() { + final String message = "{\"msg\": \"hi\"}"; + final byte[] utf16 = message.getBytes(StandardCharsets.UTF_16); + final SimpleBody body = SimpleBody.create( + utf16, + ContentType.parse("application/json; charset=UTF-16")); + + Assertions.assertEquals(message, body.getBodyText()); + } +} \ No newline at 
end of file