From 285077fa250298921003ab508018cb65671452fd Mon Sep 17 00:00:00 2001 From: "Phong X. Nguyen" Date: Thu, 11 Jun 2026 16:38:07 +0000 Subject: [PATCH 1/3] Use ls-hpack's fast Huffman decoder for HPACK/QPACK strings The vendored ls-hpack code only ported the conservative 4-bit FSM decoder (lshpack_dec_huff_decode_full), although the 16-bit decode table it needs (hdecs) has been in huff-tables.h all along. Port the fast decoder, which emits up to 3 bytes per table lookup and falls back to the full decoder for codes longer than 16 bits. Decoding typical header values is about 2x faster (see the new tools/benchmark/benchmark_HuffmanDecode.cc). One deliberate divergence from upstream ls-hpack: the tail check also rejects padding of 8 or more bits, per RFC 7541 section 5.2, keeping the strictness of the FSM decoder. New differential tests assert the two decoders agree on validity and content for exhaustive short inputs and seeded fuzz. The only permitted difference is an exactly-sized destination, where the FSM decoder can spuriously report LSHPACK_ERR_MORE_BUF; ATS callers always allocate twice the encoded length, so they are unaffected. Co-Authored-By: Claude Fable 5 --- lib/ls-hpack/CMakeLists.txt | 1 + lib/ls-hpack/lshpack.cc | 190 +++++++++++++++++- lib/ls-hpack/lshpack.h | 3 + src/proxy/hdrs/HuffmanCodec.cc | 4 +- src/proxy/hdrs/unit_tests/test_Huffmancode.cc | 114 +++++++++++ tools/benchmark/CMakeLists.txt | 3 + tools/benchmark/benchmark_HuffmanDecode.cc | 142 +++++++++++++ 7 files changed, 454 insertions(+), 3 deletions(-) create mode 100644 tools/benchmark/benchmark_HuffmanDecode.cc diff --git a/lib/ls-hpack/CMakeLists.txt b/lib/ls-hpack/CMakeLists.txt index eb4a78bd0c2..ae4327c5f06 100644 --- a/lib/ls-hpack/CMakeLists.txt +++ b/lib/ls-hpack/CMakeLists.txt @@ -17,6 +17,7 @@ add_library(lshpack STATIC) target_sources(lshpack PRIVATE lshpack.cc huff-tables.h lshpack.h) +target_include_directories(lshpack PUBLIC ${CMAKE_SOURCE_DIR}/lib) # lshpack.h uses C99 array designators (see the hencs array in huff-tables.h). # In theory this limits portability because very old compilers that are C89 only diff --git a/lib/ls-hpack/lshpack.cc b/lib/ls-hpack/lshpack.cc index 6e7f0ee0e6f..f45ff1a5098 100644 --- a/lib/ls-hpack/lshpack.cc +++ b/lib/ls-hpack/lshpack.cc @@ -52,6 +52,8 @@ SOFTWARE. */ #include "huff-tables.h" +#include +#include #include #include @@ -61,7 +63,8 @@ namespace litespeed { namespace { -constexpr int64_t LSHPACK_ERR_MORE_BUF = -3; +constexpr int64_t LSHPACK_ERR_MORE_BUF = -3; +constexpr unsigned SHORTEST_CODE = 5; struct decode_status { uint8_t state; @@ -243,4 +246,189 @@ lshpack_dec_huff_decode_full(uint8_t const *src, uint32_t src_len, char *dst, ui return p_dst - dst; } +// Implementation taken from LiteSpeed: +// lshpack_dec_huff_decode +// +// The decoder is optimized for the common case. Most of the time, we decode +// data whose encoding is 16 bits or shorter. This lets us use a 64 KB table +// indexed by two bytes of input and outputs 1, 2, or 3 bytes at a time. +// +// In the case a longer code is encountered, we fall back to the original +// Huffman decoder that supports all code lengths. +int64_t +lshpack_dec_huff_decode(uint8_t const *src, uint32_t src_len, char *dst, uint32_t dst_len) +{ + char *const orig_dst = dst; + uint8_t const *const src_end = src + src_len; + char *const dst_end = dst + dst_len; + uintptr_t buf = 0; + unsigned avail_bits, len; + struct hdec hdec = {0, {0, 0, 0}}; + uint16_t idx; + int64_t r; + + avail_bits = 0; + while (true) { + if (src + sizeof(buf) <= src_end) { + len = (sizeof(buf) * 8 - avail_bits) >> 3; + avail_bits += len << 3; + switch (len) { +#if UINTPTR_MAX == 18446744073709551615ull + case 8: + buf <<= 8; + buf |= static_cast(*src++); + [[fallthrough]]; + case 7: + buf <<= 8; + buf |= static_cast(*src++); + [[fallthrough]]; + default: + buf <<= 48; + buf |= static_cast(*src++) << 40; + buf |= static_cast(*src++) << 32; + buf |= static_cast(*src++) << 24; + buf |= static_cast(*src++) << 16; +#else + [[fallthrough]]; + case 4: + buf <<= 8; + buf |= static_cast(*src++); + [[fallthrough]]; + case 3: + buf <<= 8; + buf |= static_cast(*src++); + [[fallthrough]]; + default: + buf <<= 16; +#endif + buf |= static_cast(*src++) << 8; + buf |= static_cast(*src++) << 0; + } + } else if (src < src_end) { + do { + buf <<= 8; + buf |= static_cast(*src++); + avail_bits += 8; + } while (src < src_end && avail_bits <= sizeof(buf) * 8 - 8); + } else { + break; // Normal case terminating condition: out of input + } + + if (dst_end - dst >= static_cast(8 * sizeof(buf) / SHORTEST_CODE) && avail_bits >= 16) { + // Fast path: don't check destination bounds + do { + idx = static_cast(buf >> (avail_bits - 16)); + hdec = hdecs[idx]; + dst[0] = static_cast(hdec.out[0]); + dst[1] = static_cast(hdec.out[1]); + dst[2] = static_cast(hdec.out[2]); + dst += hdec.lens & 3; + avail_bits -= hdec.lens >> 2; + } while (avail_bits >= 16 && hdec.lens); + if (avail_bits < 16) { + continue; + } + goto slow_path; + } else { + while (avail_bits >= 16) { + idx = static_cast(buf >> (avail_bits - 16)); + hdec = hdecs[idx]; + len = hdec.lens & 3; + if (len && dst + len <= dst_end) { + switch (len) { + case 3: + *dst++ = static_cast(hdec.out[0]); + *dst++ = static_cast(hdec.out[1]); + *dst++ = static_cast(hdec.out[2]); + break; + case 2: + *dst++ = static_cast(hdec.out[0]); + *dst++ = static_cast(hdec.out[1]); + break; + default: + *dst++ = static_cast(hdec.out[0]); + break; + } + avail_bits -= hdec.lens >> 2; + } else if (dst + len > dst_end) { + r = dst_end - dst - static_cast(len); + if (r > LSHPACK_ERR_MORE_BUF) { + r = LSHPACK_ERR_MORE_BUF; + } + return r; + } else { + goto slow_path; + } + } + } + } + + if (avail_bits >= SHORTEST_CODE) { + idx = static_cast(buf << (16 - avail_bits)); + idx |= (1 << (16 - avail_bits)) - 1; // EOF + if (idx == 0xFFFF && avail_bits < 8) { + goto end; + } + // If a byte or more of input is left, this means there is a valid + // encoding, not just EOF. + hdec = hdecs[idx]; + len = hdec.lens & 3; + if ((static_cast(hdec.lens) >> 2) > avail_bits) { + return -1; + } + if (len && dst + len <= dst_end) { + switch (len) { + case 3: + *dst++ = static_cast(hdec.out[0]); + *dst++ = static_cast(hdec.out[1]); + *dst++ = static_cast(hdec.out[2]); + break; + case 2: + *dst++ = static_cast(hdec.out[0]); + *dst++ = static_cast(hdec.out[1]); + break; + default: + *dst++ = static_cast(hdec.out[0]); + break; + } + avail_bits -= hdec.lens >> 2; + } else if (dst + len > dst_end) { + r = dst_end - dst - static_cast(len); + if (r > LSHPACK_ERR_MORE_BUF) { + r = LSHPACK_ERR_MORE_BUF; + } + return r; + } else { + // This must be an invalid code, otherwise it would have fit + return -1; + } + } + + if (avail_bits > 0) { + // ATS: unlike upstream ls-hpack, also reject padding of 8 or more bits + // (possible here after the final symbol consumed only part of the tail). + // RFC 7541 5.2: "A padding strictly longer than 7 bits MUST be treated as + // a decoding error." This keeps the strictness of the 4-bit FSM decoder. + if (avail_bits >= 8 || ((1u << avail_bits) - 1) != (buf & ((1u << avail_bits) - 1))) { + return -1; // Not EOF as expected + } + } + +end: + return dst - orig_dst; + +slow_path: + // Find previous byte boundary and finish decoding thence. + while ((avail_bits & 7) && dst > orig_dst) { + avail_bits += encode_table[static_cast(*--dst)].bits; + } + assert((avail_bits & 7) == 0); + src -= avail_bits >> 3; + r = lshpack_dec_huff_decode_full(src, static_cast(src_end - src), dst, static_cast(dst_end - dst)); + if (r >= 0) { + return dst - orig_dst + r; + } + return r; +} + } // namespace litespeed diff --git a/lib/ls-hpack/lshpack.h b/lib/ls-hpack/lshpack.h index 7069317b28a..3a4696a0ae7 100644 --- a/lib/ls-hpack/lshpack.h +++ b/lib/ls-hpack/lshpack.h @@ -65,4 +65,7 @@ int64_t lshpack_enc_huff_encode(uint8_t const *src, int64_t lshpack_dec_huff_decode_full(uint8_t const *src, uint32_t src_len, char *dst, uint32_t dst_len); +int64_t lshpack_dec_huff_decode(uint8_t const *src, uint32_t src_len, + char *dst, uint32_t dst_len); + } // namespace litespeed diff --git a/src/proxy/hdrs/HuffmanCodec.cc b/src/proxy/hdrs/HuffmanCodec.cc index e8a14275841..5f144d7294e 100644 --- a/src/proxy/hdrs/HuffmanCodec.cc +++ b/src/proxy/hdrs/HuffmanCodec.cc @@ -26,11 +26,11 @@ #include // Implementation taken from LiteSpeed: -// lshpack_dec_huff_decode_full +// lshpack_dec_huff_decode int64_t huffman_decode(char *dst, uint32_t dst_len, const uint8_t *src, uint32_t src_len) { - return litespeed::lshpack_dec_huff_decode_full(src, src_len, dst, dst_len); + return litespeed::lshpack_dec_huff_decode(src, src_len, dst, dst_len); } // Implementation taken from LiteSpeed: diff --git a/src/proxy/hdrs/unit_tests/test_Huffmancode.cc b/src/proxy/hdrs/unit_tests/test_Huffmancode.cc index 4b942d1f889..e80346a047e 100644 --- a/src/proxy/hdrs/unit_tests/test_Huffmancode.cc +++ b/src/proxy/hdrs/unit_tests/test_Huffmancode.cc @@ -22,10 +22,13 @@ */ #include "proxy/hdrs/HuffmanCodec.h" +#include "ls-hpack/lshpack.h" #include #include #include #include +#include +#include #include using namespace std; @@ -207,3 +210,114 @@ TEST_CASE("decode_errors", "[proxy][huffman]") free(dst); } } + +TEST_CASE("decode_known_vectors", "[proxy][huffman]") +{ + for (const auto &i : huffman_encode_test_data) { + char dst[64]; + int64_t len = huffman_decode(dst, sizeof(dst), i.expect, i.expect_len); + + REQUIRE(len == i.src_len); + REQUIRE(memcmp(dst, i.src, len) == 0); + } +} + +// The fast table-based decoder must agree with the original 4-bit FSM decoder +// on input validity and decoded content. The full decoder run with a roomy +// destination serves as the oracle. The one permitted difference is an +// exactly-sized destination (dst_len == decoded length): depending on how the +// trailing padding falls on nibble boundaries, either decoder may report +// LSHPACK_ERR_MORE_BUF where the other succeeds. One byte of headroom +// guarantees success for both; ATS callers always allocate twice the encoded +// length, which is strictly more than any decoded length. Sentinel bytes +// verify nothing is written past dst_len. +static void +require_decoder_parity(const uint8_t *src, uint32_t src_len, uint32_t dst_len) +{ + constexpr size_t SENTINEL_LEN = 64; + std::vector oracle_buf(2 * src_len + 8); + int64_t oracle = litespeed::lshpack_dec_huff_decode_full(src, src_len, oracle_buf.data(), oracle_buf.size()); + + std::vector fast_buf(dst_len + SENTINEL_LEN, '\xa5'); + int64_t fast = litespeed::lshpack_dec_huff_decode(src, src_len, fast_buf.data(), dst_len); + + if (oracle < 0 || static_cast(oracle) > dst_len) { + REQUIRE(fast < 0); + } else if (static_cast(oracle) == dst_len && fast < 0) { + REQUIRE(fast == -3); // LSHPACK_ERR_MORE_BUF on an exact-fit destination + } else { + REQUIRE(fast == oracle); + REQUIRE(memcmp(fast_buf.data(), oracle_buf.data(), oracle) == 0); + } + for (size_t i = dst_len; i < dst_len + SENTINEL_LEN; ++i) { + REQUIRE(fast_buf[i] == '\xa5'); + } +} + +TEST_CASE("decoder_parity_exhaustive_short", "[proxy][huffman]") +{ + uint8_t src[2]; + + for (uint32_t hi = 0; hi < 256; ++hi) { + src[0] = static_cast(hi); + for (uint32_t dst_len = 0; dst_len <= 4; ++dst_len) { + require_decoder_parity(src, 1, dst_len); + } + for (uint32_t lo = 0; lo < 256; ++lo) { + src[1] = static_cast(lo); + require_decoder_parity(src, 2, 8); + } + } +} + +TEST_CASE("decoder_parity_fuzz", "[proxy][huffman]") +{ + std::mt19937 rng(0x5eed); + std::uniform_int_distribution len_dist(0, 64); + std::uniform_int_distribution byte_dist(0, 255); + uint8_t src[64]; + + for (int i = 0; i < 100000; ++i) { + int src_len = len_dist(rng); + for (int j = 0; j < src_len; ++j) { + src[j] = static_cast(byte_dist(rng)); + } + // Bias some iterations toward trailing 0xff runs to exercise the + // EOS-prefix and padding paths. + if (i % 4 == 0 && src_len > 0) { + for (int j = byte_dist(rng) % src_len; j < src_len; ++j) { + src[j] = 0xff; + } + } + // Mostly roomy destination, sometimes a tight one to exercise the + // bounds-checked path and LSHPACK_ERR_MORE_BUF. + uint32_t dst_len = (i % 8 == 0) ? static_cast(byte_dist(rng) % 16) : static_cast(2 * src_len + 8); + require_decoder_parity(src, src_len, dst_len); + } +} + +TEST_CASE("decoder_roundtrip_fuzz", "[proxy][huffman]") +{ + std::mt19937 rng(0xc0ffee); + std::uniform_int_distribution len_dist(0, 300); + std::uniform_int_distribution byte_dist(0, 255); + + for (int i = 0; i < 20000; ++i) { + uint32_t src_len = static_cast(len_dist(rng)); + std::vector src(src_len); + for (auto &c : src) { + c = static_cast(byte_dist(rng)); + } + + // Worst-case Huffman expansion is 30 bits per input byte. + std::vector encoded(src_len * 4 + 8); + int64_t enc_len = huffman_encode(encoded.data(), encoded.size(), src.data(), src_len); + REQUIRE(enc_len >= 0); + + // One byte of headroom guarantees success (see require_decoder_parity). + std::vector decoded(src_len + 1); + int64_t dec_len = huffman_decode(decoded.data(), src_len + 1, encoded.data(), enc_len); + REQUIRE(dec_len == static_cast(src_len)); + REQUIRE(memcmp(decoded.data(), src.data(), src_len) == 0); + } +} diff --git a/tools/benchmark/CMakeLists.txt b/tools/benchmark/CMakeLists.txt index 49f25fad1c1..1fbb08555e8 100644 --- a/tools/benchmark/CMakeLists.txt +++ b/tools/benchmark/CMakeLists.txt @@ -47,3 +47,6 @@ target_link_libraries( ts::inkcache ts::inkhostdb ) + +add_executable(benchmark_HuffmanDecode benchmark_HuffmanDecode.cc) +target_link_libraries(benchmark_HuffmanDecode PRIVATE Catch2::Catch2WithMain lshpack) diff --git a/tools/benchmark/benchmark_HuffmanDecode.cc b/tools/benchmark/benchmark_HuffmanDecode.cc new file mode 100644 index 00000000000..188501a5f07 --- /dev/null +++ b/tools/benchmark/benchmark_HuffmanDecode.cc @@ -0,0 +1,142 @@ +/** @file + + Benchmark comparing the two ls-hpack Huffman decoders: the original 4-bit + FSM decoder (lshpack_dec_huff_decode_full) and the 16-bit table decoder + (lshpack_dec_huff_decode). + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +#include "ls-hpack/lshpack.h" + +#include +#include +#include +#include + +#define CATCH_CONFIG_ENABLE_BENCHMARKING +#include +#include + +namespace +{ + +// Representative header field values seen on the wire. +constexpr std::string_view SHORT_VALUE = "text/css"; +constexpr std::string_view MEDIUM_VALUE = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"; +constexpr std::string_view LONG_VALUE = + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"; +constexpr std::string_view CORPUS[] = { + SHORT_VALUE, + MEDIUM_VALUE, + LONG_VALUE, + "/", + "gzip, deflate, br, zstd", + "Mon, 21 Oct 2013 20:13:21 GMT", + "https://www.example.com/images/2026/06/some-article/hero.webp?w=1200&q=75", + "max-age=31536000, public, immutable", + "session=8f14e45fceea167a5a36dedd4bea2543; theme=dark; region=us-west-2; ab_bucket=37", + "ATS/10.1.0", +}; + +std::vector +encode(std::string_view text) +{ + std::vector encoded(text.size() * 4 + 8); + int64_t len = litespeed::lshpack_enc_huff_encode(reinterpret_cast(text.data()), // + reinterpret_cast(text.data()) + text.size(), encoded.data(), + static_cast(encoded.size())); + REQUIRE(len > 0); + encoded.resize(len); + return encoded; +} + +} // namespace + +TEST_CASE("huffman decode: full vs fast", "[bench][huffman]") +{ + char dst[4096]; + + // Sanity: both decoders agree on the corpus. + for (auto text : CORPUS) { + auto encoded = encode(text); + int64_t full = litespeed::lshpack_dec_huff_decode_full(encoded.data(), encoded.size(), dst, sizeof(dst)); + REQUIRE(full == static_cast(text.size())); + REQUIRE(memcmp(dst, text.data(), text.size()) == 0); + int64_t fast = litespeed::lshpack_dec_huff_decode(encoded.data(), encoded.size(), dst, sizeof(dst)); + REQUIRE(fast == full); + REQUIRE(memcmp(dst, text.data(), text.size()) == 0); + } + + auto short_enc = encode(SHORT_VALUE); + auto medium_enc = encode(MEDIUM_VALUE); + auto long_enc = encode(LONG_VALUE); + + BENCHMARK("full: short (8B)") + { + return litespeed::lshpack_dec_huff_decode_full(short_enc.data(), short_enc.size(), dst, sizeof(dst)); + }; + BENCHMARK("fast: short (8B)") + { + return litespeed::lshpack_dec_huff_decode(short_enc.data(), short_enc.size(), dst, sizeof(dst)); + }; + + BENCHMARK("full: medium (86B)") + { + return litespeed::lshpack_dec_huff_decode_full(medium_enc.data(), medium_enc.size(), dst, sizeof(dst)); + }; + BENCHMARK("fast: medium (86B)") + { + return litespeed::lshpack_dec_huff_decode(medium_enc.data(), medium_enc.size(), dst, sizeof(dst)); + }; + + BENCHMARK("full: long (113B)") + { + return litespeed::lshpack_dec_huff_decode_full(long_enc.data(), long_enc.size(), dst, sizeof(dst)); + }; + BENCHMARK("fast: long (113B)") + { + return litespeed::lshpack_dec_huff_decode(long_enc.data(), long_enc.size(), dst, sizeof(dst)); + }; + + std::vector> corpus_enc; + size_t corpus_bytes = 0; + for (auto text : CORPUS) { + corpus_enc.push_back(encode(text)); + corpus_bytes += text.size(); + } + WARN("corpus: " << corpus_bytes << " decoded bytes per iteration"); + + BENCHMARK("full: corpus") + { + int64_t acc = 0; + for (auto const &encoded : corpus_enc) { + acc += litespeed::lshpack_dec_huff_decode_full(encoded.data(), encoded.size(), dst, sizeof(dst)); + } + return acc; + }; + BENCHMARK("fast: corpus") + { + int64_t acc = 0; + for (auto const &encoded : corpus_enc) { + acc += litespeed::lshpack_dec_huff_decode(encoded.data(), encoded.size(), dst, sizeof(dst)); + } + return acc; + }; +} From e612e9f1babae2045f5763a31f3db2cc02f78b28 Mon Sep 17 00:00:00 2001 From: "Phong X. Nguyen" Date: Thu, 11 Jun 2026 16:53:13 +0000 Subject: [PATCH 2/3] bump source version of ls-hpack --- lib/ls-hpack/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ls-hpack/README.md b/lib/ls-hpack/README.md index b6ba2c769f5..4b22af4631d 100644 --- a/lib/ls-hpack/README.md +++ b/lib/ls-hpack/README.md @@ -23,4 +23,4 @@ Huffman decoding acceleration. The current implementation pulled into ATS is based upon what is currently the latest version, -[v2.3.4](https://github.com/litespeedtech/ls-hpack/releases/tag/v2.3.4). +[v2.3.5](https://github.com/litespeedtech/ls-hpack/releases/tag/v2.3.5). From 3572b1ae9db6e73f17eb07f1b67140b2c96a9c9f Mon Sep 17 00:00:00 2001 From: "Phong X. Nguyen" Date: Thu, 11 Jun 2026 17:12:18 +0000 Subject: [PATCH 3/3] Address review findings for the fast Huffman decoder Document the RFC 7541 padding divergence in the vendored README and pin it with a deterministic test (the fuzz coverage of that branch was seed dependent). Document huffman_decode's buffer sizing contract in the header. Export LSHPACK_ERR_MORE_BUF as upstream does instead of a test literal. Scope the lib/ include path to the consumers that need it rather than exporting it from the lshpack target. Consolidate the per-byte sentinel assertions, cutting the parity tests' assertion count from 10.7M to 0.4M. Co-Authored-By: Claude Fable 5 --- include/proxy/hdrs/HuffmanCodec.h | 10 ++++++ lib/ls-hpack/CMakeLists.txt | 1 - lib/ls-hpack/README.md | 14 +++++++++ lib/ls-hpack/lshpack.cc | 4 +-- lib/ls-hpack/lshpack.h | 3 ++ src/proxy/hdrs/CMakeLists.txt | 2 ++ src/proxy/hdrs/unit_tests/test_Huffmancode.cc | 31 ++++++++++++++++--- tools/benchmark/CMakeLists.txt | 1 + 8 files changed, 59 insertions(+), 7 deletions(-) diff --git a/include/proxy/hdrs/HuffmanCodec.h b/include/proxy/hdrs/HuffmanCodec.h index e979bdc3151..9d1aa7316f4 100644 --- a/include/proxy/hdrs/HuffmanCodec.h +++ b/include/proxy/hdrs/HuffmanCodec.h @@ -25,5 +25,15 @@ #include +/** Decode a Huffman-encoded string per RFC 7541 section 5.2. + + @return The decoded length, or a negative value on invalid input or + insufficient destination space. + + @note dst_len must be strictly greater than the decoded length; with an + exactly-sized destination the decoder may report insufficient space. + Huffman expands to at most 8/5 of the encoded length, so sizing dst at + 2x src_len always suffices (see xpack_decode_string). + */ int64_t huffman_decode(char *dst, uint32_t dst_len, uint8_t const *src, uint32_t src_len); int64_t huffman_encode(uint8_t *dst, uint32_t dst_len, uint8_t const *src, uint32_t src_len); diff --git a/lib/ls-hpack/CMakeLists.txt b/lib/ls-hpack/CMakeLists.txt index ae4327c5f06..eb4a78bd0c2 100644 --- a/lib/ls-hpack/CMakeLists.txt +++ b/lib/ls-hpack/CMakeLists.txt @@ -17,7 +17,6 @@ add_library(lshpack STATIC) target_sources(lshpack PRIVATE lshpack.cc huff-tables.h lshpack.h) -target_include_directories(lshpack PUBLIC ${CMAKE_SOURCE_DIR}/lib) # lshpack.h uses C99 array designators (see the hencs array in huff-tables.h). # In theory this limits portability because very old compilers that are C89 only diff --git a/lib/ls-hpack/README.md b/lib/ls-hpack/README.md index 4b22af4631d..2b7ccb1948e 100644 --- a/lib/ls-hpack/README.md +++ b/lib/ls-hpack/README.md @@ -24,3 +24,17 @@ Huffman decoding acceleration. The current implementation pulled into ATS is based upon what is currently the latest version, [v2.3.5](https://github.com/litespeedtech/ls-hpack/releases/tag/v2.3.5). + +# ATS Modifications + +The code is kept as close to upstream as practical, with one deliberate +behavioral divergence that any future re-sync must preserve: + +- `lshpack_dec_huff_decode` (the fast decoder) rejects Huffman padding of 8 or + more bits, as required by RFC 7541 section 5.2. Upstream's fast decoder + accepts such padding when it follows the final symbol near the end of the + input; the 4-bit FSM decoder (`lshpack_dec_huff_decode_full`) has always + rejected it. See the commented tail check in `lshpack.cc` and the + `decode_overlong_padding` test in + `src/proxy/hdrs/unit_tests/test_Huffmancode.cc`, which fails if the check is + dropped. diff --git a/lib/ls-hpack/lshpack.cc b/lib/ls-hpack/lshpack.cc index f45ff1a5098..b1d4167c654 100644 --- a/lib/ls-hpack/lshpack.cc +++ b/lib/ls-hpack/lshpack.cc @@ -52,6 +52,7 @@ SOFTWARE. */ #include "huff-tables.h" +#include "lshpack.h" #include #include #include @@ -63,8 +64,7 @@ namespace litespeed { namespace { -constexpr int64_t LSHPACK_ERR_MORE_BUF = -3; -constexpr unsigned SHORTEST_CODE = 5; +constexpr unsigned SHORTEST_CODE = 5; struct decode_status { uint8_t state; diff --git a/lib/ls-hpack/lshpack.h b/lib/ls-hpack/lshpack.h index 3a4696a0ae7..c64c1f19cc9 100644 --- a/lib/ls-hpack/lshpack.h +++ b/lib/ls-hpack/lshpack.h @@ -59,6 +59,9 @@ SOFTWARE. namespace litespeed { +// Matches upstream lshpack.h's LSHPACK_ERR_MORE_BUF. +constexpr int64_t LSHPACK_ERR_MORE_BUF = -3; + int64_t lshpack_enc_huff_encode(uint8_t const *src, uint8_t const *const src_end, uint8_t *dst, uint32_t dst_len); diff --git a/src/proxy/hdrs/CMakeLists.txt b/src/proxy/hdrs/CMakeLists.txt index 45a084dd6ee..4d036a654d8 100644 --- a/src/proxy/hdrs/CMakeLists.txt +++ b/src/proxy/hdrs/CMakeLists.txt @@ -64,6 +64,8 @@ if(BUILD_TESTING) lshpack configmanager ) + # For the ls-hpack decoder parity tests. + target_include_directories(test_proxy_hdrs PRIVATE ${CMAKE_SOURCE_DIR}/lib) add_catch2_test(NAME test_proxy_hdrs COMMAND test_proxy_hdrs) add_executable(test_proxy_hdrs_xpack unit_tests/test_XPACK.cc) diff --git a/src/proxy/hdrs/unit_tests/test_Huffmancode.cc b/src/proxy/hdrs/unit_tests/test_Huffmancode.cc index e80346a047e..6a7c2cb22a3 100644 --- a/src/proxy/hdrs/unit_tests/test_Huffmancode.cc +++ b/src/proxy/hdrs/unit_tests/test_Huffmancode.cc @@ -23,6 +23,7 @@ #include "proxy/hdrs/HuffmanCodec.h" #include "ls-hpack/lshpack.h" +#include #include #include #include @@ -244,14 +245,12 @@ require_decoder_parity(const uint8_t *src, uint32_t src_len, uint32_t dst_len) if (oracle < 0 || static_cast(oracle) > dst_len) { REQUIRE(fast < 0); } else if (static_cast(oracle) == dst_len && fast < 0) { - REQUIRE(fast == -3); // LSHPACK_ERR_MORE_BUF on an exact-fit destination + REQUIRE(fast == litespeed::LSHPACK_ERR_MORE_BUF); // exact-fit destination } else { REQUIRE(fast == oracle); REQUIRE(memcmp(fast_buf.data(), oracle_buf.data(), oracle) == 0); } - for (size_t i = dst_len; i < dst_len + SENTINEL_LEN; ++i) { - REQUIRE(fast_buf[i] == '\xa5'); - } + REQUIRE(std::all_of(fast_buf.begin() + dst_len, fast_buf.end(), [](char c) { return c == '\xa5'; })); } TEST_CASE("decoder_parity_exhaustive_short", "[proxy][huffman]") @@ -296,6 +295,30 @@ TEST_CASE("decoder_parity_fuzz", "[proxy][huffman]") } } +// Streams whose final symbol is followed by 8 or more bits of all-ones +// padding. RFC 7541 5.2 requires treating padding longer than 7 bits as a +// decoding error. Upstream ls-hpack's fast decoder accepts these; the ATS +// copy deliberately rejects them (see the tail check in +// lib/ls-hpack/lshpack.cc), and this test pins that divergence against a +// future re-sync. The exhaustive parity test cannot cover this branch: it is +// only reachable with inputs of four or more bytes. +TEST_CASE("decode_overlong_padding", "[proxy][huffman]") +{ + const static struct { + const uint8_t *input; + uint32_t input_len; + } test_cases[] = { + {reinterpret_cast("\xbf\xd0\x37\xd9\xff"), 5 }, + {reinterpret_cast("\xd9\x68\x63\x84\x4c\xfe\x77\xa0\xbc\xc9\xff"), 11}, + }; + + for (const auto &tc : test_cases) { + char dst[64]; + REQUIRE(litespeed::lshpack_dec_huff_decode(tc.input, tc.input_len, dst, sizeof(dst)) == -1); + REQUIRE(litespeed::lshpack_dec_huff_decode_full(tc.input, tc.input_len, dst, sizeof(dst)) == -1); + } +} + TEST_CASE("decoder_roundtrip_fuzz", "[proxy][huffman]") { std::mt19937 rng(0xc0ffee); diff --git a/tools/benchmark/CMakeLists.txt b/tools/benchmark/CMakeLists.txt index 1fbb08555e8..e58e8dcf26b 100644 --- a/tools/benchmark/CMakeLists.txt +++ b/tools/benchmark/CMakeLists.txt @@ -50,3 +50,4 @@ target_link_libraries( add_executable(benchmark_HuffmanDecode benchmark_HuffmanDecode.cc) target_link_libraries(benchmark_HuffmanDecode PRIVATE Catch2::Catch2WithMain lshpack) +target_include_directories(benchmark_HuffmanDecode PRIVATE ${CMAKE_SOURCE_DIR}/lib)