diff --git a/include/proxy/hdrs/HuffmanCodec.h b/include/proxy/hdrs/HuffmanCodec.h
index e979bdc3151..9d1aa7316f4 100644
--- a/include/proxy/hdrs/HuffmanCodec.h
+++ b/include/proxy/hdrs/HuffmanCodec.h
@@ -25,5 +25,15 @@
 
 #include <cstdint>
 
+/** Decode a Huffman-encoded string per RFC 7541 section 5.2.
+
+    @return The decoded length, or a negative value on invalid input or
+      insufficient destination space.
+
+    @note dst_len must be strictly greater than the decoded length; with an
+      exactly-sized destination the decoder may report insufficient space.
+      Huffman expands to at most 8/5 of the encoded length, so sizing dst at
+      2x src_len always suffices (see xpack_decode_string).
+ */
 int64_t huffman_decode(char *dst, uint32_t dst_len, uint8_t const *src, uint32_t src_len);
 int64_t huffman_encode(uint8_t *dst, uint32_t dst_len, uint8_t const *src, uint32_t src_len);
diff --git a/lib/ls-hpack/README.md b/lib/ls-hpack/README.md
index b6ba2c769f5..2b7ccb1948e 100644
--- a/lib/ls-hpack/README.md
+++ b/lib/ls-hpack/README.md
@@ -23,4 +23,18 @@ Huffman decoding acceleration.
 
 The current implementation pulled into ATS is based upon what is currently the
 latest version,
-[v2.3.4](https://github.com/litespeedtech/ls-hpack/releases/tag/v2.3.4).
+[v2.3.5](https://github.com/litespeedtech/ls-hpack/releases/tag/v2.3.5).
+
+# ATS Modifications
+
+The code is kept as close to upstream as practical, with one deliberate
+behavioral divergence that any future re-sync must preserve:
+
+- `lshpack_dec_huff_decode` (the fast decoder) rejects Huffman padding of 8 or
+  more bits, as required by RFC 7541 section 5.2. Upstream's fast decoder
+  accepts such padding when it follows the final symbol near the end of the
+  input; the 4-bit FSM decoder (`lshpack_dec_huff_decode_full`) has always
+  rejected it. See the commented tail check in `lshpack.cc` and the
+  `decode_overlong_padding` test in
+  `src/proxy/hdrs/unit_tests/test_Huffmancode.cc`, which fails if the check is
+  dropped.
diff --git a/lib/ls-hpack/lshpack.cc b/lib/ls-hpack/lshpack.cc
index 6e7f0ee0e6f..b1d4167c654 100644
--- a/lib/ls-hpack/lshpack.cc
+++ b/lib/ls-hpack/lshpack.cc
@@ -52,6 +52,9 @@ SOFTWARE.
  */
 
 #include "huff-tables.h"
+#include "lshpack.h"
+#include <cassert>
+#include <cstddef>
 #include <cstdint>
 #include <cstring>
 
@@ -61,7 +64,7 @@ namespace litespeed {
 namespace
 {
 
-constexpr int64_t LSHPACK_ERR_MORE_BUF = -3;
+constexpr unsigned SHORTEST_CODE = 5;
 
 struct decode_status {
   uint8_t state;
@@ -243,4 +246,189 @@ lshpack_dec_huff_decode_full(uint8_t const *src, uint32_t src_len, char *dst, ui
   return p_dst - dst;
 }
 
+// Implementation taken from LiteSpeed:
+// lshpack_dec_huff_decode
+//
+// The decoder is optimized for the common case.  Most of the time, we decode
+// data whose encoding is 16 bits or shorter.  This lets us use a 64 KB table
+// indexed by two bytes of input and outputs 1, 2, or 3 bytes at a time.
+//
+// In the case a longer code is encountered, we fall back to the original
+// Huffman decoder that supports all code lengths.
+int64_t
+lshpack_dec_huff_decode(uint8_t const *src, uint32_t src_len, char *dst, uint32_t dst_len)
+{
+  char *const          orig_dst = dst;
+  uint8_t const *const src_end  = src + src_len;
+  char *const          dst_end  = dst + dst_len;
+  uintptr_t            buf      = 0;
+  unsigned             avail_bits, len;
+  struct hdec          hdec = {0, {0, 0, 0}};
+  uint16_t             idx;
+  int64_t              r;
+
+  avail_bits = 0;
+  while (true) {
+    if (src + sizeof(buf) <= src_end) {
+      len         = (sizeof(buf) * 8 - avail_bits) >> 3;
+      avail_bits += len << 3;
+      switch (len) {
+#if UINTPTR_MAX == 18446744073709551615ull
+      case 8:
+        buf <<= 8;
+        buf  |= static_cast<uintptr_t>(*src++);
+        [[fallthrough]];
+      case 7:
+        buf <<= 8;
+        buf  |= static_cast<uintptr_t>(*src++);
+        [[fallthrough]];
+      default:
+        buf <<= 48;
+        buf  |= static_cast<uintptr_t>(*src++) << 40;
+        buf  |= static_cast<uintptr_t>(*src++) << 32;
+        buf  |= static_cast<uintptr_t>(*src++) << 24;
+        buf  |= static_cast<uintptr_t>(*src++) << 16;
+#else
+        [[fallthrough]];
+      case 4:
+        buf <<= 8;
+        buf  |= static_cast<uintptr_t>(*src++);
+        [[fallthrough]];
+      case 3:
+        buf <<= 8;
+        buf  |= static_cast<uintptr_t>(*src++);
+        [[fallthrough]];
+      default:
+        buf <<= 16;
+#endif
+        buf |= static_cast<uintptr_t>(*src++) << 8;
+        buf |= static_cast<uintptr_t>(*src++) << 0;
+      }
+    } else if (src < src_end) {
+      do {
+        buf       <<= 8;
+        buf        |= static_cast<uintptr_t>(*src++);
+        avail_bits += 8;
+      } while (src < src_end && avail_bits <= sizeof(buf) * 8 - 8);
+    } else {
+      break; // Normal case terminating condition: out of input
+    }
+
+    if (dst_end - dst >= static_cast<ptrdiff_t>(8 * sizeof(buf) / SHORTEST_CODE) && avail_bits >= 16) {
+      // Fast path: don't check destination bounds
+      do {
+        idx         = static_cast<uint16_t>(buf >> (avail_bits - 16));
+        hdec        = hdecs[idx];
+        dst[0]      = static_cast<char>(hdec.out[0]);
+        dst[1]      = static_cast<char>(hdec.out[1]);
+        dst[2]      = static_cast<char>(hdec.out[2]);
+        dst        += hdec.lens & 3;
+        avail_bits -= hdec.lens >> 2;
+      } while (avail_bits >= 16 && hdec.lens);
+      if (avail_bits < 16) {
+        continue;
+      }
+      goto slow_path;
+    } else {
+      while (avail_bits >= 16) {
+        idx  = static_cast<uint16_t>(buf >> (avail_bits - 16));
+        hdec = hdecs[idx];
+        len  = hdec.lens & 3;
+        if (len && dst + len <= dst_end) {
+          switch (len) {
+          case 3:
+            *dst++ = static_cast<char>(hdec.out[0]);
+            *dst++ = static_cast<char>(hdec.out[1]);
+            *dst++ = static_cast<char>(hdec.out[2]);
+            break;
+          case 2:
+            *dst++ = static_cast<char>(hdec.out[0]);
+            *dst++ = static_cast<char>(hdec.out[1]);
+            break;
+          default:
+            *dst++ = static_cast<char>(hdec.out[0]);
+            break;
+          }
+          avail_bits -= hdec.lens >> 2;
+        } else if (dst + len > dst_end) {
+          r = dst_end - dst - static_cast<ptrdiff_t>(len);
+          if (r > LSHPACK_ERR_MORE_BUF) {
+            r = LSHPACK_ERR_MORE_BUF;
+          }
+          return r;
+        } else {
+          goto slow_path;
+        }
+      }
+    }
+  }
+
+  if (avail_bits >= SHORTEST_CODE) {
+    idx  = static_cast<uint16_t>(buf << (16 - avail_bits));
+    idx |= (1 << (16 - avail_bits)) - 1; // EOF
+    if (idx == 0xFFFF && avail_bits < 8) {
+      goto end;
+    }
+    // If a byte or more of input is left, this means there is a valid
+    // encoding, not just EOF.
+    hdec = hdecs[idx];
+    len  = hdec.lens & 3;
+    if ((static_cast<unsigned>(hdec.lens) >> 2) > avail_bits) {
+      return -1;
+    }
+    if (len && dst + len <= dst_end) {
+      switch (len) {
+      case 3:
+        *dst++ = static_cast<char>(hdec.out[0]);
+        *dst++ = static_cast<char>(hdec.out[1]);
+        *dst++ = static_cast<char>(hdec.out[2]);
+        break;
+      case 2:
+        *dst++ = static_cast<char>(hdec.out[0]);
+        *dst++ = static_cast<char>(hdec.out[1]);
+        break;
+      default:
+        *dst++ = static_cast<char>(hdec.out[0]);
+        break;
+      }
+      avail_bits -= hdec.lens >> 2;
+    } else if (dst + len > dst_end) {
+      r = dst_end - dst - static_cast<ptrdiff_t>(len);
+      if (r > LSHPACK_ERR_MORE_BUF) {
+        r = LSHPACK_ERR_MORE_BUF;
+      }
+      return r;
+    } else {
+      // This must be an invalid code, otherwise it would have fit
+      return -1;
+    }
+  }
+
+  if (avail_bits > 0) {
+    // ATS: unlike upstream ls-hpack, also reject padding of 8 or more bits
+    // (possible here after the final symbol consumed only part of the tail).
+    // RFC 7541 5.2: "A padding strictly longer than 7 bits MUST be treated as
+    // a decoding error." This keeps the strictness of the 4-bit FSM decoder.
+    if (avail_bits >= 8 || ((1u << avail_bits) - 1) != (buf & ((1u << avail_bits) - 1))) {
+      return -1; // Not EOF as expected
+    }
+  }
+
+end:
+  return dst - orig_dst;
+
+slow_path:
+  // Find previous byte boundary and finish decoding thence.
+  while ((avail_bits & 7) && dst > orig_dst) {
+    avail_bits += encode_table[static_cast<uint8_t>(*--dst)].bits;
+  }
+  assert((avail_bits & 7) == 0);
+  src -= avail_bits >> 3;
+  r = lshpack_dec_huff_decode_full(src, static_cast<uint32_t>(src_end - src), dst, static_cast<uint32_t>(dst_end - dst));
+  if (r >= 0) {
+    return dst - orig_dst + r;
+  }
+  return r;
+}
+
 } // namespace litespeed
diff --git a/lib/ls-hpack/lshpack.h b/lib/ls-hpack/lshpack.h
index 7069317b28a..c64c1f19cc9 100644
--- a/lib/ls-hpack/lshpack.h
+++ b/lib/ls-hpack/lshpack.h
@@ -59,10 +59,16 @@ SOFTWARE.
 
 namespace litespeed {
 
+// Matches upstream lshpack.h's LSHPACK_ERR_MORE_BUF.
+constexpr int64_t LSHPACK_ERR_MORE_BUF = -3;
+
 int64_t lshpack_enc_huff_encode(uint8_t const *src,
     uint8_t const *const src_end, uint8_t *dst, uint32_t dst_len);
 
 int64_t lshpack_dec_huff_decode_full(uint8_t const *src, uint32_t src_len,
                                  char *dst, uint32_t dst_len);
 
+int64_t lshpack_dec_huff_decode(uint8_t const *src, uint32_t src_len,
+                                 char *dst, uint32_t dst_len);
+
 } // namespace litespeed
diff --git a/src/proxy/hdrs/CMakeLists.txt b/src/proxy/hdrs/CMakeLists.txt
index 45a084dd6ee..4d036a654d8 100644
--- a/src/proxy/hdrs/CMakeLists.txt
+++ b/src/proxy/hdrs/CMakeLists.txt
@@ -64,6 +64,8 @@ if(BUILD_TESTING)
             lshpack
             configmanager
   )
+  # For the ls-hpack decoder parity tests.
+  target_include_directories(test_proxy_hdrs PRIVATE ${CMAKE_SOURCE_DIR}/lib)
   add_catch2_test(NAME test_proxy_hdrs COMMAND test_proxy_hdrs)
 
   add_executable(test_proxy_hdrs_xpack unit_tests/test_XPACK.cc)
diff --git a/src/proxy/hdrs/HuffmanCodec.cc b/src/proxy/hdrs/HuffmanCodec.cc
index e8a14275841..5f144d7294e 100644
--- a/src/proxy/hdrs/HuffmanCodec.cc
+++ b/src/proxy/hdrs/HuffmanCodec.cc
@@ -26,11 +26,11 @@
 #include <cstdint>
 
 // Implementation taken from LiteSpeed:
-// lshpack_dec_huff_decode_full
+// lshpack_dec_huff_decode
 int64_t
 huffman_decode(char *dst, uint32_t dst_len, const uint8_t *src, uint32_t src_len)
 {
-  return litespeed::lshpack_dec_huff_decode_full(src, src_len, dst, dst_len);
+  return litespeed::lshpack_dec_huff_decode(src, src_len, dst, dst_len);
 }
 
 // Implementation taken from LiteSpeed:
diff --git a/src/proxy/hdrs/unit_tests/test_Huffmancode.cc b/src/proxy/hdrs/unit_tests/test_Huffmancode.cc
index 4b942d1f889..6a7c2cb22a3 100644
--- a/src/proxy/hdrs/unit_tests/test_Huffmancode.cc
+++ b/src/proxy/hdrs/unit_tests/test_Huffmancode.cc
@@ -22,10 +22,14 @@
 */
 
 #include "proxy/hdrs/HuffmanCodec.h"
+#include "ls-hpack/lshpack.h"
+#include <algorithm>
 #include <cstdlib>
 #include <iostream>
 #include <cassert>
 #include <cstring>
+#include <random>
+#include <vector>
 #include <catch2/catch_test_macros.hpp>
 
 using namespace std;
@@ -207,3 +211,136 @@ TEST_CASE("decode_errors", "[proxy][huffman]")
     free(dst);
   }
 }
+
+TEST_CASE("decode_known_vectors", "[proxy][huffman]")
+{
+  for (const auto &i : huffman_encode_test_data) {
+    char    dst[64];
+    int64_t len = huffman_decode(dst, sizeof(dst), i.expect, i.expect_len);
+
+    REQUIRE(len == i.src_len);
+    REQUIRE(memcmp(dst, i.src, len) == 0);
+  }
+}
+
+// The fast table-based decoder must agree with the original 4-bit FSM decoder
+// on input validity and decoded content. The full decoder run with a roomy
+// destination serves as the oracle. The one permitted difference is an
+// exactly-sized destination (dst_len == decoded length): depending on how the
+// trailing padding falls on nibble boundaries, either decoder may report
+// LSHPACK_ERR_MORE_BUF where the other succeeds. One byte of headroom
+// guarantees success for both; ATS callers always allocate twice the encoded
+// length, which is strictly more than any decoded length. Sentinel bytes
+// verify nothing is written past dst_len.
+static void
+require_decoder_parity(const uint8_t *src, uint32_t src_len, uint32_t dst_len)
+{
+  constexpr size_t  SENTINEL_LEN = 64;
+  std::vector<char> oracle_buf(2 * src_len + 8);
+  int64_t           oracle = litespeed::lshpack_dec_huff_decode_full(src, src_len, oracle_buf.data(), oracle_buf.size());
+
+  std::vector<char> fast_buf(dst_len + SENTINEL_LEN, '\xa5');
+  int64_t           fast = litespeed::lshpack_dec_huff_decode(src, src_len, fast_buf.data(), dst_len);
+
+  if (oracle < 0 || static_cast<uint64_t>(oracle) > dst_len) {
+    REQUIRE(fast < 0);
+  } else if (static_cast<uint64_t>(oracle) == dst_len && fast < 0) {
+    REQUIRE(fast == litespeed::LSHPACK_ERR_MORE_BUF); // exact-fit destination
+  } else {
+    REQUIRE(fast == oracle);
+    REQUIRE(memcmp(fast_buf.data(), oracle_buf.data(), oracle) == 0);
+  }
+  REQUIRE(std::all_of(fast_buf.begin() + dst_len, fast_buf.end(), [](char c) { return c == '\xa5'; }));
+}
+
+TEST_CASE("decoder_parity_exhaustive_short", "[proxy][huffman]")
+{
+  uint8_t src[2];
+
+  for (uint32_t hi = 0; hi < 256; ++hi) {
+    src[0] = static_cast<uint8_t>(hi);
+    for (uint32_t dst_len = 0; dst_len <= 4; ++dst_len) {
+      require_decoder_parity(src, 1, dst_len);
+    }
+    for (uint32_t lo = 0; lo < 256; ++lo) {
+      src[1] = static_cast<uint8_t>(lo);
+      require_decoder_parity(src, 2, 8);
+    }
+  }
+}
+
+TEST_CASE("decoder_parity_fuzz", "[proxy][huffman]")
+{
+  std::mt19937                       rng(0x5eed);
+  std::uniform_int_distribution<int> len_dist(0, 64);
+  std::uniform_int_distribution<int> byte_dist(0, 255);
+  uint8_t                            src[64];
+
+  for (int i = 0; i < 100000; ++i) {
+    int src_len = len_dist(rng);
+    for (int j = 0; j < src_len; ++j) {
+      src[j] = static_cast<uint8_t>(byte_dist(rng));
+    }
+    // Bias some iterations toward trailing 0xff runs to exercise the
+    // EOS-prefix and padding paths.
+    if (i % 4 == 0 && src_len > 0) {
+      for (int j = byte_dist(rng) % src_len; j < src_len; ++j) {
+        src[j] = 0xff;
+      }
+    }
+    // Mostly roomy destination, sometimes a tight one to exercise the
+    // bounds-checked path and LSHPACK_ERR_MORE_BUF.
+    uint32_t dst_len = (i % 8 == 0) ? static_cast<uint32_t>(byte_dist(rng) % 16) : static_cast<uint32_t>(2 * src_len + 8);
+    require_decoder_parity(src, src_len, dst_len);
+  }
+}
+
+// Streams whose final symbol is followed by 8 or more bits of all-ones
+// padding. RFC 7541 5.2 requires treating padding longer than 7 bits as a
+// decoding error. Upstream ls-hpack's fast decoder accepts these; the ATS
+// copy deliberately rejects them (see the tail check in
+// lib/ls-hpack/lshpack.cc), and this test pins that divergence against a
+// future re-sync. The exhaustive parity test cannot cover this branch: it is
+// only reachable with inputs of four or more bytes.
+TEST_CASE("decode_overlong_padding", "[proxy][huffman]")
+{
+  const static struct {
+    const uint8_t *input;
+    uint32_t       input_len;
+  } test_cases[] = {
+    {reinterpret_cast<const uint8_t *>("\xbf\xd0\x37\xd9\xff"),                         5 },
+    {reinterpret_cast<const uint8_t *>("\xd9\x68\x63\x84\x4c\xfe\x77\xa0\xbc\xc9\xff"), 11},
+  };
+
+  for (const auto &tc : test_cases) {
+    char dst[64];
+    REQUIRE(litespeed::lshpack_dec_huff_decode(tc.input, tc.input_len, dst, sizeof(dst)) == -1);
+    REQUIRE(litespeed::lshpack_dec_huff_decode_full(tc.input, tc.input_len, dst, sizeof(dst)) == -1);
+  }
+}
+
+TEST_CASE("decoder_roundtrip_fuzz", "[proxy][huffman]")
+{
+  std::mt19937                       rng(0xc0ffee);
+  std::uniform_int_distribution<int> len_dist(0, 300);
+  std::uniform_int_distribution<int> byte_dist(0, 255);
+
+  for (int i = 0; i < 20000; ++i) {
+    uint32_t             src_len = static_cast<uint32_t>(len_dist(rng));
+    std::vector<uint8_t> src(src_len);
+    for (auto &c : src) {
+      c = static_cast<uint8_t>(byte_dist(rng));
+    }
+
+    // Worst-case Huffman expansion is 30 bits per input byte.
+    std::vector<uint8_t> encoded(src_len * 4 + 8);
+    int64_t              enc_len = huffman_encode(encoded.data(), encoded.size(), src.data(), src_len);
+    REQUIRE(enc_len >= 0);
+
+    // One byte of headroom guarantees success (see require_decoder_parity).
+    std::vector<char> decoded(src_len + 1);
+    int64_t           dec_len = huffman_decode(decoded.data(), src_len + 1, encoded.data(), enc_len);
+    REQUIRE(dec_len == static_cast<int64_t>(src_len));
+    REQUIRE(memcmp(decoded.data(), src.data(), src_len) == 0);
+  }
+}
diff --git a/tools/benchmark/CMakeLists.txt b/tools/benchmark/CMakeLists.txt
index 49f25fad1c1..e58e8dcf26b 100644
--- a/tools/benchmark/CMakeLists.txt
+++ b/tools/benchmark/CMakeLists.txt
@@ -47,3 +47,7 @@ target_link_libraries(
           ts::inkcache
           ts::inkhostdb
 )
+
+add_executable(benchmark_HuffmanDecode benchmark_HuffmanDecode.cc)
+target_link_libraries(benchmark_HuffmanDecode PRIVATE Catch2::Catch2WithMain lshpack)
+target_include_directories(benchmark_HuffmanDecode PRIVATE ${CMAKE_SOURCE_DIR}/lib)
diff --git a/tools/benchmark/benchmark_HuffmanDecode.cc b/tools/benchmark/benchmark_HuffmanDecode.cc
new file mode 100644
index 00000000000..188501a5f07
--- /dev/null
+++ b/tools/benchmark/benchmark_HuffmanDecode.cc
@@ -0,0 +1,142 @@
+/** @file
+
+  Benchmark comparing the two ls-hpack Huffman decoders: the original 4-bit
+  FSM decoder (lshpack_dec_huff_decode_full) and the 16-bit table decoder
+  (lshpack_dec_huff_decode).
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+ */
+
+#include "ls-hpack/lshpack.h"
+
+#include <cstdint>
+#include <cstring>
+#include <string_view>
+#include <vector>
+
+#define CATCH_CONFIG_ENABLE_BENCHMARKING
+#include <catch2/catch_test_macros.hpp>
+#include <catch2/benchmark/catch_benchmark.hpp>
+
+namespace
+{
+
+// Representative header field values seen on the wire.
+constexpr std::string_view SHORT_VALUE  = "text/css";
+constexpr std::string_view MEDIUM_VALUE = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
+constexpr std::string_view LONG_VALUE =
+  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36";
+constexpr std::string_view CORPUS[] = {
+  SHORT_VALUE,
+  MEDIUM_VALUE,
+  LONG_VALUE,
+  "/",
+  "gzip, deflate, br, zstd",
+  "Mon, 21 Oct 2013 20:13:21 GMT",
+  "https://www.example.com/images/2026/06/some-article/hero.webp?w=1200&q=75",
+  "max-age=31536000, public, immutable",
+  "session=8f14e45fceea167a5a36dedd4bea2543; theme=dark; region=us-west-2; ab_bucket=37",
+  "ATS/10.1.0",
+};
+
+std::vector<uint8_t>
+encode(std::string_view text)
+{
+  std::vector<uint8_t> encoded(text.size() * 4 + 8);
+  int64_t              len = litespeed::lshpack_enc_huff_encode(reinterpret_cast<const uint8_t *>(text.data()), //
+                                                                reinterpret_cast<const uint8_t *>(text.data()) + text.size(), encoded.data(),
+                                                                static_cast<uint32_t>(encoded.size()));
+  REQUIRE(len > 0);
+  encoded.resize(len);
+  return encoded;
+}
+
+} // namespace
+
+TEST_CASE("huffman decode: full vs fast", "[bench][huffman]")
+{
+  char dst[4096];
+
+  // Sanity: both decoders agree on the corpus.
+  for (auto text : CORPUS) {
+    auto    encoded = encode(text);
+    int64_t full    = litespeed::lshpack_dec_huff_decode_full(encoded.data(), encoded.size(), dst, sizeof(dst));
+    REQUIRE(full == static_cast<int64_t>(text.size()));
+    REQUIRE(memcmp(dst, text.data(), text.size()) == 0);
+    int64_t fast = litespeed::lshpack_dec_huff_decode(encoded.data(), encoded.size(), dst, sizeof(dst));
+    REQUIRE(fast == full);
+    REQUIRE(memcmp(dst, text.data(), text.size()) == 0);
+  }
+
+  auto short_enc  = encode(SHORT_VALUE);
+  auto medium_enc = encode(MEDIUM_VALUE);
+  auto long_enc   = encode(LONG_VALUE);
+
+  BENCHMARK("full: short (8B)")
+  {
+    return litespeed::lshpack_dec_huff_decode_full(short_enc.data(), short_enc.size(), dst, sizeof(dst));
+  };
+  BENCHMARK("fast: short (8B)")
+  {
+    return litespeed::lshpack_dec_huff_decode(short_enc.data(), short_enc.size(), dst, sizeof(dst));
+  };
+
+  BENCHMARK("full: medium (86B)")
+  {
+    return litespeed::lshpack_dec_huff_decode_full(medium_enc.data(), medium_enc.size(), dst, sizeof(dst));
+  };
+  BENCHMARK("fast: medium (86B)")
+  {
+    return litespeed::lshpack_dec_huff_decode(medium_enc.data(), medium_enc.size(), dst, sizeof(dst));
+  };
+
+  BENCHMARK("full: long (113B)")
+  {
+    return litespeed::lshpack_dec_huff_decode_full(long_enc.data(), long_enc.size(), dst, sizeof(dst));
+  };
+  BENCHMARK("fast: long (113B)")
+  {
+    return litespeed::lshpack_dec_huff_decode(long_enc.data(), long_enc.size(), dst, sizeof(dst));
+  };
+
+  std::vector<std::vector<uint8_t>> corpus_enc;
+  size_t                            corpus_bytes = 0;
+  for (auto text : CORPUS) {
+    corpus_enc.push_back(encode(text));
+    corpus_bytes += text.size();
+  }
+  WARN("corpus: " << corpus_bytes << " decoded bytes per iteration");
+
+  BENCHMARK("full: corpus")
+  {
+    int64_t acc = 0;
+    for (auto const &encoded : corpus_enc) {
+      acc += litespeed::lshpack_dec_huff_decode_full(encoded.data(), encoded.size(), dst, sizeof(dst));
+    }
+    return acc;
+  };
+  BENCHMARK("fast: corpus")
+  {
+    int64_t acc = 0;
+    for (auto const &encoded : corpus_enc) {
+      acc += litespeed::lshpack_dec_huff_decode(encoded.data(), encoded.size(), dst, sizeof(dst));
+    }
+    return acc;
+  };
+}