From b25914760ba85f2f064a42f1888fbd816042d7e0 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Thu, 4 Jun 2026 15:49:14 +1000 Subject: [PATCH] token: drain the matched-block insert deflate (#951) send_deflated_token() adds a matched block to the compressor history with deflate(Z_INSERT_ONLY). Our bundled zlib implements Z_INSERT_ONLY (it produces no output and consumes the input in one call), but a build against a system zlib lacks it and falls back to Z_SYNC_FLUSH (see the top of the file), which emits a flush block into obuf. For a large incompressible matched token that block exceeds AVAIL_OUT_SIZE(CHUNK_SIZE), so deflate returned with avail_in != 0 and the transfer aborted: "deflate on token returned 0 (N bytes left)" at token.c The insert output is never sent -- the receiver rebuilds the matching history itself in see_deflate_token() -- so loop, resetting the output buffer, and discard it. Drain with the same condition as the data loop above: until the input is consumed AND avail_out != 0. Stopping at avail_in == 0 alone can leave pending output in the deflate stream (a full output buffer with bytes still buffered), which would then be emitted by the next real deflate send and corrupt the stream. A bundled-zlib build still finishes in one iteration. Fixes: #951 --- testsuite/compress-zlib-insert_test.py | 72 ++++++++++++++++++++++++++ token.c | 31 ++++++++--- 2 files changed, 95 insertions(+), 8 deletions(-) create mode 100644 testsuite/compress-zlib-insert_test.py diff --git a/testsuite/compress-zlib-insert_test.py b/testsuite/compress-zlib-insert_test.py new file mode 100644 index 000000000..a7be60d99 --- /dev/null +++ b/testsuite/compress-zlib-insert_test.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 +# Regression test for issue #951. +# +# When rsync is built against a system zlib (no bundled Z_INSERT_ONLY +# extension), send_deflated_token() falls back to Z_SYNC_FLUSH to add a +# matched block to the compressor history -- but Z_SYNC_FLUSH emits a flush +# block into a fixed-size obuf. A large incompressible matched block +# overflowed obuf and aborted the transfer with +# "deflate on token returned 0 (N bytes left)" at token.c +# The fix loops, discarding the (never-sent) output, until the input is +# consumed. A bundled-zlib build emits no output here, so this test passes +# on either build; it is RED only on a pre-fix system-zlib build. +# +# The matched-block insert path needs all of: --compress-choice=zlib (the +# only method that feeds matched blocks into the deflate history), a large +# --block-size so a single matched token exceeds obuf, incompressible +# (random) data, and a delta over a real connection (compression is skipped +# for purely local transfers). We assert the upload SUCCEEDS *and* the +# result matches the source, so the fix is verified correct, not merely +# non-crashing. + +import filecmp +import shutil +import subprocess + +from rsyncfns import ( + SCRATCHDIR, make_data_file, makepath, rmtree, rsync_argv, + start_test_daemon, test_fail, write_daemon_conf, +) + +DAEMON_PORT = 12922 +SIZE = 8 * 1024 * 1024 # enough blocks to exercise many inserts +# 65535 (0xffff) is a single insert fragment larger than the deflate output +# buffer (AVAIL_OUT_SIZE(CHUNK_SIZE) ~= 32816). It exercises both failure +# modes of the pre-fix code: the obuf overflow abort, and -- once that is +# loop-drained -- pending insert output left in the stream that leaks into +# the next send. A block that splits into chunks ending in a tiny fragment +# (e.g. 131072 = 65535+65535+2) would hide the pending case. +BLOCK = 65535 + +moddir = SCRATCHDIR / 'zmod' +srcdir = SCRATCHDIR / 'zsrc' +rmtree(moddir) +rmtree(srcdir) +makepath(moddir) +makepath(srcdir) + +# Source is incompressible. The basis (already in the module) is the same +# data with a few bytes changed in one block, so every other 128KB block +# matches exactly and is sent as a token -> the deflate insert path. +make_data_file(srcdir / 'big.dat', SIZE) +shutil.copy(srcdir / 'big.dat', moddir / 'big.dat') +with open(srcdir / 'big.dat', 'r+b') as f: + f.seek(SIZE // 2 + 1000) + f.write(b'\x00' * 32) + +conf = write_daemon_conf([('zmod', {'path': str(moddir), 'read only': 'no'})]) +url = start_test_daemon(conf, DAEMON_PORT) + 'zmod/' + +# -I forces the delta even though the basis has the same size (otherwise the +# quick check skips the file and the matched-block insert path never runs). +proc = subprocess.run( + rsync_argv('-zI', '--compress-choice=zlib', '--no-whole-file', + f'--block-size={BLOCK}', str(srcdir / 'big.dat'), url), + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) +if proc.returncode != 0: + print(proc.stdout) + test_fail(f"zlib delta upload failed (rc={proc.returncode}); " + "regression of #951 deflate-token overflow") + +if not filecmp.cmp(srcdir / 'big.dat', moddir / 'big.dat', shallow=False): + test_fail("uploaded file differs from source -- zlib delta corruption") diff --git a/token.c b/token.c index 62ffae151..f910f74b3 100644 --- a/token.c +++ b/token.c @@ -481,14 +481,29 @@ send_deflated_token(int f, int32 token, struct map_struct *buf, OFF_T offset, in tx_strm.avail_in = n1; if (protocol_version >= 31) /* Newer protocols avoid a data-duplicating bug */ offset += n1; - tx_strm.next_out = (Bytef *) obuf; - tx_strm.avail_out = AVAIL_OUT_SIZE(CHUNK_SIZE); - r = deflate(&tx_strm, Z_INSERT_ONLY); - if (r != Z_OK || tx_strm.avail_in != 0) { - rprintf(FERROR, "deflate on token returned %d (%d bytes left)\n", - r, tx_strm.avail_in); - exit_cleanup(RERR_STREAMIO); - } + /* With our bundled zlib's Z_INSERT_ONLY this produces no + * output and consumes the input in one call. A build + * against a system zlib lacks Z_INSERT_ONLY and falls back + * to Z_SYNC_FLUSH (see top of file), which emits a flush + * block we discard -- and for an incompressible token that + * block can exceed obuf. Loop, resetting the output buffer, + * until all the input is consumed so a large token can't + * overflow obuf and abort the transfer (#951). Drain until + * avail_out != 0 too: a full output buffer can leave pending + * bytes that would otherwise leak into the next real deflate + * send and corrupt the stream (same condition as the data loop + * above). The discarded output is not sent: the receiver + * rebuilds the matching history itself in see_deflate_token(). */ + do { + tx_strm.next_out = (Bytef *) obuf; + tx_strm.avail_out = AVAIL_OUT_SIZE(CHUNK_SIZE); + r = deflate(&tx_strm, Z_INSERT_ONLY); + if (r != Z_OK) { + rprintf(FERROR, "deflate on token returned %d (%d bytes left)\n", + r, tx_strm.avail_in); + exit_cleanup(RERR_STREAMIO); + } + } while (tx_strm.avail_in != 0 || tx_strm.avail_out == 0); } while (toklen > 0); } }