From f6afc9ad8ebc9699858f62c82528429fb27d3e6b Mon Sep 17 00:00:00 2001 From: Kim Altintop Date: Tue, 26 May 2026 12:54:16 +0200 Subject: [PATCH 1/3] commitlog: Truncate segment before resuming `read_exact` doesn't distinguish between EOF and not enough bytes to fill the given buffer. We may thus consider a segment valid to be resumed, but leave in trailing bytes (less than the commit header length). That can cause silent data loss, because appending more data will render the segment corrupt: a restart will then start a new segment, leaving anything after the trailing bytes unreachable. To fix this, truncate the segment to the size determined by `Metadata::extract` before resuming writes. --- crates/commitlog/src/repo/mod.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/crates/commitlog/src/repo/mod.rs b/crates/commitlog/src/repo/mod.rs index 3d79f7f1e28..23c9a97df6e 100644 --- a/crates/commitlog/src/repo/mod.rs +++ b/crates/commitlog/src/repo/mod.rs @@ -384,7 +384,15 @@ pub fn resume_segment_writer( max_commit: _, } = meta; let mut writer = repo.open_segment_writer(offset)?; + // Ensure that the segment's size is exactly what we determined. + // + // When `Metadata` encounters EOF, it could be that there actually are + // trailing bytes in the segment, but less than the commit header + // length. This is difficult to detect due to the use of `read_exact`, + // so ensure we remove any trailing bytes. + writer.ftruncate(tx_range.end, size_in_bytes)?; // Ensure we have enough space for this segment. + // // The segment could have been created without the `fallocate` feature // enabled, so we call this here again to ensure writes can't fail due // to ENOSPC. From 5b4919d8193007e082e5825a4ec94559080c25ac Mon Sep 17 00:00:00 2001 From: Kim Altintop Date: Tue, 26 May 2026 17:12:53 +0200 Subject: [PATCH 2/3] Add test --- crates/commitlog/tests/random_payload/mod.rs | 63 +++++++++++++++++++- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/crates/commitlog/tests/random_payload/mod.rs b/crates/commitlog/tests/random_payload/mod.rs index 752ab885836..ff149443c13 100644 --- a/crates/commitlog/tests/random_payload/mod.rs +++ b/crates/commitlog/tests/random_payload/mod.rs @@ -1,7 +1,9 @@ +use std::io::Write; + use log::info; -use spacetimedb_commitlog::repo::Repo; +use spacetimedb_commitlog::repo::{Repo, SegmentLen}; use spacetimedb_commitlog::tests::helpers::enable_logging; -use spacetimedb_commitlog::{commitlog, payload, repo, Commitlog, Options}; +use spacetimedb_commitlog::{commit, commitlog, payload, repo, Commitlog, Options}; use spacetimedb_paths::server::CommitLogDir; use spacetimedb_paths::FromPathUnchecked; use tempfile::tempdir; @@ -196,3 +198,60 @@ fn resume_empty_segment() { } } } + +/// Tests that resuming a segment that has trailing bytes smaller than a +/// commitlog header causes those trailing bytes to be removed. +/// +/// Regression test for https://github.com/clockworklabs/SpacetimeDB/pull/5116 +#[test] +fn resume_small_trailing_garbage() { + enable_logging(); + + let root = tempdir().unwrap(); + let path = CommitLogDir::from_path_unchecked(root.path()); + + let repo = repo::Fs::new(path, None).unwrap(); + // Write some data. + { + let mut clog = commitlog::Generic::open(&repo, <_>::default()).unwrap(); + for (i, payload) in compressible_payloads().take(1024).enumerate() { + clog.commit([(i as u64, payload)]).unwrap(); + clog.flush().unwrap(); + clog.sync(); + } + } + + // Add some extra bytes, less than the commit header length. + let last_segment_size = { + let segments = repo.existing_offsets().unwrap(); + let mut last_segment = repo.open_segment_writer(segments.last().copied().unwrap()).unwrap(); + last_segment.write_all(&[67u8; commit::Header::LEN - 1]).unwrap(); + last_segment.flush().unwrap(); + last_segment.sync_all().unwrap(); + last_segment.segment_len().unwrap() + }; + { + let mut clog = commitlog::Generic::open(&repo, <_>::default()).unwrap(); + + // The extra bytes should have been truncated away. + let segments = repo.existing_offsets().unwrap(); + let mut last_segment = repo.open_segment_writer(segments.last().copied().unwrap()).unwrap(); + assert_eq!( + last_segment.segment_len().unwrap(), + last_segment_size - (commit::Header::LEN - 1) as u64 + ); + + // Add some more data. + for (i, payload) in compressible_payloads() + .take(1024) + .enumerate() + .map(|(offset, payload)| (offset + 1024, payload)) + { + clog.commit([(i as u64, payload)]).unwrap(); + clog.flush().unwrap(); + clog.sync(); + } + + assert_eq!(2048, clog.commits_from(0).map(Result::unwrap).count()); + } +} From 56e46c80685c30db44574dddad9eefe8362d7d32 Mon Sep 17 00:00:00 2001 From: Kim Altintop Date: Wed, 27 May 2026 12:00:25 +0200 Subject: [PATCH 3/3] Assert that we're not truncating more than a commit header --- crates/commitlog/src/repo/mod.rs | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/crates/commitlog/src/repo/mod.rs b/crates/commitlog/src/repo/mod.rs index 23c9a97df6e..af04ad918bb 100644 --- a/crates/commitlog/src/repo/mod.rs +++ b/crates/commitlog/src/repo/mod.rs @@ -8,7 +8,7 @@ use spacetimedb_fs_utils::compression::Zstd; pub use spacetimedb_fs_utils::compression::{CompressOnce, CompressionStats}; use crate::{ - commit::Commit, + commit::{self, Commit}, error, index::{IndexFile, IndexFileMut}, segment::{self, FileLike, Header, Metadata, OffsetIndexWriter, Reader, Writer}, @@ -375,14 +375,6 @@ pub fn resume_segment_writer( if reader.sealed() { Ok(ResumedSegment::Sealed(meta)) } else { - let Metadata { - header: _, - tx_range, - size_in_bytes, - max_epoch, - max_commit_offset: _, - max_commit: _, - } = meta; let mut writer = repo.open_segment_writer(offset)?; // Ensure that the segment's size is exactly what we determined. // @@ -390,7 +382,15 @@ pub fn resume_segment_writer( // trailing bytes in the segment, but less than the commit header // length. This is difficult to detect due to the use of `read_exact`, // so ensure we remove any trailing bytes. - writer.ftruncate(tx_range.end, size_in_bytes)?; + // + // To be extra cautious, check that no more than the header length + // bytes are left over before truncating the segment. This is an assert + // because it would be a bug in `Metadata::extract`. + assert!( + writer.segment_len()? < meta.size_in_bytes + commit::Header::LEN as u64, + "{repo}: trailing bytes exceed commit header length in segment {offset}" + ); + writer.ftruncate(meta.tx_range.end, meta.size_in_bytes)?; // Ensure we have enough space for this segment. // // The segment could have been created without the `fallocate` feature @@ -402,15 +402,15 @@ pub fn resume_segment_writer( Ok(ResumedSegment::Resumed(Writer { commit: Commit { - min_tx_offset: tx_range.end, + min_tx_offset: meta.tx_range.end, n: 0, records: Vec::new(), - epoch: max_epoch, + epoch: meta.max_epoch, }, inner: io::BufWriter::new(writer), - min_tx_offset: tx_range.start, - bytes_written: size_in_bytes, + min_tx_offset: meta.tx_range.start, + bytes_written: meta.size_in_bytes, offset_index_head: create_offset_index_writer(repo, offset, opts), }))