From 6e95b07e5fd82d248ed7985cd8d45bb20a12aba9 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 13 May 2026 09:31:13 +0200 Subject: [PATCH 1/5] builtin/maintenance: fix locking with "--detach" When running git-maintenance(1), we create a lockfile that is supposed to keep other maintenance processes from running at the same time. This lockfile is broken though in case the "--detach" flag is passed: the lockfile is created by the parent process and will be cleaned up either manually or on exit. But when detaching, the parent will exit before all of the background maintenance tasks have been run, and consequently the lock only covers a smaller part of the whole maintenance process. Fix this bug by reassigning all tempfiles from the parent process to the child process when daemonizing so that it becomes the responsibility of the child to clean them up. Note that this is a broader fix, as we now always reassign tempfiles when daemonizing. This is a natural consequence of the semantics of `daemonize()` though, as it essentially promises to continue running the current process in the background. It is thus sensible to have that function perform the whole dance of assigning resources to the child process, including tempfiles. There's only a single other caller in "daemon.c", but that process doesn't create any tempfiles before the call to `daemonize()` and is thus not impacted by this change. Reported-by: Jean-Christophe Manciot Helped-by: Jeff King Helped-by: Derrick Stolee Co-authored-by: Taylor Blau Signed-off-by: Taylor Blau Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- setup.c | 16 +++++++++++- setup.h | 15 +++++++++++ t/t7900-maintenance.sh | 58 ++++++++++++++++++++++++++++++++++++++++++ tempfile.c | 12 +++++++++ tempfile.h | 11 ++++++++ 5 files changed, 111 insertions(+), 1 deletion(-) diff --git a/setup.c b/setup.c index 7ec4427368a2a7..14445a71a4297e 100644 --- a/setup.c +++ b/setup.c @@ -2162,12 +2162,26 @@ int daemonize(void) errno = ENOSYS; return -1; #else - switch (fork()) { + pid_t parent_pid = getpid(); + pid_t child_pid = fork(); + + switch (child_pid) { case 0: + /* + * We're in the child process, so we take ownership of + * all tempfiles. + */ + reassign_tempfile_ownership(parent_pid, getpid()); break; case -1: die_errno(_("fork failed")); default: + /* + * We're in the parent process, so we drop ownership of + * all tempfiles to prevent us from removing them upon + * exit. + */ + reassign_tempfile_ownership(parent_pid, child_pid); exit(0); } if (setsid() == -1) diff --git a/setup.h b/setup.h index 80bc6e5f078af8..b5bc5f280c34f0 100644 --- a/setup.h +++ b/setup.h @@ -149,6 +149,21 @@ void verify_non_filename(const char *prefix, const char *name); int path_inside_repo(const char *prefix, const char *path); void sanitize_stdfds(void); + +/* + * Daemonize the current process by forking and then exiting the parent + * process. Returns 0 when successful, in which case the parent process will + * have exited and it's the child process that continues to run the code. + * Otherwise, a negative error code is returned and the parent process will + * continue execution. + * + * Note that this function will also perform the following changes: + * + * - Standard file descriptors in the child process are closed. + * - The child process is made a session leader via setsid(3p). + * - All tempfiles owned by the parent process are reassigned to the + * daemonized child process. + */ int daemonize(void); /* diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index 4700beacc18281..df0bbc16692987 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -1438,6 +1438,64 @@ test_expect_success '--no-detach causes maintenance to not run in background' ' ) ' +test_expect_success PIPE '--detach holds maintenance lock until daemonized child exits' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + + git config maintenance.auto false && + git config core.lockfilepid true && + + git remote add origin /does/not/exist && + git config set remote.origin.uploadpack "cat fifo-uploadpack" && + + mkfifo fifo-uploadpack fifo-maint && + + # Open the maintenance FIFO, as otherwise spawning + # git-maintenance(1) would block. Note that we need to open it + # as read-write, as otherwise we would block here already. + exec 9<>fifo-maint && + + { git maintenance run --task=prefetch --detach 7>&9 & } && + parent="$!" && + + # Reap the parent process so that the exec call below will not + # get SIGCHLD. + wait "$parent" && + + # Open the git-upload-pack(1) FIFO for writing, which will + # block until the upload-pack script opens it for reading. Once + # exec returns, we know that the daemonized child is alive and + # pinned. + exec 8>fifo-uploadpack && + + test_path_is_file .git/objects/maintenance.lock && + test_path_is_file .git/objects/"maintenance~pid.lock" && + + # Verify that the maintenance.lock still exists, and + # that it was created by the parent process, not the + # child. + echo "pid $parent" >expect && + test_cmp expect .git/objects/"maintenance~pid.lock" && + + # Reopen the maintenance FIFO as read-only so that + # git-maintenance(1) is the only writer. This will cause it to + # close the FIFO once the process exits. + exec 9<&- && + exec 9&- && + cat <&9 && + + test_path_is_missing .git/objects/maintenance.lock && + test_path_is_missing .git/objects/"maintenance~pid.lock" + ) +' + test_expect_success '--detach causes maintenance to run in background' ' test_when_finished "rm -rf repo" && git init repo && diff --git a/tempfile.c b/tempfile.c index 82dfa3d82f2ac9..f0fdf582794ba5 100644 --- a/tempfile.c +++ b/tempfile.c @@ -373,3 +373,15 @@ int delete_tempfile(struct tempfile **tempfile_p) return err ? -1 : 0; } + +void reassign_tempfile_ownership(pid_t from, pid_t to) +{ + volatile struct volatile_list_head *pos; + + list_for_each(pos, &tempfile_list) { + struct tempfile *p = list_entry(pos, struct tempfile, list); + + if (is_tempfile_active(p) && p->owner == from) + p->owner = to; + } +} diff --git a/tempfile.h b/tempfile.h index 2d2ae5b657d4a9..2227a095fd4289 100644 --- a/tempfile.h +++ b/tempfile.h @@ -282,4 +282,15 @@ int delete_tempfile(struct tempfile **tempfile_p); */ int rename_tempfile(struct tempfile **tempfile_p, const char *path); +/* + * Reassign ownership of all active tempfiles whose `owner` field matches + * `from` to `to`. + * + * This is intended for use by `daemonize()`; after `fork(2)`-ing, the parent + * transfers ownership to the daemonized child so that its atexit handler does + * not unlink tempfiles that should outlive it, and the child claims the + * inherited tempfiles so that they are cleaned up when the daemon exits. + */ +void reassign_tempfile_ownership(pid_t from, pid_t to); + #endif /* TEMPFILE_H */ From 29364f16242abd490160f66d2d239ac45e1d45fb Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 13 May 2026 09:31:14 +0200 Subject: [PATCH 2/5] run-command: honor "gc.auto" for auto-maintenance The "gc.auto" configuration has traditionally been used to turn off running git-gc(1) as part of our auto-maintenance. We have eventually switched over to git-maintenance(1) in a95ce12430 (maintenance: replace run_auto_gc(), 2020-09-17), and with 1942d48380 (maintenance: optionally skip --auto process, 2020-08-28) we have introduced "maintenance.auto" to control whether or not to run auto-maintenance. At that point though we still shelled out to git-gc(1) internally. So if "gc.auto=0" was set we would still _execute_ git-maintenance(1), but the command would have exited fast because git-gc(1) itself knew to honor the config key. This has recently changed though, as we have adapted the default maintenance strategy to not use git-gc(1) anymore. The consequence is that "gc.auto=0" doesn't have an effect anymore, which is a somewhat surprising change in behaviour for our users. Adapt `run_auto_maintenance()` so that it knows to also read "gc.auto", similar to how it also reads both "maintenance.autoDetach" and "gc.autoDetach". Reported-by: Jean-Christophe Manciot Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- run-command.c | 10 +++++++--- t/t7900-maintenance.sh | 25 +++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/run-command.c b/run-command.c index c146a56532a139..28202a81d83d8b 100644 --- a/run-command.c +++ b/run-command.c @@ -1944,10 +1944,14 @@ void run_processes_parallel(const struct run_process_parallel_opts *opts) int prepare_auto_maintenance(struct repository *r, int quiet, struct child_process *maint) { - int enabled, auto_detach; + int enabled = 1, auto_detach; - if (!repo_config_get_bool(r, "maintenance.auto", &enabled) && - !enabled) + if (repo_config_get_bool(r, "maintenance.auto", &enabled)) { + int gc_threshold; + if (!repo_config_get_int(r, "gc.auto", &gc_threshold)) + enabled = gc_threshold > 0; + } + if (!enabled) return 0; /* diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index df0bbc16692987..97c8c701bbc911 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -73,6 +73,31 @@ test_expect_success 'maintenance.auto config option' ' test_subcommand ! git maintenance run --auto --quiet --detach Date: Fri, 15 May 2026 22:16:22 -0400 Subject: [PATCH 3/5] apply: plug leak on "patch too large" error In apply_patch(), we return immediately if read_patch_file() returns an error. Traditionally this was OK, since an error from strbuf_read() would restore the strbuf to its unallocated state. But since f1c0e3946e (apply: reject patches larger than ~1 GiB, 2022-10-25), we may also return an error if we successfully read the patch but it is too large. In this case we leak the strbuf contents when apply_patch() returns. You can see it in action by running t4141 under LSan with the EXPENSIVE prereq enabled. We can fix this in one of two places: 1. In read_patch_file(), we could release the buffer before returning the error, behaving more like a raw strbuf_read() call. 2. In apply_patch(), we can release the strbuf ourselves before returning. I picked the latter, since it future proofs us against read_patch_file() getting new error modes. We also have a cleanup label in that function already, so now our error handling at this spot matches the rest of the function (and all of the variables are initialized such that the rest of the cleanup is correctly a noop at this point). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- apply.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/apply.c b/apply.c index 4aa1694cfaa2f0..249248d4f205ca 100644 --- a/apply.c +++ b/apply.c @@ -4881,8 +4881,10 @@ static int apply_patch(struct apply_state *state, state->patch_input_file = filename; state->linenr = 1; - if (read_patch_file(&buf, fd) < 0) - return -128; + if (read_patch_file(&buf, fd) < 0) { + res = -128; + goto end; + } offset = 0; while (offset < buf.len) { struct patch *patch; From 65ea197dca35363e7ee312620e5484473f95bbb4 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 15 May 2026 22:23:10 -0400 Subject: [PATCH 4/5] commit: handle large commit messages in utf8 verification Running t4205 under UBSan with the EXPENSIVE prereq enabled triggers an error when we try to create a commit message that is over 2GB: commit.c:1574:6: runtime error: signed integer overflow: -2147483648 - 1 cannot be represented in type 'int' The problem is that find_invalid_utf8() is not prepared to handle large buffers, as it uses an "int" to represent buffer sizes and offsets. We can fix this with a few changes: 1. We'll take in "len" as a size_t (which is what the caller has anyway, since it's working with a strbuf). 2. We need to return a size_t to give the offset to the invalid utf8, but we also need a sentinel value for "no invalid value" (previously "-1"). Let's split these to return a bool for "found invalid utf8" and then pass back the offset as an out-parameter. We'll switch the function name to match the new semantics. 3. The caller in verify_utf8() uses a "long" to store buffer positions, which is a bit funny. This goes back to 08a94a145c (commit/commit-tree: correct latin1 to utf-8, 2012-06-28) and is perhaps trying to match our use of "unsigned long" for object sizes (though we don't care about it ever becoming negative here). This should be a size_t, too, as some platforms (like Windows) still use a 32-bit long on machines with 64-bit pointers. 4. The "bytes" field within find_invalid_utf() does not have range problems. It is the number of bytes the utf8 sequence claims to have, so is limited by how many bits can be set in a single 8-bit byte. However, if we leave it as an "int" then the compiler will complain about the sign mismatch when comparing it to "len". So let's make it unsigned, too. All of this is a little silly, of course, because 2GB text commit messages are clearly nonsense. So we might consider rejecting them outright, but it is easy enough to make these helper functions more robust in the meantime. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- commit.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/commit.c b/commit.c index 80d8d078757dbc..cd49aada40747c 100644 --- a/commit.c +++ b/commit.c @@ -1558,16 +1558,16 @@ int commit_tree(const char *msg, size_t msg_len, const struct object_id *tree, return result; } -static int find_invalid_utf8(const char *buf, int len) +static bool has_invalid_utf8(const char *buf, size_t len, size_t *bad_offset) { - int offset = 0; + size_t offset = 0; static const unsigned int max_codepoint[] = { 0x7f, 0x7ff, 0xffff, 0x10ffff }; while (len) { unsigned char c = *buf++; - int bytes, bad_offset; + unsigned bytes; unsigned int codepoint; unsigned int min_val, max_val; @@ -1578,7 +1578,7 @@ static int find_invalid_utf8(const char *buf, int len) if (c < 0x80) continue; - bad_offset = offset-1; + *bad_offset = offset-1; /* * Count how many more high bits set: that's how @@ -1595,11 +1595,11 @@ static int find_invalid_utf8(const char *buf, int len) * codepoints beyond U+10FFFF, which are guaranteed never to exist. */ if (bytes < 1 || 3 < bytes) - return bad_offset; + return true; /* Do we *have* that many bytes? */ if (len < bytes) - return bad_offset; + return true; /* * Place the encoded bits at the bottom of the value and compute the @@ -1617,23 +1617,23 @@ static int find_invalid_utf8(const char *buf, int len) codepoint <<= 6; codepoint |= *buf & 0x3f; if ((*buf++ & 0xc0) != 0x80) - return bad_offset; + return true; } while (--bytes); /* Reject codepoints that are out of range for the sequence length. */ if (codepoint < min_val || codepoint > max_val) - return bad_offset; + return true; /* Surrogates are only for UTF-16 and cannot be encoded in UTF-8. */ if ((codepoint & 0x1ff800) == 0xd800) - return bad_offset; + return true; /* U+xxFFFE and U+xxFFFF are guaranteed non-characters. */ if ((codepoint & 0xfffe) == 0xfffe) - return bad_offset; + return true; /* So are anything in the range U+FDD0..U+FDEF. */ if (codepoint >= 0xfdd0 && codepoint <= 0xfdef) - return bad_offset; + return true; } - return -1; + return false; } /* @@ -1645,15 +1645,14 @@ static int find_invalid_utf8(const char *buf, int len) static int verify_utf8(struct strbuf *buf) { int ok = 1; - long pos = 0; + size_t pos = 0; for (;;) { - int bad; + size_t bad; unsigned char c; unsigned char replace[2]; - bad = find_invalid_utf8(buf->buf + pos, buf->len - pos); - if (bad < 0) + if (!has_invalid_utf8(buf->buf + pos, buf->len - pos, &bad)) return ok; pos += bad; ok = 0; From 6a4418c36d6bad69a599044b3cf49dcbd049cb45 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 22 May 2026 08:47:06 +0900 Subject: [PATCH 5/5] The 7th batch With this batch, we have flushed all the topics that need to be merged to 'maint' to make its build healthy. Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.55.0.adoc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/RelNotes/2.55.0.adoc b/Documentation/RelNotes/2.55.0.adoc index 328ea76714f993..63c921d9d0fc8e 100644 --- a/Documentation/RelNotes/2.55.0.adoc +++ b/Documentation/RelNotes/2.55.0.adoc @@ -148,6 +148,11 @@ Fixes since v2.54 commits deep, which has been corrected to be a no-op instead. (merge 2431f5e0e5 sp/shallow-deepen-on-non-shallow-repo-fix later to maint). + * "git maintenance" that goes background did not use the lockfile to + prevent multiple maintenance processes from running at the same + time, which has been corrected. + (merge 29364f1624 ps/maintenance-daemonize-lockfix later to maint). + * Other code cleanup, docfix, build fix, etc. (merge 80f4b802e9 ja/doc-difftool-synopsis-style later to maint). (merge b96490241e jc/doc-timestamps-in-stat later to maint). @@ -158,3 +163,5 @@ Fixes since v2.54 (merge ab9753e7bc kh/doc-restore-double-underscores-fix later to maint). (merge 4a9e097228 za/t2000-modernise-more later to maint). (merge b635fd0725 kh/doc-log-decorate-list later to maint). + (merge 65ea197dca jk/commit-sign-overflow-fix later to maint). + (merge 3ccb16052a jk/apply-leakfix later to maint).