From bdc5a93d52552aac32c16b6fb4cb0029ca486b6f Mon Sep 17 00:00:00 2001
From: mike-diff
Date: Tue, 23 Jun 2026 20:00:51 -0700
Subject: [PATCH] ci(dispatch): retry the sesh install on transient release-fetch failures

The repo's release.yml deletes and recreates the rolling release on every
push to main (delete tag, rebuild assets, republish). For the few seconds
that rebuild takes, releases/latest/download/ returns 404. A dispatch run
whose install step lands in that window dies on a transient 404, as run
28070847550 did: the install step hit 404 at 02:25:03, two seconds before
the release was republished at 02:25:05.

install.sh is the public installer and keeps its conservative no-retry
curl flags (correct for one-shot user installs). The race tolerance
belongs in the dispatch workflow, where concurrent workflows against the
same rolling release actually happen.

Retry only transient fetch failures: curl exit 22 (HTTP error, e.g. 404)
and 18 (partial transfer). A checksum mismatch is deterministic and is NOT
retried (a corrupt or tampered download must fail loudly). Five attempts,
10s between, then fail.

Verified by simulating the control flow: 404-then-success recovers;
checksum mismatch aborts immediately; persistent 404 exhausts then fails;
happy path no retry. build/vet green; no em dashes per AGENTS.md.
---
Review note: the retry loop in this hunk was revised during review, so the
postimage blob id on the index line below predates that revision.

 .github/workflows/dispatch.yml | 40 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 37 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/dispatch.yml b/.github/workflows/dispatch.yml
index 2a32481..ee67236 100644
--- a/.github/workflows/dispatch.yml
+++ b/.github/workflows/dispatch.yml
@@ -103,10 +103,44 @@ jobs:
           cache: true
 
       - name: Install sesh (checksum-verified, via the repo's own installer)
+        # install.sh downloads the rolling release, verifies its SHA-256
+        # against the published SHA256SUMS, then runs sesh -install. The
+        # rolling release is deleted and recreated on every push to main by
+        # release.yml, so releases/latest/download/ 404s for the few
+        # seconds that rebuild takes. A dispatch that races that window dies
+        # on a transient 404. Retry only the fetch failures (curl 22 = HTTP
+        # error such as 404, 18 = partial transfer); a checksum mismatch is
+        # deterministic and must NOT be retried.
         run: |
-          sh ./install.sh
-          # install.sh downloads the rolling release, verifies its SHA-256
-          # against the published SHA256SUMS, then runs sesh -install.
+          # Knobs live in one place so the attempt cap and the exhaustion
+          # check below cannot drift apart.
+          max_attempts=5
+          retry_delay=10
+          attempt=1
+          while :; do
+            # "|| rc=$?" records the installer's status without turning
+            # errexit off for the rest of the step.
+            rc=0
+            sh ./install.sh || rc=$?
+            if [ "$rc" -eq 0 ]; then
+              echo "install.sh succeeded on attempt $attempt"
+              break
+            fi
+            case "$rc" in
+              18|22) ;;  # transient fetch failure: eligible for retry
+              *)
+                echo "install.sh failed with rc=$rc (non-transient); not retrying" >&2
+                exit "$rc"
+                ;;
+            esac
+            if [ "$attempt" -ge "$max_attempts" ]; then
+              echo "install.sh failed after $attempt attempts (rc=$rc)" >&2
+              exit "$rc"
+            fi
+            echo "install.sh attempt $attempt failed (rc=$rc); the rolling release may be mid-rebuild. Retrying in ${retry_delay}s." >&2
+            sleep "$retry_delay"
+            attempt=$((attempt + 1))
+          done
           ~/.local/bin/sesh -version
 
       - name: Verify config (fail fast on a bad key or endpoint)