From ef128dcfa55be23da307ac44b1245790d7f3747a Mon Sep 17 00:00:00 2001
From: Buck Doyle <buck.doyle@cardstack.com>
Date: Fri, 26 Jun 2026 13:48:17 -0500
Subject: [PATCH 1/2] Only fail JSON verification on regressions, not
 pre-existing non-strict files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The post-run JSON check flagged any changed file that didn't parse strictly,
but a prefix substitution can't turn valid JSON invalid (it never touches
structural characters) — so every failure was a file that was already
non-strict (trailing commas, unescaped embedded source) and which the realm
server's lenient parser tolerates. Record which files parsed cleanly before
editing and only fail on a valid -> invalid regression; report already-non-strict
files as a note. A genuine break still exits non-zero.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .../scripts/migrate-realm-references.sh       | 75 ++++++++++++++++---
 1 file changed, 63 insertions(+), 12 deletions(-)

diff --git a/packages/realm-server/scripts/migrate-realm-references.sh b/packages/realm-server/scripts/migrate-realm-references.sh
index fc967fb641..58dc3ec51f 100755
--- a/packages/realm-server/scripts/migrate-realm-references.sh
+++ b/packages/realm-server/scripts/migrate-realm-references.sh
@@ -237,6 +237,14 @@ PATCH_FILE="${PATCH_NAME}.patch"
 total_files=0
 > "$PATCH_FILE"
 
+# Paths of .json files that were valid JSON *before* editing. The post-run
+# verification only flags a file if it was valid before and is invalid after
+# (i.e. the replacement broke it) — files that were already non-strict (e.g.
+# trailing commas, unescaped embedded source) are tolerated by the realm
+# server's parser and must not fail the migration.
+VALID_BEFORE_FILE=$(mktemp 2>/dev/null || echo "/tmp/migrate-valid-before.$$")
+> "$VALID_BEFORE_FILE"
+
 for search_dir in "$@"; do
   if [ ! -d "$search_dir" ]; then
     echo "Warning: directory '$search_dir' does not exist, skipping."
@@ -264,6 +272,31 @@ for search_dir in "$@"; do
     continue
   fi
 
+  # Record which matching .json files parse cleanly BEFORE editing, so the
+  # post-run verification can distinguish "the replacement broke this" from
+  # "this was already non-strict". One batched node pass per directory.
+  if [ "$DRY_RUN" = false ]; then
+    json_candidates=()
+    for f in "${matching_files[@]}"; do
+      case "$f" in
+        *.json) json_candidates+=("$f") ;;
+      esac
+    done
+    if [ ${#json_candidates[@]} -gt 0 ]; then
+      node -e '
+        const fs = require("fs");
+        for (const f of process.argv.slice(1)) {
+          try {
+            JSON.parse(fs.readFileSync(f, "utf8"));
+            console.log(f);
+          } catch (e) {
+            /* already non-strict; omit so it is not held to the after-check */
+          }
+        }
+      ' "${json_candidates[@]}" >> "$VALID_BEFORE_FILE"
+    fi
+  fi
+
   # Build sed args once. For URLs, also handle path-only preceded by " or '
   DQ='"'
   if [ "$IS_URL" = true ]; then
@@ -323,31 +356,49 @@ else
   echo "  To undo: patch -R -p0 < $PATCH_FILE"
 fi
 
-# Verify every changed JSON file still parses, so a bad replacement can't
-# silently corrupt a card document. Failures are reported and force a
-# non-zero exit; roll back with the patch above.
+# Verify the replacement didn't turn any *previously valid* JSON invalid.
+# Files that were already non-strict before editing (captured in
+# VALID_BEFORE_FILE) are tolerated by the realm server's lenient parser, so
+# they're reported as a note but don't fail the run — only a genuine
+# valid -> invalid regression forces a non-zero exit.
 if [ "$DRY_RUN" = false ] && [ ${#CHANGED_JSON[@]} -gt 0 ]; then
   echo ""
-  echo "Verifying ${#CHANGED_JSON[@]} changed JSON file(s) still parse ..."
+  echo "Verifying ${#CHANGED_JSON[@]} changed JSON file(s) ..."
   if ! node -e '
     const fs = require("fs");
-    let bad = 0;
-    for (const f of process.argv.slice(1)) {
+    const validBefore = new Set(
+      fs.readFileSync(process.argv[1], "utf8").split("\n").filter(Boolean)
+    );
+    let broke = 0;
+    let preexisting = 0;
+    for (const f of process.argv.slice(2)) {
       try {
         JSON.parse(fs.readFileSync(f, "utf8"));
       } catch (e) {
-        console.error("  Invalid JSON after migration: " + f + ": " + e.message);
-        bad++;
+        if (validBefore.has(f)) {
+          console.error("  Migration broke valid JSON: " + f + ": " + e.message);
+          broke++;
+        } else {
+          preexisting++;
+        }
       }
     }
-    process.exit(bad > 0 ? 1 : 0);
-  ' "${CHANGED_JSON[@]}"; then
-    ERRORS+=("JSON validation failed for one or more migrated files (see above). Roll back with: patch -R -p0 < $PATCH_FILE")
+    if (preexisting > 0) {
+      console.error(
+        "  Note: " + preexisting +
+        " changed file(s) were already non-strict JSON before the migration (not flagged)."
+      );
+    }
+    process.exit(broke > 0 ? 1 : 0);
+  ' "$VALID_BEFORE_FILE" "${CHANGED_JSON[@]}"; then
+    ERRORS+=("Migration turned previously-valid JSON invalid in one or more files (see above). Roll back with: patch -R -p0 < $PATCH_FILE")
   else
-    echo "  All migrated JSON files parse cleanly."
+    echo "  No previously-valid JSON was broken."
   fi
 fi
 
+rm -f "$VALID_BEFORE_FILE"
+
 if [ ${#ERRORS[@]} -gt 0 ]; then
   echo ""
   echo "WARNING: ${#ERRORS[@]} error(s) encountered during processing:"

From 3f9f59951c998de70959c1af8760afc76bacde33 Mon Sep 17 00:00:00 2001
From: Buck Doyle <buck.doyle@cardstack.com>
Date: Fri, 26 Jun 2026 14:55:21 -0500
Subject: [PATCH 2/2] Parallelize migrate-realm-references.sh

Edit files concurrently via `xargs -P` to hide per-file I/O latency on
networked filesystems (EFS), where the sequential per-file loop was the
bottleneck on large realm trees.

- Add -j/--jobs <n> flag (default 16).
- Each worker writes its own patch fragment (concurrent appends to one
  shared patch file would interleave and corrupt it); fragments are
  concatenated into the rollback patch after the run. Worker results
  (changed-JSON list, processed count, errors) go to shared list files
  aggregated afterward, since arrays can't cross the xargs boundary.
- JSON verification reads its path lists from files rather than argv,
  which also makes it ARG_MAX-safe.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .../scripts/migrate-realm-references.sh       | 142 +++++++++++-------
 1 file changed, 91 insertions(+), 51 deletions(-)

diff --git a/packages/realm-server/scripts/migrate-realm-references.sh b/packages/realm-server/scripts/migrate-realm-references.sh
index 58dc3ec51f..facac90ee8 100755
--- a/packages/realm-server/scripts/migrate-realm-references.sh
+++ b/packages/realm-server/scripts/migrate-realm-references.sh
@@ -25,6 +25,9 @@
 #   --exclude <dir>     Skip directories matching <dir> (by name, any depth).
 #                       Repeatable. e.g. --exclude decommissioned to leave
 #                       moved-aside or backup trees untouched.
+#   -j, --jobs <n>      Number of parallel workers (default 16). Files are
+#                       edited concurrently to hide per-file I/O latency on
+#                       networked filesystems (e.g. EFS).
 #
 # Shortcut flags:
 #   -e, --environment   development | staging | production
@@ -73,10 +76,10 @@ set -uo pipefail
 DRY_RUN=false
 JSON_ONLY=false
 MODULES_ONLY=false
+JOBS=16
 ENV=""
 REALM=""
 ERRORS=()
-CHANGED_JSON=()
 EXCLUDE_DIRS=()
 
 while [ $# -gt 0 ]; do
@@ -85,6 +88,10 @@ while [ $# -gt 0 ]; do
       DRY_RUN=true
       shift
       ;;
+    -j|--jobs)
+      JOBS="$2"
+      shift 2
+      ;;
     --json-only)
       JSON_ONLY=true
       shift
@@ -245,6 +252,63 @@ total_files=0
 VALID_BEFORE_FILE=$(mktemp 2>/dev/null || echo "/tmp/migrate-valid-before.$$")
 > "$VALID_BEFORE_FILE"
 
+# --- Parallel processing scratch ---
+# Files are processed concurrently (xargs -P) because the per-file work is
+# I/O-latency-bound on networked filesystems (EFS). Each worker writes its own
+# patch fragment (concurrent appends to one shared patch file would interleave
+# and corrupt it) and appends results to shared list files; everything is
+# aggregated after the directory loop.
+FRAGMENTS_DIR=$(mktemp -d 2>/dev/null || echo "/tmp/migrate-frags.$$")
+mkdir -p "$FRAGMENTS_DIR"
+CHANGED_JSON_FILE=$(mktemp 2>/dev/null || echo "/tmp/migrate-changed-json.$$")
+PROCESSED_FILE=$(mktemp 2>/dev/null || echo "/tmp/migrate-processed.$$")
+WORKER_ERRORS_FILE=$(mktemp 2>/dev/null || echo "/tmp/migrate-werr.$$")
+> "$CHANGED_JSON_FILE"
+> "$PROCESSED_FILE"
+> "$WORKER_ERRORS_FILE"
+
+# Worker: process a batch of files passed as positional args. Reconstructs the
+# sed program from exported scalars (arrays can't be exported across xargs).
+# Runs in its own `bash -c`, so results go to the shared files above.
+process_files() {
+  local frag="$FRAGMENTS_DIR/frag.$$"
+  local file tmp
+  for file in "$@"; do
+    tmp="$file.tmp.$$"
+    if [ "$IS_URL" = true ]; then
+      if ! sed -e "s|${FIND_STR}|${REPLACEMENT}|g" \
+               -e "s|\"${REALM_PATH}|\"${REPLACEMENT}|g" \
+               -e "s|'${REALM_PATH}|'${REPLACEMENT}|g" \
+               "$file" > "$tmp" 2>/dev/null; then
+        printf '%s\n' "Error processing $file" >> "$WORKER_ERRORS_FILE"
+        rm -f "$tmp"
+        continue
+      fi
+    else
+      if ! sed -e "s|${FIND_STR}|${REPLACEMENT}|g" "$file" > "$tmp" 2>/dev/null; then
+        printf '%s\n' "Error processing $file" >> "$WORKER_ERRORS_FILE"
+        rm -f "$tmp"
+        continue
+      fi
+    fi
+    diff -u --label "$file" --label "$file" "$file" "$tmp" >> "$frag" 2>/dev/null || true
+    printf '%s\n' "$file" >> "$PROCESSED_FILE"
+    if [ "$DRY_RUN" = true ]; then
+      rm -f "$tmp"
+    elif mv "$tmp" "$file" 2>/dev/null; then
+      case "$file" in
+        *.json) printf '%s\n' "$file" >> "$CHANGED_JSON_FILE" ;;
+      esac
+    else
+      printf '%s\n' "Error replacing $file" >> "$WORKER_ERRORS_FILE"
+      rm -f "$tmp"
+    fi
+  done
+}
+export -f process_files
+export FIND_STR REPLACEMENT IS_URL REALM_PATH DRY_RUN
+export FRAGMENTS_DIR CHANGED_JSON_FILE PROCESSED_FILE WORKER_ERRORS_FILE
+
 for search_dir in "$@"; do
   if [ ! -d "$search_dir" ]; then
     echo "Warning: directory '$search_dir' does not exist, skipping."
@@ -297,54 +361,25 @@ for search_dir in "$@"; do
     fi
   fi
 
-  # Build sed args once. For URLs, also handle path-only preceded by " or '
-  DQ='"'
-  if [ "$IS_URL" = true ]; then
-    SED_ARGS=(-e "s|${FIND_STR}|${REPLACEMENT}|g"
-              -e "s|${DQ}${REALM_PATH}|${DQ}${REPLACEMENT}|g"
-              -e "s|'${REALM_PATH}|'${REPLACEMENT}|g")
-  else
-    SED_ARGS=(-e "s|${FIND_STR}|${REPLACEMENT}|g")
-  fi
-
-  for file in "${matching_files[@]}"; do
-    if ! sed "${SED_ARGS[@]}" "$file" > "$file.tmp" 2>/tmp/migrate-err.$$; then
-      err="Error processing $file: $(cat /tmp/migrate-err.$$)"
-      echo "  $err"
-      ERRORS+=("$err")
-      rm -f "$file.tmp" /tmp/migrate-err.$$
-      continue
-    fi
-    rm -f /tmp/migrate-err.$$
+  echo "  ${#matching_files[@]} file(s) to process (jobs=$JOBS) ..."
 
-    # Append unified diff to the patch file (use --label so both sides show the real path)
-    { diff -u --label "$file" --label "$file" "$file" "$file.tmp" || true; } >> "$PATCH_FILE"
-
-    if [ "$DRY_RUN" = true ]; then
-      echo ""
-      echo "  Would update: $file"
-      { diff --unified=0 "$file" "$file.tmp" || true; } | tail -n +3 | grep '^[+-]' | while IFS= read -r line; do
-        echo "    $line"
-      done
-      rm -f "$file.tmp"
-    else
-      if ! mv "$file.tmp" "$file" 2>/tmp/migrate-err.$$; then
-        err="Error replacing $file: $(cat /tmp/migrate-err.$$)"
-        echo "  $err"
-        ERRORS+=("$err")
-        rm -f "$file.tmp" /tmp/migrate-err.$$
-        continue
-      fi
-      rm -f /tmp/migrate-err.$$
-      echo "  Updated: $file"
-      case "$file" in
-        *.json) CHANGED_JSON+=("$file") ;;
-      esac
-    fi
-    total_files=$((total_files + 1))
-  done
+  # Process this directory's matching files concurrently. NUL-delimited so any
+  # path (spaces/newlines) is safe; -n batches files per worker to amortize the
+  # bash fork; -P runs JOBS workers at once to hide per-file EFS latency.
+  printf '%s\0' "${matching_files[@]}" \
+    | xargs -0 -P "$JOBS" -n 50 bash -c 'process_files "$@"' _
 done
 
+# --- Aggregate parallel results ---
+if ls "$FRAGMENTS_DIR"/frag.* >/dev/null 2>&1; then
+  cat "$FRAGMENTS_DIR"/frag.* >> "$PATCH_FILE"
+fi
+total_files=$(wc -l < "$PROCESSED_FILE" 2>/dev/null | tr -d '[:space:]')
+[ -z "$total_files" ] && total_files=0
+while IFS= read -r werr; do
+  [ -n "$werr" ] && ERRORS+=("$werr")
+done < "$WORKER_ERRORS_FILE"
+
 echo ""
 if [ "$DRY_RUN" = true ]; then
   echo "Dry run complete. $total_files file(s) would be updated."
@@ -361,17 +396,21 @@ fi
 # VALID_BEFORE_FILE) are tolerated by the realm server's lenient parser, so
 # they're reported as a note but don't fail the run — only a genuine
 # valid -> invalid regression forces a non-zero exit.
-if [ "$DRY_RUN" = false ] && [ ${#CHANGED_JSON[@]} -gt 0 ]; then
+changed_json_count=$(wc -l < "$CHANGED_JSON_FILE" 2>/dev/null | tr -d '[:space:]')
+[ -z "$changed_json_count" ] && changed_json_count=0
+if [ "$DRY_RUN" = false ] && [ "$changed_json_count" -gt 0 ]; then
   echo ""
-  echo "Verifying ${#CHANGED_JSON[@]} changed JSON file(s) ..."
+  echo "Verifying $changed_json_count changed JSON file(s) ..."
+  # Both path lists are read from files (not argv) so this scales past ARG_MAX.
   if ! node -e '
     const fs = require("fs");
     const validBefore = new Set(
       fs.readFileSync(process.argv[1], "utf8").split("\n").filter(Boolean)
     );
+    const changed = fs.readFileSync(process.argv[2], "utf8").split("\n").filter(Boolean);
     let broke = 0;
     let preexisting = 0;
-    for (const f of process.argv.slice(2)) {
+    for (const f of changed) {
       try {
         JSON.parse(fs.readFileSync(f, "utf8"));
       } catch (e) {
@@ -390,14 +429,15 @@ if [ "$DRY_RUN" = false ] && [ ${#CHANGED_JSON[@]} -gt 0 ]; then
       );
     }
     process.exit(broke > 0 ? 1 : 0);
-  ' "$VALID_BEFORE_FILE" "${CHANGED_JSON[@]}"; then
+  ' "$VALID_BEFORE_FILE" "$CHANGED_JSON_FILE"; then
     ERRORS+=("Migration turned previously-valid JSON invalid in one or more files (see above). Roll back with: patch -R -p0 < $PATCH_FILE")
   else
     echo "  No previously-valid JSON was broken."
   fi
 fi
 
-rm -f "$VALID_BEFORE_FILE"
+rm -f "$VALID_BEFORE_FILE" "$CHANGED_JSON_FILE" "$PROCESSED_FILE" "$WORKER_ERRORS_FILE"
+rm -rf "$FRAGMENTS_DIR"
 
 if [ ${#ERRORS[@]} -gt 0 ]; then
   echo ""