cardstack · backspace · Jun 26, 2026 · Jun 26, 2026
diff --git a/packages/realm-server/scripts/migrate-realm-references.sh b/packages/realm-server/scripts/migrate-realm-references.sh
@@ -25,6 +25,9 @@
 #   --exclude <dir>     Skip directories matching <dir> (by name, any depth).
 #                       Repeatable. e.g. --exclude decommissioned to leave
 #                       moved-aside or backup trees untouched.
+#   -j, --jobs <n>      Number of parallel workers (default 16). Files are
+#                       edited concurrently to hide per-file I/O latency on
+#                       networked filesystems (e.g. EFS).
 #
 # Shortcut flags:
 #   -e, --environment   development | staging | production
@@ -73,10 +76,10 @@ set -uo pipefail
 DRY_RUN=false
 JSON_ONLY=false
 MODULES_ONLY=false
+JOBS=16
 ENV=""
 REALM=""
 ERRORS=()
-CHANGED_JSON=()
 EXCLUDE_DIRS=()
 
 while [ $# -gt 0 ]; do
@@ -85,6 +88,10 @@ while [ $# -gt 0 ]; do
       DRY_RUN=true
       shift
       ;;
+    -j|--jobs)
+      # Validate first: the value is handed verbatim to `xargs -P`.
+      case "${2:-}" in ''|*[!0-9]*) echo "Error: --jobs requires a non-negative integer" >&2; exit 1 ;; esac
+      JOBS="$2"; shift 2 ;;
     --json-only)
       JSON_ONLY=true
       shift
@@ -237,6 +244,71 @@ PATCH_FILE="${PATCH_NAME}.patch"
 total_files=0
 > "$PATCH_FILE"
 
+# VALID_BEFORE_FILE lists the .json files that were valid JSON *before*
+# editing. The post-run verification only flags a file if it was valid before
+# and is invalid after (i.e. the replacement broke it) — files that were
+# already non-strict (e.g. trailing commas, unescaped embedded source) are
+# tolerated by the realm server's parser and must not fail the migration.
+#
+# --- Parallel processing scratch ---
+# Files are processed concurrently (xargs -P) because the per-file work is
+# I/O-latency-bound on networked filesystems (EFS). Each worker writes its own
+# patch fragment (concurrent appends to one shared patch file would interleave
+# and corrupt it) and appends results to shared list files; everything is
+# aggregated after the directory loop.
+#
+# Every scratch path comes from mktemp with an explicit template (accepted by
+# both GNU and BSD mktemp, which also creates the empty file/dir). If creation
+# fails the script aborts rather than using a predictable /tmp/<name>.$$ path.
+scratch_tmpl="${TMPDIR:-/tmp}/migrate-realm-refs"
+VALID_BEFORE_FILE=$(mktemp "$scratch_tmpl.valid-before.XXXXXX") || exit 1
+FRAGMENTS_DIR=$(mktemp -d "$scratch_tmpl.frags.XXXXXX") || exit 1
+CHANGED_JSON_FILE=$(mktemp "$scratch_tmpl.changed-json.XXXXXX") || exit 1
+PROCESSED_FILE=$(mktemp "$scratch_tmpl.processed.XXXXXX") || exit 1
+WORKER_ERRORS_FILE=$(mktemp "$scratch_tmpl.werr.XXXXXX") || exit 1
+
+# Worker: process a batch of files passed as positional args. Rebuilds the sed
+# program from exported scalars (arrays can't be exported across xargs). Runs
+# in its own `bash -c`, so results are appended to the shared list files:
+#   PROCESSED_FILE     - one path per file handled without error
+#   CHANGED_JSON_FILE  - one path per replaced .json file (never on dry run)
+#   WORKER_ERRORS_FILE - one "Error ...: <reason>" line per failed file
+# Unified diffs are appended to a per-process fragment under FRAGMENTS_DIR.
+process_files() {
+  local frag="$FRAGMENTS_DIR/frag.$$"
+  local file tmp err
+  # For URLs, also rewrite the path-only form when preceded by " or '.
+  local sed_args=(-e "s|${FIND_STR}|${REPLACEMENT}|g")
+  if [ "$IS_URL" = true ]; then
+    sed_args+=(-e "s|\"${REALM_PATH}|\"${REPLACEMENT}|g"
+               -e "s|'${REALM_PATH}|'${REPLACEMENT}|g")
+  fi
+  for file in "$@"; do
+    tmp="$file.tmp.$$"
+    # stdout -> temp file; stderr is captured so a failure reports its reason.
+    if ! err=$(sed "${sed_args[@]}" "$file" 2>&1 >"$tmp"); then
+      printf '%s\n' "Error processing $file: ${err//$'\n'/ }" >> "$WORKER_ERRORS_FILE"
+      rm -f "$tmp"
+      continue
+    fi
+    # diff exits 1 when the files differ, which is the expected case here.
+    diff -u --label "$file" --label "$file" "$file" "$tmp" >> "$frag" 2>/dev/null || true
+    if [ "$DRY_RUN" = true ]; then
+      rm -f "$tmp"
+    elif ! err=$(mv "$tmp" "$file" 2>&1); then
+      printf '%s\n' "Error replacing $file: ${err//$'\n'/ }" >> "$WORKER_ERRORS_FILE"
+      rm -f "$tmp"
+      continue
+    else
+      case "$file" in *.json) printf '%s\n' "$file" >> "$CHANGED_JSON_FILE" ;; esac
+    fi
+    printf '%s\n' "$file" >> "$PROCESSED_FILE"
+  done
+}
+export -f process_files
+export FIND_STR REPLACEMENT IS_URL REALM_PATH DRY_RUN
+export FRAGMENTS_DIR CHANGED_JSON_FILE PROCESSED_FILE WORKER_ERRORS_FILE
+
 for search_dir in "$@"; do
   if [ ! -d "$search_dir" ]; then
     echo "Warning: directory '$search_dir' does not exist, skipping."
@@ -264,54 +336,50 @@ for search_dir in "$@"; do
     continue
   fi
 
-  # Build sed args once. For URLs, also handle path-only preceded by " or '
-  DQ='"'
-  if [ "$IS_URL" = true ]; then
-    SED_ARGS=(-e "s|${FIND_STR}|${REPLACEMENT}|g"
-              -e "s|${DQ}${REALM_PATH}|${DQ}${REPLACEMENT}|g"
-              -e "s|'${REALM_PATH}|'${REPLACEMENT}|g")
-  else
-    SED_ARGS=(-e "s|${FIND_STR}|${REPLACEMENT}|g")
-  fi
-
-  for file in "${matching_files[@]}"; do
-    if ! sed "${SED_ARGS[@]}" "$file" > "$file.tmp" 2>/tmp/migrate-err.$$; then
-      err="Error processing $file: $(cat /tmp/migrate-err.$$)"
-      echo "  $err"
-      ERRORS+=("$err")
-      rm -f "$file.tmp" /tmp/migrate-err.$$
-      continue
+  # Record which matching .json files parse cleanly BEFORE editing, so the
+  # post-run verification can distinguish "the replacement broke this" from
+  # "this was already non-strict". One batched node pass per directory; paths
+  # are streamed NUL-delimited on stdin (not argv) so a large directory cannot
+  # exceed ARG_MAX and silently leave the valid-before list empty.
+  if [ "$DRY_RUN" = false ]; then
+    json_candidates=()
+    for f in "${matching_files[@]}"; do
+      case "$f" in
+        *.json) json_candidates+=("$f") ;;
+      esac
+    done
+    if [ ${#json_candidates[@]} -gt 0 ]; then
+      printf '%s\0' "${json_candidates[@]}" | node -e '
+        const fs = require("fs");
+        for (const f of fs.readFileSync(0, "utf8").split("\0").filter(Boolean)) {
+          try {
+            JSON.parse(fs.readFileSync(f, "utf8"));
+            console.log(f);
+          } catch (e) { /* already non-strict; not held to the after-check */ }
+        }
+      ' >> "$VALID_BEFORE_FILE"
     fi
-    rm -f /tmp/migrate-err.$$
+  fi
 
-    # Append unified diff to the patch file (use --label so both sides show the real path)
-    { diff -u --label "$file" --label "$file" "$file" "$file.tmp" || true; } >> "$PATCH_FILE"
+  echo "  ${#matching_files[@]} file(s) to process (jobs=$JOBS) ..."
 
-    if [ "$DRY_RUN" = true ]; then
-      echo ""
-      echo "  Would update: $file"
-      { diff --unified=0 "$file" "$file.tmp" || true; } | tail -n +3 | grep '^[+-]' | while IFS= read -r line; do
-        echo "    $line"
-      done
-      rm -f "$file.tmp"
-    else
-      if ! mv "$file.tmp" "$file" 2>/tmp/migrate-err.$$; then
-        err="Error replacing $file: $(cat /tmp/migrate-err.$$)"
-        echo "  $err"
-        ERRORS+=("$err")
-        rm -f "$file.tmp" /tmp/migrate-err.$$
-        continue
-      fi
-      rm -f /tmp/migrate-err.$$
-      echo "  Updated: $file"
-      case "$file" in
-        *.json) CHANGED_JSON+=("$file") ;;
-      esac
-    fi
-    total_files=$((total_files + 1))
-  done
+  # Process this directory's files concurrently: NUL-delimited so whitespace in
+  # paths is safe, -n batches files per worker, -P runs JOBS workers at once.
+  printf '%s\0' "${matching_files[@]}" \
+    | xargs -0 -P "$JOBS" -n 50 bash -c 'process_files "$@"' _ \
+    || ERRORS+=("xargs worker failure under $search_dir; some files may be unprocessed")
 done
 
+# --- Aggregate parallel results ---
+# Stitch the per-worker patch fragments (if any) into the single patch file.
+for fragment in "$FRAGMENTS_DIR"/frag.*; do
+  if [ -e "$fragment" ]; then cat "$fragment" >> "$PATCH_FILE"; fi
+done
+total_files=$(wc -l < "$PROCESSED_FILE" 2>/dev/null | tr -d '[:space:]')
+total_files=${total_files:-0}
+# Fold worker-reported failures into the main error list (blank lines skipped).
+while IFS= read -r werr; do if [ -n "$werr" ]; then ERRORS+=("$werr"); fi; done < "$WORKER_ERRORS_FILE"
+
 echo ""
 if [ "$DRY_RUN" = true ]; then
   echo "Dry run complete. $total_files file(s) would be updated."
@@ -323,31 +391,54 @@ else
   echo "  To undo: patch -R -p0 < $PATCH_FILE"
 fi
 
-# Verify every changed JSON file still parses, so a bad replacement can't
-# silently corrupt a card document. Failures are reported and force a
-# non-zero exit; roll back with the patch above.
-if [ "$DRY_RUN" = false ] && [ ${#CHANGED_JSON[@]} -gt 0 ]; then
+# Verify the replacement didn't turn any *previously valid* JSON invalid.
+# Files that were already non-strict before editing (captured in
+# VALID_BEFORE_FILE) are tolerated by the realm server's lenient parser, so
+# they're reported as a note but don't fail the run — only a genuine
+# valid -> invalid regression forces a non-zero exit.
+changed_json_count=$(wc -l < "$CHANGED_JSON_FILE" 2>/dev/null | tr -d '[:space:]')
+[ -z "$changed_json_count" ] && changed_json_count=0
+if [ "$DRY_RUN" = false ] && [ "$changed_json_count" -gt 0 ]; then
   echo ""
-  echo "Verifying ${#CHANGED_JSON[@]} changed JSON file(s) still parse ..."
+  echo "Verifying $changed_json_count changed JSON file(s) ..."
+  # Both path lists are read from files (not argv) so this scales past ARG_MAX.
   if ! node -e '
     const fs = require("fs");
-    let bad = 0;
-    for (const f of process.argv.slice(1)) {
+    const validBefore = new Set(
+      fs.readFileSync(process.argv[1], "utf8").split("\n").filter(Boolean)
+    );
+    const changed = fs.readFileSync(process.argv[2], "utf8").split("\n").filter(Boolean);
+    let broke = 0;
+    let preexisting = 0;
+    for (const f of changed) {
       try {
         JSON.parse(fs.readFileSync(f, "utf8"));
       } catch (e) {
-        console.error("  Invalid JSON after migration: " + f + ": " + e.message);
-        bad++;
+        if (validBefore.has(f)) {
+          console.error("  Migration broke valid JSON: " + f + ": " + e.message);
+          broke++;
+        } else {
+          preexisting++;
+        }
       }
     }
-    process.exit(bad > 0 ? 1 : 0);
-  ' "${CHANGED_JSON[@]}"; then
-    ERRORS+=("JSON validation failed for one or more migrated files (see above). Roll back with: patch -R -p0 < $PATCH_FILE")
+    if (preexisting > 0) {
+      console.error(
+        "  Note: " + preexisting +
+        " changed file(s) were already non-strict JSON before the migration (not flagged)."
+      );
+    }
+    process.exit(broke > 0 ? 1 : 0);
+  ' "$VALID_BEFORE_FILE" "$CHANGED_JSON_FILE"; then
+    ERRORS+=("Migration turned previously-valid JSON invalid in one or more files (see above). Roll back with: patch -R -p0 < $PATCH_FILE")
   else
-    echo "  All migrated JSON files parse cleanly."
+    echo "  No previously-valid JSON was broken."
   fi
 fi
 
+rm -f "$VALID_BEFORE_FILE" "$CHANGED_JSON_FILE" "$PROCESSED_FILE" "$WORKER_ERRORS_FILE"
+rm -rf "$FRAGMENTS_DIR"
+
 if [ ${#ERRORS[@]} -gt 0 ]; then
   echo ""
   echo "WARNING: ${#ERRORS[@]} error(s) encountered during processing:"