Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
205 changes: 148 additions & 57 deletions packages/realm-server/scripts/migrate-realm-references.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@
# --exclude <dir> Skip directories matching <dir> (by name, any depth).
# Repeatable. e.g. --exclude decommissioned to leave
# moved-aside or backup trees untouched.
# -j, --jobs <n> Number of parallel workers (default 16). Files are
# edited concurrently to hide per-file I/O latency on
# networked filesystems (e.g. EFS).
#
# Shortcut flags:
# -e, --environment development | staging | production
Expand Down Expand Up @@ -73,10 +76,10 @@ set -uo pipefail
DRY_RUN=false
JSON_ONLY=false
MODULES_ONLY=false
JOBS=16
ENV=""
REALM=""
ERRORS=()
CHANGED_JSON=()
EXCLUDE_DIRS=()

while [ $# -gt 0 ]; do
Expand All @@ -85,6 +88,10 @@ while [ $# -gt 0 ]; do
DRY_RUN=true
shift
;;
-j|--jobs)
JOBS="$2"
shift 2
;;
--json-only)
JSON_ONLY=true
shift
Expand Down Expand Up @@ -237,6 +244,71 @@ PATCH_FILE="${PATCH_NAME}.patch"
total_files=0
> "$PATCH_FILE"

# Paths of .json files that were valid JSON *before* editing. The post-run
# verification only flags a file if it was valid before and is invalid after
# (i.e. the replacement broke it) — files that were already non-strict (e.g.
# trailing commas, unescaped embedded source) are tolerated by the realm
# server's parser and must not fail the migration.
VALID_BEFORE_FILE=$(mktemp 2>/dev/null || echo "/tmp/migrate-valid-before.$$")
> "$VALID_BEFORE_FILE"

# --- Parallel processing scratch ---
# Files are processed concurrently (xargs -P) because the per-file work is
# I/O-latency-bound on networked filesystems (EFS). Each worker writes its own
# patch fragment (concurrent appends to one shared patch file would interleave
# and corrupt it) and appends results to shared list files; everything is
# aggregated after the directory loop.
FRAGMENTS_DIR=$(mktemp -d 2>/dev/null || echo "/tmp/migrate-frags.$$")
mkdir -p "$FRAGMENTS_DIR"
CHANGED_JSON_FILE=$(mktemp 2>/dev/null || echo "/tmp/migrate-changed-json.$$")
PROCESSED_FILE=$(mktemp 2>/dev/null || echo "/tmp/migrate-processed.$$")
WORKER_ERRORS_FILE=$(mktemp 2>/dev/null || echo "/tmp/migrate-werr.$$")
> "$CHANGED_JSON_FILE"
> "$PROCESSED_FILE"
> "$WORKER_ERRORS_FILE"

# Worker: process a batch of files passed as positional args. Reconstructs the
# sed program from exported scalars (arrays can't be exported across xargs).
# Runs in its own `bash -c`, so results go to the shared files above.
process_files() {
local frag="$FRAGMENTS_DIR/frag.$$"
local file tmp
for file in "$@"; do
tmp="$file.tmp.$$"
if [ "$IS_URL" = true ]; then
if ! sed -e "s|${FIND_STR}|${REPLACEMENT}|g" \
-e "s|\"${REALM_PATH}|\"${REPLACEMENT}|g" \
-e "s|'${REALM_PATH}|'${REPLACEMENT}|g" \
"$file" > "$tmp" 2>/dev/null; then
printf '%s\n' "Error processing $file" >> "$WORKER_ERRORS_FILE"
rm -f "$tmp"
continue
fi
else
if ! sed -e "s|${FIND_STR}|${REPLACEMENT}|g" "$file" > "$tmp" 2>/dev/null; then
printf '%s\n' "Error processing $file" >> "$WORKER_ERRORS_FILE"
rm -f "$tmp"
continue
fi
fi
diff -u --label "$file" --label "$file" "$file" "$tmp" >> "$frag" 2>/dev/null || true
printf '%s\n' "$file" >> "$PROCESSED_FILE"
if [ "$DRY_RUN" = true ]; then
rm -f "$tmp"
elif mv "$tmp" "$file" 2>/dev/null; then
case "$file" in
*.json) printf '%s\n' "$file" >> "$CHANGED_JSON_FILE" ;;
esac
else
printf '%s\n' "Error replacing $file" >> "$WORKER_ERRORS_FILE"
rm -f "$tmp"
fi
done
}
export -f process_files
export FIND_STR REPLACEMENT IS_URL REALM_PATH DRY_RUN
export FRAGMENTS_DIR CHANGED_JSON_FILE PROCESSED_FILE WORKER_ERRORS_FILE

for search_dir in "$@"; do
if [ ! -d "$search_dir" ]; then
echo "Warning: directory '$search_dir' does not exist, skipping."
Expand Down Expand Up @@ -264,54 +336,50 @@ for search_dir in "$@"; do
continue
fi

# Build sed args once. For URLs, also handle path-only preceded by " or '
DQ='"'
if [ "$IS_URL" = true ]; then
SED_ARGS=(-e "s|${FIND_STR}|${REPLACEMENT}|g"
-e "s|${DQ}${REALM_PATH}|${DQ}${REPLACEMENT}|g"
-e "s|'${REALM_PATH}|'${REPLACEMENT}|g")
else
SED_ARGS=(-e "s|${FIND_STR}|${REPLACEMENT}|g")
fi

for file in "${matching_files[@]}"; do
if ! sed "${SED_ARGS[@]}" "$file" > "$file.tmp" 2>/tmp/migrate-err.$$; then
err="Error processing $file: $(cat /tmp/migrate-err.$$)"
echo " $err"
ERRORS+=("$err")
rm -f "$file.tmp" /tmp/migrate-err.$$
continue
# Record which matching .json files parse cleanly BEFORE editing, so the
# post-run verification can distinguish "the replacement broke this" from
# "this was already non-strict". One batched node pass per directory.
if [ "$DRY_RUN" = false ]; then
json_candidates=()
for f in "${matching_files[@]}"; do
case "$f" in
*.json) json_candidates+=("$f") ;;
esac
done
if [ ${#json_candidates[@]} -gt 0 ]; then
node -e '
const fs = require("fs");
for (const f of process.argv.slice(1)) {
try {
JSON.parse(fs.readFileSync(f, "utf8"));
console.log(f);
} catch (e) {
/* already non-strict; omit so it is not held to the after-check */
}
}
' "${json_candidates[@]}" >> "$VALID_BEFORE_FILE"
fi
rm -f /tmp/migrate-err.$$
fi

# Append unified diff to the patch file (use --label so both sides show the real path)
{ diff -u --label "$file" --label "$file" "$file" "$file.tmp" || true; } >> "$PATCH_FILE"
echo " ${#matching_files[@]} file(s) to process (jobs=$JOBS) ..."

if [ "$DRY_RUN" = true ]; then
echo ""
echo " Would update: $file"
{ diff --unified=0 "$file" "$file.tmp" || true; } | tail -n +3 | grep '^[+-]' | while IFS= read -r line; do
echo " $line"
done
rm -f "$file.tmp"
else
if ! mv "$file.tmp" "$file" 2>/tmp/migrate-err.$$; then
err="Error replacing $file: $(cat /tmp/migrate-err.$$)"
echo " $err"
ERRORS+=("$err")
rm -f "$file.tmp" /tmp/migrate-err.$$
continue
fi
rm -f /tmp/migrate-err.$$
echo " Updated: $file"
case "$file" in
*.json) CHANGED_JSON+=("$file") ;;
esac
fi
total_files=$((total_files + 1))
done
# Process this directory's matching files concurrently. NUL-delimited so any
# path (spaces/newlines) is safe; -n batches files per worker to amortize the
# bash fork; -P runs JOBS workers at once to hide per-file EFS latency.
printf '%s\0' "${matching_files[@]}" \
| xargs -0 -P "$JOBS" -n 50 bash -c 'process_files "$@"' _
done

# --- Aggregate parallel results ---
if ls "$FRAGMENTS_DIR"/frag.* >/dev/null 2>&1; then
cat "$FRAGMENTS_DIR"/frag.* >> "$PATCH_FILE"
fi
total_files=$(wc -l < "$PROCESSED_FILE" 2>/dev/null | tr -d '[:space:]')
[ -z "$total_files" ] && total_files=0
while IFS= read -r werr; do
[ -n "$werr" ] && ERRORS+=("$werr")
done < "$WORKER_ERRORS_FILE"

echo ""
if [ "$DRY_RUN" = true ]; then
echo "Dry run complete. $total_files file(s) would be updated."
Expand All @@ -323,31 +391,54 @@ else
echo " To undo: patch -R -p0 < $PATCH_FILE"
fi

# Verify every changed JSON file still parses, so a bad replacement can't
# silently corrupt a card document. Failures are reported and force a
# non-zero exit; roll back with the patch above.
if [ "$DRY_RUN" = false ] && [ ${#CHANGED_JSON[@]} -gt 0 ]; then
# Verify the replacement didn't turn any *previously valid* JSON invalid.
# Files that were already non-strict before editing (captured in
# VALID_BEFORE_FILE) are tolerated by the realm server's lenient parser, so
# they're reported as a note but don't fail the run — only a genuine
# valid -> invalid regression forces a non-zero exit.
changed_json_count=$(wc -l < "$CHANGED_JSON_FILE" 2>/dev/null | tr -d '[:space:]')
[ -z "$changed_json_count" ] && changed_json_count=0
if [ "$DRY_RUN" = false ] && [ "$changed_json_count" -gt 0 ]; then
echo ""
echo "Verifying ${#CHANGED_JSON[@]} changed JSON file(s) still parse ..."
echo "Verifying $changed_json_count changed JSON file(s) ..."
# Both path lists are read from files (not argv) so this scales past ARG_MAX.
if ! node -e '
const fs = require("fs");
let bad = 0;
for (const f of process.argv.slice(1)) {
const validBefore = new Set(
fs.readFileSync(process.argv[1], "utf8").split("\n").filter(Boolean)
);
const changed = fs.readFileSync(process.argv[2], "utf8").split("\n").filter(Boolean);
let broke = 0;
let preexisting = 0;
for (const f of changed) {
try {
JSON.parse(fs.readFileSync(f, "utf8"));
} catch (e) {
console.error(" Invalid JSON after migration: " + f + ": " + e.message);
bad++;
if (validBefore.has(f)) {
console.error(" Migration broke valid JSON: " + f + ": " + e.message);
broke++;
} else {
preexisting++;
}
}
}
process.exit(bad > 0 ? 1 : 0);
' "${CHANGED_JSON[@]}"; then
ERRORS+=("JSON validation failed for one or more migrated files (see above). Roll back with: patch -R -p0 < $PATCH_FILE")
if (preexisting > 0) {
console.error(
" Note: " + preexisting +
" changed file(s) were already non-strict JSON before the migration (not flagged)."
);
}
process.exit(broke > 0 ? 1 : 0);
' "$VALID_BEFORE_FILE" "$CHANGED_JSON_FILE"; then
ERRORS+=("Migration turned previously-valid JSON invalid in one or more files (see above). Roll back with: patch -R -p0 < $PATCH_FILE")
else
echo " All migrated JSON files parse cleanly."
echo " No previously-valid JSON was broken."
fi
fi

rm -f "$VALID_BEFORE_FILE" "$CHANGED_JSON_FILE" "$PROCESSED_FILE" "$WORKER_ERRORS_FILE"
rm -rf "$FRAGMENTS_DIR"

if [ ${#ERRORS[@]} -gt 0 ]; then
echo ""
echo "WARNING: ${#ERRORS[@]} error(s) encountered during processing:"
Expand Down
Loading