From 41a3a1ad1b8083f2bb344ebfbaa8e924ebf974df Mon Sep 17 00:00:00 2001 From: Subhash Khileri Date: Wed, 20 May 2026 13:41:12 +0530 Subject: [PATCH 1/6] =?UTF-8?q?perf(scripts):=20parallelize=20IIB=20bundle?= =?UTF-8?q?=20processing=20(~27=20min=20=E2=86=92=20~5=20min)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Skips slow `skopeo inspect` (~42s/bundle) — attempts the copy directly instead; failed copies (~3s) are faster than successful inspects - Processes bundles in parallel up to MAX_PARALLEL (default 10), with a portable kill-0 throttle loop that re-counts the still-running PIDs each iteration - Collects per-worker sed files and applies them in one pass after all bundles complete, avoiding concurrent writes to render.yaml - Runs `opm render` and cluster registry setup in parallel since they are independent; waits before the bundle-processing phase begins - Replaces check-then-delete secret pattern with --ignore-not-found - Deletes existing CatalogSource before recreating to force OLM re-index when the tag is unchanged but the digest has changed (rebuilt IIB) - Fails loudly if any bundle fails to process (was: silent error log) Assisted-by: Claude Code Co-Authored-By: Claude Code --- .rhdh/scripts/install-rhdh-catalog-source.sh | 161 ++++++++++++++----- 1 file changed, 118 insertions(+), 43 deletions(-) diff --git a/.rhdh/scripts/install-rhdh-catalog-source.sh b/.rhdh/scripts/install-rhdh-catalog-source.sh index cf33d4139..a18f3d1d2 100755 --- a/.rhdh/scripts/install-rhdh-catalog-source.sh +++ b/.rhdh/scripts/install-rhdh-catalog-source.sh @@ -14,6 +14,7 @@ NAMESPACE_SUBSCRIPTION="rhdh-operator" OLM_CHANNEL="fast" UPSTREAM_IIB_OVERRIDE="" INSTALL_PLAN_APPROVAL="Automatic" +MAX_PARALLEL="${MAX_PARALLEL:-10}" # processes bundles in parallel. function logf() { set -euo pipefail @@ -150,56 +151,122 @@ function k8s_check_bundle_manifest_default_config() { echo "ok" } +# Process a single bundle: pull, rewrite refs, repack, push. 
+# Skips the expensive skopeo inspect — attempts the copy directly and handles failure. +# Writes sed replacement commands to a file for the caller to apply to render.yaml after all bundles complete. +function process_bundle() { + local bundleImg="$1" + local originalBundleImg="$2" + local digest="$3" + local my_registry="$4" + local internal_registry_url="$5" + local sed_commands_dir="$6" + local bundle_id="$7" + + local bundle_dir="bundles/${digest}" + mkdir -p "${bundle_dir}" + + # Failed copies are faster than successful inspects. + if ! skopeo copy "docker://$bundleImg" "oci:./${bundle_dir}/src:latest" 2>/dev/null; then + debugf "bundle #${bundle_id}: image not found on quay, skipping" >&2 + return 0 + fi + debugf "bundle #${bundle_id}: pulled ${bundleImg}" >&2 + + umoci unpack --image "./${bundle_dir}/src:latest" "./${bundle_dir}/unpacked" --rootless + + for folder in manifests metadata; do + for file in "./${bundle_dir}/unpacked/rootfs/${folder}"/*; do + if [ -f "$file" ]; then + sed -i 's#registry.redhat.io/rhdh#quay.io/rhdh#g' "$file" + sed -i 's#registry.stage.redhat.io/rhdh#quay.io/rhdh#g' "$file" + sed -i 's#registry-proxy.engineering.redhat.com/rh-osbs/rhdh-#quay.io/rhdh/#g' "$file" + fi + done + done + + umoci repack --image "./${bundle_dir}/src:latest" "./${bundle_dir}/unpacked" + + local newBundleImage="${my_registry}/rhdh/rhdh-operator-bundle:${digest}" + skopeo copy --dest-tls-verify=false "oci:./${bundle_dir}/src:latest" "docker://${newBundleImage}" + debugf "bundle #${bundle_id}: pushed to ${newBundleImage}" >&2 + + local newBundleImageAsInt="${internal_registry_url}/rhdh/rhdh-operator-bundle:${digest}" + # Each worker writes to its own file (keyed by digest) — no locking needed. + echo "s#${originalBundleImg}#${newBundleImageAsInt}#g" > "${sed_commands_dir}/${digest}.sed" +} + function update_refs_in_iib_bundles() { set -euo pipefail local internal_registry_url="$1" local my_registry="$2" - # 2. 
Render the IIB locally, modify any references to the internal registries with their mirrors on Quay - # and push the updates to the internal cluster registry - for bundleImg in $(grep -E '^image: .*operator-bundle' "${TMPDIR}/rhdh/rhdh/render.yaml" | awk '{print $2}' | uniq); do - originalBundleImg="$bundleImg" - digest="${originalBundleImg##*@sha256:}" + + local bundle_images + bundle_images=$(grep -E '^image: .*operator-bundle' "${TMPDIR}/rhdh/rhdh/render.yaml" | awk '{print $2}' | uniq) + + local total_bundles + total_bundles=$(echo "$bundle_images" | wc -l | tr -d ' ') + infof "Processing ${total_bundles} bundles (max ${MAX_PARALLEL} parallel)..." >&2 + + local sed_commands_dir="${TMPDIR}/sed_commands" + mkdir -p "$sed_commands_dir" + + local bundle_count=0 + local pids=() + + for bundleImg in $bundle_images; do + bundle_count=$((bundle_count + 1)) + local originalBundleImg="$bundleImg" + local digest="${originalBundleImg##*@sha256:}" bundleImg="${bundleImg/registry.stage.redhat.io/quay.io}" bundleImg="${bundleImg/registry.redhat.io/quay.io}" bundleImg="${bundleImg/registry-proxy.engineering.redhat.com\/rh-osbs\/rhdh-/quay.io\/rhdh\/}" - debugf "$originalBundleImg => $bundleImg" - if skopeo inspect "docker://$bundleImg" &> /dev/null; then - newBundleImage="${my_registry}/rhdh/rhdh-operator-bundle:${digest}" - newBundleImageAsInt="${internal_registry_url}/rhdh/rhdh-operator-bundle:${digest}" - mkdir -p "bundles/$digest" - - debugf "Copying and unpacking image $bundleImg locally..." - skopeo copy "docker://$bundleImg" "oci:./bundles/${digest}/src:latest" - umoci unpack --image "./bundles/${digest}/src:latest" "./bundles/${digest}/unpacked" --rootless - - # Replace the occurrences in the .csv.yaml or .clusterserviceversion.yaml files - debugf "Replacing refs to internal registry in bundle image $bundleImg..." 
- for folder in manifests metadata; do - for file in "./bundles/${digest}/unpacked/rootfs/${folder}"/*; do - if [ -f "$file" ]; then - debugf "replacing refs to internal registries in file '${file}'" - sed -i 's#registry.redhat.io/rhdh#quay.io/rhdh#g' "$file" - sed -i 's#registry.stage.redhat.io/rhdh#quay.io/rhdh#g' "$file" - sed -i 's#registry-proxy.engineering.redhat.com/rh-osbs/rhdh-#quay.io/rhdh/#g' "$file" - fi - done + debugf "bundle #${bundle_count}/${total_bundles}: $originalBundleImg => $bundleImg" >&2 + + # Throttle: wait until a slot opens (portable, no wait -n needed) + while true; do + local running=0 + for pid in ${pids[@]+"${pids[@]}"}; do + if kill -0 "$pid" 2>/dev/null; then + running=$((running + 1)) + fi done + if [[ $running -lt $MAX_PARALLEL ]]; then + break + fi + sleep 0.2 + done - # repack the image with the changes - debugf "Repacking image ./bundles/${digest}/src => ./bundles/${digest}/unpacked..." - umoci repack --image "./bundles/${digest}/src:latest" "./bundles/${digest}/unpacked" - - # Push the bundle to the internal cluster registry - debugf "Pushing updated image: ./bundles/${digest}/src => ${newBundleImage}..." - skopeo copy --dest-tls-verify=false "oci:./bundles/${digest}/src:latest" "docker://${newBundleImage}" + process_bundle "$bundleImg" "$originalBundleImg" "$digest" "$my_registry" "$internal_registry_url" "$sed_commands_dir" "$bundle_count" & + pids+=($!) + done - sed -i "s#${originalBundleImg}#${newBundleImageAsInt}#g" "${TMPDIR}/rhdh/rhdh/render.yaml" + # Wait for all remaining background bundle jobs + local failed=0 + for pid in ${pids[@]+"${pids[@]}"}; do + if ! wait "$pid"; then + failed=$((failed + 1)) fi done + if [[ $failed -gt 0 ]]; then + errorf "${failed} bundle(s) failed to process" >&2 + return 1 + fi - # 3. Regenerate the IIB image with the local changes to the render.yaml file and build and push it from within the cluster - debugf "Regenerating IIB Dockerfile with updated refs..." 
+ # Apply all sed replacements to render.yaml + local sed_files + sed_files=$(find "$sed_commands_dir" -name '*.sed' 2>/dev/null) + if [[ -n "$sed_files" ]]; then + local combined_sed="${TMPDIR}/combined_sed_commands.txt" + cat "$sed_commands_dir"/*.sed > "$combined_sed" + local replacement_count + replacement_count=$(wc -l < "$combined_sed" | tr -d ' ') + infof "Applying ${replacement_count} image ref replacements to render.yaml..." >&2 + sed -i -f "$combined_sed" "${TMPDIR}/rhdh/rhdh/render.yaml" + fi + + debugf "Regenerating IIB Dockerfile with updated refs..." >&2 opm generate dockerfile rhdh/rhdh } @@ -216,7 +283,9 @@ function ocp_install() { set -euo pipefail - render_iib >&2 + # Run opm render and registry setup in parallel — they're independent. + render_iib >&2 & + local render_pid=$! # 1. Expose the internal cluster registry if not done already debugf "Exposing cluster registry..." >&2 @@ -224,12 +293,8 @@ function ocp_install() { oc patch configs.imageregistry.operator.openshift.io/cluster --patch '{"spec":{"defaultRoute":true}}' --type=merge >&2 my_registry=$(oc get route default-route -n openshift-image-registry --template='{{ .spec.host }}') skopeo login -u kubeadmin -p "$(oc whoami -t)" --tls-verify=false "$my_registry" >&2 - if oc -n openshift-marketplace get secret internal-reg-auth-for-rhdh &> /dev/null; then - oc -n openshift-marketplace delete secret internal-reg-auth-for-rhdh >&2 - fi - if oc -n openshift-marketplace get secret internal-reg-ext-auth-for-rhdh &> /dev/null; then - oc -n openshift-marketplace delete secret internal-reg-ext-auth-for-rhdh >&2 - fi + oc -n openshift-marketplace delete secret internal-reg-auth-for-rhdh --ignore-not-found >&2 + oc -n openshift-marketplace delete secret internal-reg-ext-auth-for-rhdh --ignore-not-found >&2 oc -n openshift-marketplace create secret docker-registry internal-reg-ext-auth-for-rhdh \ --docker-server="${my_registry}" \ --docker-username=kubeadmin \ @@ -250,6 +315,12 @@ function ocp_install() 
{ oc policy add-role-to-user system:image-puller system:serviceaccount:openshift-marketplace:default -n openshift-marketplace >&2 || true oc policy add-role-to-user system:image-puller system:serviceaccount:rhdh-operator:default -n rhdh-operator >&2 || true + # Wait for opm render to finish before processing bundles + if ! wait "$render_pid"; then + errorf "opm render failed" >&2 + return 1 + fi + # 3. Regenerate the IIB image with the local changes to the render.yaml file and build and push it from within the cluster update_refs_in_iib_bundles "$internal_registry_url" "$my_registry" >&2 @@ -750,6 +821,10 @@ if [[ "${IS_OPENSHIFT}" = "true" ]]; then NAMESPACE_CATALOGSOURCE="openshift-marketplace" fi +# Delete existing CatalogSource first to force OLM to re-pull the image. +# Without this, if the tag is unchanged but the digest changed (rebuilt IIB), OLM reports "unchanged" and never re-indexes. +invoke_cluster_cli delete catalogsource "${CATALOGSOURCE_NAME}" -n "${NAMESPACE_CATALOGSOURCE}" --ignore-not-found + echo "apiVersion: operators.coreos.com/v1alpha1 kind: CatalogSource metadata: From 2fe5084ca9deb606209b7381fa29ddfe390f0fad Mon Sep 17 00:00:00 2001 From: Subhash Khileri Date: Wed, 20 May 2026 13:56:24 +0530 Subject: [PATCH 2/6] fix(scripts): add set -euo pipefail to process_bundle and preserve skopeo stderr Background subshells don't inherit set -e from the parent, so intermediate failures (umoci, skopeo push) went undetected and the worker would write a .sed entry for a broken bundle. Also redirect speculative copy stderr to a per-bundle file instead of /dev/null so auth failures, timeouts, and disk errors are debuggable. 
Assisted-by: Claude Code Co-Authored-By: Claude Code --- .rhdh/scripts/install-rhdh-catalog-source.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.rhdh/scripts/install-rhdh-catalog-source.sh b/.rhdh/scripts/install-rhdh-catalog-source.sh index a18f3d1d2..438c09eec 100755 --- a/.rhdh/scripts/install-rhdh-catalog-source.sh +++ b/.rhdh/scripts/install-rhdh-catalog-source.sh @@ -155,6 +155,8 @@ function k8s_check_bundle_manifest_default_config() { # Skips the expensive skopeo inspect — attempts the copy directly and handles failure. # Writes sed replacement commands to a file for the caller to apply to render.yaml after all bundles complete. function process_bundle() { + set -euo pipefail + local bundleImg="$1" local originalBundleImg="$2" local digest="$3" @@ -167,8 +169,8 @@ function process_bundle() { mkdir -p "${bundle_dir}" # Failed copies are faster than successful inspects. - if ! skopeo copy "docker://$bundleImg" "oci:./${bundle_dir}/src:latest" 2>/dev/null; then - debugf "bundle #${bundle_id}: image not found on quay, skipping" >&2 + if ! 
skopeo copy "docker://$bundleImg" "oci:./${bundle_dir}/src:latest" 2>"${bundle_dir}/copy.err"; then + debugf "bundle #${bundle_id}: skopeo copy failed, skipping (see ${bundle_dir}/copy.err)" >&2 return 0 fi debugf "bundle #${bundle_id}: pulled ${bundleImg}" >&2 From ca89fc68ee8bf4c0bb1a0432b409a93b6f9bd667 Mon Sep 17 00:00:00 2001 From: Subhash Khileri Date: Wed, 20 May 2026 14:46:02 +0530 Subject: [PATCH 3/6] chore(scripts): clean up comments to focus on WHY, not WHAT Assisted-by: Claude Code Co-Authored-By: Claude Code --- .rhdh/scripts/install-rhdh-catalog-source.sh | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/.rhdh/scripts/install-rhdh-catalog-source.sh b/.rhdh/scripts/install-rhdh-catalog-source.sh index 438c09eec..b29094665 100755 --- a/.rhdh/scripts/install-rhdh-catalog-source.sh +++ b/.rhdh/scripts/install-rhdh-catalog-source.sh @@ -151,9 +151,7 @@ function k8s_check_bundle_manifest_default_config() { echo "ok" } -# Process a single bundle: pull, rewrite refs, repack, push. -# Skips the expensive skopeo inspect — attempts the copy directly and handles failure. -# Writes sed replacement commands to a file for the caller to apply to render.yaml after all bundles complete. +# Writes sed replacement commands to sed_commands_dir for the caller to batch-apply after all bundles complete. function process_bundle() { set -euo pipefail @@ -226,7 +224,7 @@ function update_refs_in_iib_bundles() { bundleImg="${bundleImg/registry-proxy.engineering.redhat.com\/rh-osbs\/rhdh-/quay.io\/rhdh\/}" debugf "bundle #${bundle_count}/${total_bundles}: $originalBundleImg => $bundleImg" >&2 - # Throttle: wait until a slot opens (portable, no wait -n needed) + # Portable alternative to `wait -n` (not available in all bash versions) while true; do local running=0 for pid in ${pids[@]+"${pids[@]}"}; do @@ -244,7 +242,6 @@ function update_refs_in_iib_bundles() { pids+=($!) 
done - # Wait for all remaining background bundle jobs local failed=0 for pid in ${pids[@]+"${pids[@]}"}; do if ! wait "$pid"; then @@ -256,7 +253,6 @@ function update_refs_in_iib_bundles() { return 1 fi - # Apply all sed replacements to render.yaml local sed_files sed_files=$(find "$sed_commands_dir" -name '*.sed' 2>/dev/null) if [[ -n "$sed_files" ]]; then @@ -285,7 +281,7 @@ function ocp_install() { set -euo pipefail - # Run opm render and registry setup in parallel — they're independent. + # render_iib is independent of registry setup below, so run concurrently. render_iib >&2 & local render_pid=$! @@ -317,7 +313,6 @@ function ocp_install() { oc policy add-role-to-user system:image-puller system:serviceaccount:openshift-marketplace:default -n openshift-marketplace >&2 || true oc policy add-role-to-user system:image-puller system:serviceaccount:rhdh-operator:default -n rhdh-operator >&2 || true - # Wait for opm render to finish before processing bundles if ! wait "$render_pid"; then errorf "opm render failed" >&2 return 1 From 879e8b3b60231f633d72cee885b686af89d1ecbf Mon Sep 17 00:00:00 2001 From: Subhash Khileri Date: Wed, 20 May 2026 17:37:46 +0530 Subject: [PATCH 4/6] chore(scripts): clarify MAX_PARALLEL comment Assisted-by: Claude Code Co-Authored-By: Claude Code --- .rhdh/scripts/install-rhdh-catalog-source.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.rhdh/scripts/install-rhdh-catalog-source.sh b/.rhdh/scripts/install-rhdh-catalog-source.sh index b29094665..50aeb8aea 100755 --- a/.rhdh/scripts/install-rhdh-catalog-source.sh +++ b/.rhdh/scripts/install-rhdh-catalog-source.sh @@ -14,7 +14,7 @@ NAMESPACE_SUBSCRIPTION="rhdh-operator" OLM_CHANNEL="fast" UPSTREAM_IIB_OVERRIDE="" INSTALL_PLAN_APPROVAL="Automatic" -MAX_PARALLEL="${MAX_PARALLEL:-10}" # processes bundles in parallel. 
+MAX_PARALLEL="${MAX_PARALLEL:-10}" # max concurrent bundle workers in process_bundle function logf() { set -euo pipefail From 4b7acf8a8918ea86ae47045572d690034b08d938 Mon Sep 17 00:00:00 2001 From: Subhash Khileri Date: Mon, 25 May 2026 12:43:33 +0530 Subject: [PATCH 5/6] fix(scripts): validate MAX_PARALLEL and fix zombie processes on exit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Validate MAX_PARALLEL is a positive integer, exit with clear error otherwise (prevents infinite hang with 0 or crash with non-numeric) - Consolidate 3 separate trap EXIT calls into one — they were overwriting each other, so only the last one ran (pre-existing bug causing kubectl port-forward zombies and TMPDIR not being cleaned) - Remove unused kanikoLogsPid variable Assisted-by: Claude Code Co-Authored-By: Claude Code --- .rhdh/scripts/install-rhdh-catalog-source.sh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/.rhdh/scripts/install-rhdh-catalog-source.sh b/.rhdh/scripts/install-rhdh-catalog-source.sh index 50aeb8aea..c91116f1d 100755 --- a/.rhdh/scripts/install-rhdh-catalog-source.sh +++ b/.rhdh/scripts/install-rhdh-catalog-source.sh @@ -14,7 +14,11 @@ NAMESPACE_SUBSCRIPTION="rhdh-operator" OLM_CHANNEL="fast" UPSTREAM_IIB_OVERRIDE="" INSTALL_PLAN_APPROVAL="Automatic" -MAX_PARALLEL="${MAX_PARALLEL:-10}" # max concurrent bundle workers in process_bundle +MAX_PARALLEL="${MAX_PARALLEL:-10}" +if ! 
[[ "$MAX_PARALLEL" =~ ^[1-9][0-9]*$ ]] || [[ "$MAX_PARALLEL" -lt 1 ]]; then + echo "[ERROR] MAX_PARALLEL must be a positive integer, got: '$MAX_PARALLEL'" >&2 + exit 1 +fi function logf() { set -euo pipefail @@ -475,7 +479,6 @@ EOF cat "${registry_port_fwd_out}" return 1 fi - trap '[[ -n "${port_fwd_pid:-}" ]] && kill ${port_fwd_pid} || true' EXIT local portFwdLocalPort portFwdLocalPort=$(grep -oP '127\.0\.0\.1:\K[0-9]+' "${registry_port_fwd_out}") @@ -526,7 +529,6 @@ EOF local timestamp local kanikoJobName local kanikoPod - local kanikoLogsPid local localContext timestamp=$(date +%s) kanikoJobName="kaniko-build-${timestamp}" @@ -590,8 +592,6 @@ EOF debugf "Waiting for Kaniko pod $kanikoPod to be ready..." >&2 invoke_cluster_cli -n "${namespace}" wait --for=condition=Ready "pod/$kanikoPod" --timeout=60s >&2 invoke_cluster_cli -n "${namespace}" logs -f "${kanikoPod}" >&2 & - kanikoLogsPid=$! - trap '[[ -n "${kanikoLogsPid:-}" ]] && kill ${kanikoLogsPid} &>/dev/null || true' EXIT localContext=context.tar.gz tar -czf "${localContext}" -C rhdh . >&2 @@ -624,8 +624,9 @@ fi TMPDIR=$(mktemp -d) pushd "${TMPDIR}" > /dev/null debugf ">>> WORKING DIR: $TMPDIR <<<" + # shellcheck disable=SC2064 -trap "rm -fr $TMPDIR || true" EXIT +trap "rm -fr $TMPDIR || true; kill 0" EXIT INT TERM detect_ocp_and_set_env_var if [[ "${IS_OPENSHIFT}" = "true" ]]; then From 9adc27ded579f7a17cd785745ada58f70504e4bf Mon Sep 17 00:00:00 2001 From: Subhash Khileri Date: Thu, 28 May 2026 11:00:28 +0530 Subject: [PATCH 6/6] fix(scripts): replace kill 0 with jobs -p to avoid killing parent process group kill 0 sends SIGTERM to the entire process group including the parent shell/CI harness, causing segfaults on normal exit. Use jobs -p to target only this script's background jobs. Split INT/TERM from EXIT to avoid re-entrant cleanup. 
Assisted-by: Claude Code Co-Authored-By: Claude Code --- .rhdh/scripts/install-rhdh-catalog-source.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.rhdh/scripts/install-rhdh-catalog-source.sh b/.rhdh/scripts/install-rhdh-catalog-source.sh index c91116f1d..770f840bb 100755 --- a/.rhdh/scripts/install-rhdh-catalog-source.sh +++ b/.rhdh/scripts/install-rhdh-catalog-source.sh @@ -626,7 +626,8 @@ pushd "${TMPDIR}" > /dev/null debugf ">>> WORKING DIR: $TMPDIR <<<" # shellcheck disable=SC2064 -trap "rm -fr $TMPDIR || true; kill 0" EXIT INT TERM +trap "jobs -p | xargs -r kill 2>/dev/null || true; wait 2>/dev/null; rm -fr '$TMPDIR' || true" EXIT +trap "exit 1" INT TERM detect_ocp_and_set_env_var if [[ "${IS_OPENSHIFT}" = "true" ]]; then