From be4fa93e8eae69f009ac8e0621bc0e54544b5f31 Mon Sep 17 00:00:00 2001 From: Yuriy Bezsonov Date: Fri, 3 Jul 2026 17:07:16 +0200 Subject: [PATCH 1/2] fix(monitoring): Migrate Grafana Helm chart to grafana-community repo The grafana chart in grafana.github.io/helm-charts is now deprecated (frozen at chart 10.5.15 / Grafana 12.3.1, marked deprecated: true) after Grafana Labs forked the charts to grafana-community/helm-charts on March 16, 2026. Installs emit "WARNING: This chart is deprecated". Point the grafana chart at the maintained community repo (grafana-community/grafana, chart 12.7.2 / Grafana 13.1.0) in both monitoring.sh and perf-platform.sh. All values keys used by the scripts (admin, service, persistence, resources, sidecar, plugins, grafana.ini) remain valid in the new chart. The pyroscope chart was not moved, so perf-platform.sh keeps the original grafana repo (still required for grafana/pyroscope) alongside the new grafana-community repo. --- infra/scripts/setup/monitoring.sh | 4 ++-- infra/scripts/setup/perf-platform.sh | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/infra/scripts/setup/monitoring.sh b/infra/scripts/setup/monitoring.sh index 9274bc9e..d018da78 100755 --- a/infra/scripts/setup/monitoring.sh +++ b/infra/scripts/setup/monitoring.sh @@ -44,7 +44,7 @@ trap cleanup EXIT # Setup kubectl create namespace "$NAMESPACE" 2>/dev/null || true helm repo add prometheus-community https://prometheus-community.github.io/helm-charts || true -helm repo add grafana https://grafana.github.io/helm-charts || true +helm repo add grafana-community https://grafana-community.github.io/helm-charts || true helm repo update # Grafana secret @@ -152,7 +152,7 @@ grafana.ini: EOF log_info "Deploying Grafana..." 
-helm upgrade --install grafana grafana/grafana \ +helm upgrade --install grafana grafana-community/grafana \ --namespace "$NAMESPACE" \ --values "$GRAFANA_VALUES_FILE" diff --git a/infra/scripts/setup/perf-platform.sh b/infra/scripts/setup/perf-platform.sh index 5c9c98ba..6152ff17 100755 --- a/infra/scripts/setup/perf-platform.sh +++ b/infra/scripts/setup/perf-platform.sh @@ -41,6 +41,7 @@ kubectl get namespace "${NAMESPACE}" >/dev/null 2>&1 || { } helm repo add grafana https://grafana.github.io/helm-charts >/dev/null 2>&1 || true +helm repo add grafana-community https://grafana-community.github.io/helm-charts >/dev/null 2>&1 || true helm repo update >/dev/null CLUSTER_NAME="${PREFIX}-eks" @@ -288,7 +289,7 @@ GRAFANA_URL="http://${GRAFANA_LB}" # Install Profiles Drilldown plugin (idempotent). log_info "Installing Grafana Profiles Drilldown plugin..." -helm upgrade --install grafana grafana/grafana \ +helm upgrade --install grafana grafana-community/grafana \ --namespace "${NAMESPACE}" \ --reuse-values \ --set "plugins={grafana-pyroscope-app}" \ From f990da5a2d82ef3ea2055919788543bdece918e8 Mon Sep 17 00:00:00 2001 From: Yuriy Bezsonov Date: Fri, 3 Jul 2026 17:56:38 +0200 Subject: [PATCH 2/2] fix(monitoring): Use Recreate strategy for Grafana to avoid PVC deadlock The Grafana chart defaults to deploymentStrategy RollingUpdate, but the workshop enables persistence on a gp3 EBS volume (ReadWriteOnce). On the plugin upgrade in perf-platform.sh, RollingUpdate starts a new pod before deleting the old one; the new pod cannot attach the RWO volume still held by the old pod, so it hangs with a Multi-Attach error until helm --wait hits its deadline (UPGRADE FAILED: context deadline exceeded at line 292). Set deploymentStrategy.type: Recreate in the Grafana values so the old pod is terminated first and releases the volume. The value is persisted on the release, so perf-platform.sh picks it up via --reuse-values. 
Also raise the plugin-install --timeout from 5m to 10m to allow for the EBS detach/reattach cycle plus plugin download on the Recreate path. --- infra/scripts/setup/monitoring.sh | 7 +++++++ infra/scripts/setup/perf-platform.sh | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/infra/scripts/setup/monitoring.sh b/infra/scripts/setup/monitoring.sh index d018da78..5293c3f9 100755 --- a/infra/scripts/setup/monitoring.sh +++ b/infra/scripts/setup/monitoring.sh @@ -116,6 +116,13 @@ persistence: storageClassName: gp3 size: 10Gi +# The PVC above is ReadWriteOnce (EBS). RollingUpdate (the chart default) +# starts the new pod before the old one is deleted, so the new pod cannot +# attach the volume still held by the old pod and the upgrade deadlocks. +# Recreate terminates the old pod first, releasing the volume. +deploymentStrategy: + type: Recreate + resources: requests: cpu: 100m diff --git a/infra/scripts/setup/perf-platform.sh b/infra/scripts/setup/perf-platform.sh index 6152ff17..22c3acd9 100755 --- a/infra/scripts/setup/perf-platform.sh +++ b/infra/scripts/setup/perf-platform.sh @@ -293,7 +293,7 @@ helm upgrade --install grafana grafana-community/grafana \ --namespace "${NAMESPACE}" \ --reuse-values \ --set "plugins={grafana-pyroscope-app}" \ - --wait --timeout 5m + --wait --timeout 10m log_success "Profiles Drilldown plugin installed" # Wait for Grafana API to be ready after the helm upgrade restarts the pod.