diff --git a/infra/scripts/setup/monitoring.sh b/infra/scripts/setup/monitoring.sh index 9274bc9e..5293c3f9 100755 --- a/infra/scripts/setup/monitoring.sh +++ b/infra/scripts/setup/monitoring.sh @@ -44,7 +44,7 @@ trap cleanup EXIT # Setup kubectl create namespace "$NAMESPACE" 2>/dev/null || true helm repo add prometheus-community https://prometheus-community.github.io/helm-charts || true -helm repo add grafana https://grafana.github.io/helm-charts || true +helm repo add grafana-community https://grafana-community.github.io/helm-charts || true helm repo update # Grafana secret @@ -116,6 +116,13 @@ persistence: storageClassName: gp3 size: 10Gi +# The PVC above is ReadWriteOnce (EBS). RollingUpdate (the chart default) +# starts the new pod before the old one is deleted, so the new pod cannot +# attach the volume still held by the old pod and the upgrade deadlocks. +# Recreate terminates the old pod first, releasing the volume. +deploymentStrategy: + type: Recreate + resources: requests: cpu: 100m @@ -152,7 +159,7 @@ grafana.ini: EOF log_info "Deploying Grafana..." -helm upgrade --install grafana grafana/grafana \ +helm upgrade --install grafana grafana-community/grafana \ --namespace "$NAMESPACE" \ --values "$GRAFANA_VALUES_FILE" diff --git a/infra/scripts/setup/perf-platform.sh b/infra/scripts/setup/perf-platform.sh index 5c9c98ba..22c3acd9 100755 --- a/infra/scripts/setup/perf-platform.sh +++ b/infra/scripts/setup/perf-platform.sh @@ -41,6 +41,7 @@ kubectl get namespace "${NAMESPACE}" >/dev/null 2>&1 || { } helm repo add grafana https://grafana.github.io/helm-charts >/dev/null 2>&1 || true +helm repo add grafana-community https://grafana-community.github.io/helm-charts >/dev/null 2>&1 || true helm repo update >/dev/null CLUSTER_NAME="${PREFIX}-eks" @@ -288,11 +289,11 @@ GRAFANA_URL="http://${GRAFANA_LB}" # Install Profiles Drilldown plugin (idempotent). log_info "Installing Grafana Profiles Drilldown plugin..." -helm upgrade --install grafana grafana/grafana \ +helm upgrade --install grafana grafana-community/grafana \ --namespace "${NAMESPACE}" \ --reuse-values \ --set "plugins={grafana-pyroscope-app}" \ - --wait --timeout 5m + --wait --timeout 10m log_success "Profiles Drilldown plugin installed" # Wait for Grafana API to be ready after the helm upgrade restarts the pod.