Add arm64 (multi-arch) support for the ubuntu24.04 precompiled driver images.

Summary of what the hunks below do:
  * add a "platforms" list to the precompiled matrix config and expose it as a
    workflow output;
  * build ubuntu24.04 precompiled images for linux/amd64 and linux/arm64
    (azure-fde and ubuntu22.04 stay amd64-only) and export the driver image as
    an OCI archive instead of `docker save`;
  * compute the e2e matrix per platform (arm64 entries carry a "-arm64" suffix)
    and publish with skopeo instead of docker load/push;
  * make the ubuntu24.04 precompiled Dockerfile and scripts skip packages that
    are not available for arm64.

Review notes for this revision:
  * The patch text had been flattened; line breaks and indentation were
    reconstructed from the hunk line counts. Verify whitespace against the
    target tree (or apply with --ignore-whitespace) before merging.
  * findkernelversion.sh keeps the errexit-safe `cmd || status=$?` form and
    derives PLATFORM with parameter expansion.
  * The optional PLATFORM_SUFFIX argument is read as "${5:-}".
  * Architecture checks in nvidia-driver and local-repo.sh fall back to
    `dpkg --print-architecture` when TARGETARCH is empty.
    NOTE(review): confirm whether the image already exports TARGETARCH.

diff --git a/.github/precompiled-matrix-config.json b/.github/precompiled-matrix-config.json
index 84d0bb8b5..16a7e6448 100644
--- a/.github/precompiled-matrix-config.json
+++ b/.github/precompiled-matrix-config.json
@@ -3,6 +3,7 @@
   "kernel_flavors": ["aws", "azure", "azure-fde", "generic", "nvidia", "oracle"],
   "dist": ["ubuntu22.04", "ubuntu24.04"],
   "lts_kernel": ["5.15", "6.8"],
+  "platforms": ["amd64", "arm64"],
   "exclude_build_matrix_pairs": [],
   "exclude_precompiled_build_matrix": [
     { "lts_kernel": "5.15", "dist": "ubuntu24.04" },
diff --git a/.github/workflows/image.yaml b/.github/workflows/image.yaml
index 36429ad12..cdfd86ba0 100644
--- a/.github/workflows/image.yaml
+++ b/.github/workflows/image.yaml
@@ -193,5 +193,11 @@ jobs:
           PRECOMPILED: "true"
           DIST: signed_${{ matrix.dist }}
         run: |
-          source kernel_version.txt && \
+          source kernel_version.txt
+          # arm64 does not support azure-fde (package linux-objects-nvidia-*-azure-fde not available for arm64)
+          if [[ "${{ matrix.dist }}" == "ubuntu24.04" ]] && [[ "${{ matrix.flavor }}" != "azure-fde" ]]; then
+            export DOCKER_BUILD_PLATFORM_OPTIONS="--platform=linux/amd64,linux/arm64"
+          else
+            export DOCKER_BUILD_PLATFORM_OPTIONS="--platform=linux/amd64"
+          fi
           make DRIVER_VERSIONS=${DRIVER_VERSIONS} DRIVER_BRANCH=${{ matrix.driver }} build-${DIST}-${DRIVER_VERSION}
diff --git a/.github/workflows/precompiled.yaml b/.github/workflows/precompiled.yaml
index 5840a4050..6b3a3f437 100644
--- a/.github/workflows/precompiled.yaml
+++ b/.github/workflows/precompiled.yaml
@@ -35,6 +35,7 @@ jobs:
       kernel_flavors: ${{ steps.generate_matrix_config.outputs.kernel_flavors }}
       dist: ${{ steps.generate_matrix_config.outputs.dist }}
       lts_kernel: ${{ steps.generate_matrix_config.outputs.lts_kernel }}
+      platforms: ${{ steps.generate_matrix_config.outputs.platforms }}
     steps:
       - name: Checkout code
         uses: actions/checkout@v6
@@ -50,6 +51,7 @@ jobs:
           echo "exclude_build_matrix_pairs=$(jq -c '.exclude_build_matrix_pairs' "$CONFIG_FILE")" >> "$GITHUB_OUTPUT"
           echo "exclude_precompiled_build_matrix=$(jq -c '.exclude_precompiled_build_matrix' "$CONFIG_FILE")" >> "$GITHUB_OUTPUT"
           echo "exclude_precompiled_e2e_matrix=$(jq -c '.exclude_precompiled_e2e_matrix' "$CONFIG_FILE")" >> "$GITHUB_OUTPUT"
+          echo "platforms=$(jq -c '.platforms' "$CONFIG_FILE")" >> "$GITHUB_OUTPUT"
 
   precompiled-build-image:
     needs: set-driver-version-matrix
@@ -74,7 +76,7 @@ jobs:
           GENERATE_ARTIFACTS="false"
 
           echo "PUSH_ON_BUILD=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV
-          echo "BUILD_MULTI_ARCH_IMAGES=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV
+          echo "BUILD_MULTI_ARCH_IMAGES=true" >> $GITHUB_ENV
 
       - name: Set CVE updates
         uses: ./.github/actions/set-cve-updates
@@ -102,6 +104,7 @@ jobs:
           elif [[ "${{ matrix.dist }}" == "ubuntu24.04" ]]; then
             BASE_TARGET="noble"
           fi
+          export DOCKER_BUILD_PLATFORM_OPTIONS="--platform=linux/amd64"
           make DRIVER_BRANCH=${{ matrix.driver_branch }} KERNEL_FLAVOR=${{ matrix.flavor }} LTS_KERNEL=${LTS_KERNEL} build-base-${BASE_TARGET}
 
           trap "docker rm -f base-${BASE_TARGET}-${{ matrix.flavor }}" EXIT
@@ -117,10 +120,16 @@ jobs:
           PRECOMPILED: "true"
           DIST: signed_${{ matrix.dist }}
         run: |
-          source kernel_version.txt && \
-          make DRIVER_VERSIONS=${DRIVER_VERSIONS} DRIVER_BRANCH=${{ matrix.driver_branch }} build-${DIST}-${DRIVER_VERSION}
-
-      - name: Save base image, build image and kernel version file
+          source kernel_version.txt
+          export DOCKER_BUILD_OPTIONS="--output=type=oci,dest=./driver-images-${{ matrix.driver_branch }}-${KERNEL_VERSION}-${{ matrix.dist }}.tar"
+          # build multi-arch images for ubuntu24.04 except azure-fde (arm64 does not support azure-fde)
+          if [[ "${{ matrix.dist }}" == "ubuntu24.04" ]] && [[ "${{ matrix.flavor }}" != "azure-fde" ]]; then
+            export DOCKER_BUILD_PLATFORM_OPTIONS="--platform=linux/amd64,linux/arm64"
+          else
+            export DOCKER_BUILD_PLATFORM_OPTIONS="--platform=linux/amd64"
+          fi
+          make DRIVER_VERSIONS=${DRIVER_VERSIONS} DRIVER_BRANCH=${{ matrix.driver_branch }} KERNEL_VERSION=${KERNEL_VERSION} build-${DIST}-${DRIVER_VERSION}
+      - name: Save base image and kernel version file
         env:
           DIST: ${{ matrix.dist }}
           PRIVATE_REGISTRY: "ghcr.io"
@@ -135,8 +144,6 @@ jobs:
           tar -cvf kernel-version-${{ matrix.driver_branch }}-${KERNEL_VERSION}-${DIST}.tar kernel_version.txt
           docker save "${PRIVATE_REGISTRY}/nvidia/driver:base-${BASE_TARGET}-${LTS_KERNEL}-${{ matrix.flavor }}-${{ matrix.driver_branch }}" \
             -o ./base-images-${{ matrix.driver_branch }}-${KERNEL_VERSION}-${DIST}.tar
-          docker save "${PRIVATE_REGISTRY}/nvidia/driver:${{ matrix.driver_branch }}-${KERNEL_VERSION}-${DIST}" \
-            -o ./driver-images-${{ matrix.driver_branch }}-${KERNEL_VERSION}-${DIST}.tar
           # set env for artifacts upload
           echo "KERNEL_VERSION=$KERNEL_VERSION" >> $GITHUB_ENV
           echo "DIST=$DIST" >> $GITHUB_ENV
@@ -185,9 +192,9 @@ jobs:
       - name: Download all kernel-version artifacts
         uses: actions/download-artifact@v8
         with:
-          pattern: kernel-version-*
+          pattern: kernel-version*${{ matrix.lts_kernel }}*${{ matrix.dist }}
           path: ./kernel-version-artifacts
-          merge-multiple: false
+          merge-multiple: true
 
       - name: Set kernel version
         env:
@@ -207,21 +214,35 @@ jobs:
             fi
           done
           source ./tests/scripts/ci-precompiled-helpers.sh
-          KERNEL_VERSIONS=($(get_kernel_versions_to_test KERNEL_FLAVORS[@] DRIVER_BRANCHES[@] $DIST $LTS_KERNEL))
-          if [ -z "$KERNEL_VERSIONS" ]; then
-            # no new kernel release
-            echo "Skipping e2e tests"
-            exit 0
-          fi
-          # Convert array to JSON format and assign
-          echo "[]" > ./matrix_values_${{ matrix.dist }}_${{ matrix.lts_kernel }}.json
-          printf '%s\n' "${KERNEL_VERSIONS[@]}" | jq -R . | jq -s . > ./matrix_values_${{ matrix.dist }}_${{ matrix.lts_kernel }}.json
+          platforms_json='${{ needs.set-driver-version-matrix.outputs.platforms }}'
+          platform=$(echo "$platforms_json" | jq -r '.[]')
+          for PLATFORM in $platform; do
+            if [[ "$PLATFORM" == "arm64" ]] && [[ "$DIST" == "ubuntu22.04" ]]; then
+              continue
+            fi
+            if [[ "$PLATFORM" == "arm64" ]]; then
+              PLATFORM_SUFFIX="-arm64"
+              FLAVORS_FOR_PLATFORM=()
+              for f in "${KERNEL_FLAVORS[@]}"; do
+                if [[ "$f" != "azure-fde" ]]; then
+                  FLAVORS_FOR_PLATFORM+=("$f")
+                fi
+              done
+            else
+              PLATFORM_SUFFIX=""
+              FLAVORS_FOR_PLATFORM=("${KERNEL_FLAVORS[@]}")
+            fi
+            KERNEL_VERSIONS=($(get_kernel_versions_to_test FLAVORS_FOR_PLATFORM[@] DRIVER_BRANCHES[@] $DIST $LTS_KERNEL $PLATFORM_SUFFIX))
+            if [ -n "${KERNEL_VERSIONS[*]}" ]; then
+              printf '%s\n' "${KERNEL_VERSIONS[@]}" | jq -R . | jq -s . > ./matrix_values_${{ matrix.dist }}_${{ matrix.lts_kernel }}$PLATFORM_SUFFIX.json
+            fi
+          done
 
       - name: Upload kernel matrix values as artifacts
         uses: actions/upload-artifact@v7
         with:
           name: matrix-values-${{ matrix.dist }}-${{ matrix.lts_kernel }}
-          path: ./matrix_values_${{ matrix.dist }}_${{ matrix.lts_kernel }}.json
+          path: ./matrix_values_${{ matrix.dist }}_${{ matrix.lts_kernel }}*.json
           retention-days: 1
 
   collect-e2e-test-matrix:
@@ -250,31 +271,13 @@ jobs:
           echo "matrix_values_not_empty=0" >> $GITHUB_OUTPUT
 
           kernel_versions=()
-          # Read and merge kernel_version values from dist files
-          dist_json='${{ needs.set-driver-version-matrix.outputs.dist }}'
-          DIST=($(echo "$dist_json" | jq -r '.[]'))
-          lts_kernel_json='${{ needs.set-driver-version-matrix.outputs.lts_kernel }}'
-          LTS_KERNEL=($(echo "$lts_kernel_json" | jq -r '.[]'))
-          for dist in "${DIST[@]}"; do
-            for kernel in "${LTS_KERNEL[@]}"; do
-              artifact_name="matrix-values-${dist}-${kernel}"
-              file_path="./matrix-values-artifacts/${artifact_name}/matrix_values_${dist}_${kernel}.json"
-              flat_path="./matrix-values-artifacts/matrix_values_${dist}_${kernel}.json"
-              if [ -f "$file_path" ]; then
-                echo "Successfully found artifact: $artifact_name at $file_path"
-                value=$(jq -r '.[]' "$file_path")
-                kernel_versions+=($value)
-                echo "matrix_values_not_empty=1" >> $GITHUB_OUTPUT
-              elif [ -f "$flat_path" ]; then
-                echo "Successfully found artifact: $artifact_name at $flat_path"
-                value=$(jq -r '.[]' "$flat_path")
-                kernel_versions+=($value)
-                echo "matrix_values_not_empty=1" >> $GITHUB_OUTPUT
-              else
-                echo "Artifact not found: $artifact_name"
-              fi
-            done
+          # Read and merge kernel_version values from all platform artifacts (amd64 and arm64)
+          for f in $(find ./matrix-values-artifacts -name "matrix_values_*.json" -type f 2>/dev/null); do
+            value=$(jq -r '.[]' "$f")
+            kernel_versions+=($value)
+            echo "matrix_values_not_empty=1" >> $GITHUB_OUTPUT
           done
+          kernel_versions=($(printf '%s\n' "${kernel_versions[@]}" | sort -u))
           echo "Collected Kernel Versions: ${kernel_versions[@]}"
           combined_values=$(printf '%s\n' "${kernel_versions[@]}" | jq -R . | jq -s -c . | tr -d ' \n')
           echo "Combined Kernel Versions JSON: $combined_values"
@@ -327,29 +330,43 @@ jobs:
           registry: ghcr.io
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
-      - name: Download all driver image artifacts
-        uses: actions/download-artifact@v8
-        with:
-          pattern: driver-images-*-${{ matrix.kernel_version }}
-          path: ./tests/
-          merge-multiple: true
       - name: Set and Calculate test vars
         run: |
           echo "private_key=${{ github.workspace }}/key.pem" >> $GITHUB_ENV
           echo "${{ secrets.AWS_SSH_KEY }}" > ${{ github.workspace }}/key.pem && chmod 400 ${{ github.workspace }}/key.pem
           echo "PRIVATE_REGISTRY=ghcr.io" >> $GITHUB_ENV
           KERNEL_VERSION="${{ matrix.kernel_version }}"
+          if [[ "$KERNEL_VERSION" == *-arm64 ]]; then
+            echo "PLATFORM=arm64" >> $GITHUB_ENV
+            KERNEL_VERSION="${KERNEL_VERSION%-arm64}"
+          else
+            echo "PLATFORM=amd64" >> $GITHUB_ENV
+          fi
           # Extract the last segment after the last dash
           DIST=${KERNEL_VERSION##*-}
           echo "DIST=$DIST" >> $GITHUB_ENV
           KERNEL_VERSION=${KERNEL_VERSION%-*}
           echo "KERNEL_VERSION=$KERNEL_VERSION" >> $GITHUB_ENV
-      - name: Configure Holodeck e2e test config (kernel, instance)
+      - name: Download driver image artifacts
+        uses: actions/download-artifact@v8
+        with:
+          pattern: driver-images-*-${{ env.KERNEL_VERSION }}-${{ env.DIST }}*
+          path: ./tests/
+          merge-multiple: true
+      - name: Install skopeo
+        run: |
+          sudo apt-get update && sudo apt-get install -y skopeo
+      - name: Configure Holodeck e2e test config (kernel, OS, instance)
         run: |
           yq eval '.spec += {"kernel": {"version": strenv(KERNEL_VERSION)}}' -i tests/holodeck_ubuntu.yaml
           if [[ "$DIST" == "ubuntu24.04" ]]; then
             yq eval '.spec.instance.os = "ubuntu-24.04"' -i tests/holodeck_ubuntu.yaml
           fi
+          if [[ "$PLATFORM" == "arm64" ]]; then
+            yq eval '.spec.instance.image.architecture = strenv(PLATFORM)' -i tests/holodeck_ubuntu.yaml
+            yq eval '.spec.instance.type = "g5g.xlarge"' -i tests/holodeck_ubuntu.yaml
+            yq eval '.spec.instance.region = "us-west-2"' -i tests/holodeck_ubuntu.yaml
+          fi
 
       - name: Set up Holodeck
         uses: NVIDIA/holodeck@v0.3.4
@@ -397,6 +414,8 @@ jobs:
             # add escape character for space
             TEST_CASE_ARGS=$(printf '%q ' "$TEST_CASE_ARGS")
             IMAGE_PATH="./tests/driver-images-${DRIVER_VERSION}-${KERNEL_VERSION}-${DIST}.tar"
+            skopeo copy --override-os linux --override-arch "${PLATFORM}" "oci-archive:${IMAGE_PATH}" "docker-archive:./tests/tmp.tar:${PRIVATE_REGISTRY}/nvidia/driver:${DRIVER_VERSION}-${KERNEL_VERSION}-${DIST}"
+            mv "./tests/tmp.tar" "${IMAGE_PATH}"
             ./tests/ci-run-e2e.sh "${TEST_CASE}" "${TEST_CASE_ARGS}" ${IMAGE_PATH} || status=$?
             if [ $status -eq 1 ]; then
               echo "e2e validation failed for driver version $DRIVER_VERSION with status $status"
@@ -421,25 +440,26 @@ jobs:
       - set-driver-version-matrix
       - collect-e2e-test-matrix
       - e2e-tests-nvidiadriver
+    env:
+      REGISTRY_AUTH_FILE: ${{ github.workspace }}/config.json
     strategy:
+      max-parallel: 5
       matrix:
         driver_branch: ${{ fromJson(needs.set-driver-version-matrix.outputs.driver_branch) }}
         kernel_version: ${{ fromJson(needs.collect-e2e-test-matrix.outputs.matrix_values) }}
     steps:
       - name: Check out code
         uses: actions/checkout@v6
 
-      - name: Login to GitHub Container Registry
-        uses: docker/login-action@v4
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
       - name: Set image vars
         id: set_image_vars
         run: |
           echo "PRIVATE_REGISTRY=ghcr.io" >> $GITHUB_ENV
           KERNEL_VERSION="${{ matrix.kernel_version }}"
+          if [[ "$KERNEL_VERSION" == *-arm64 ]]; then
+            KERNEL_VERSION="${KERNEL_VERSION%-arm64}"
+          fi
+          echo "KERNEL_VERSION=$KERNEL_VERSION" >> $GITHUB_ENV
           DIST="${KERNEL_VERSION##*-}"
           echo "run_publish=true" >> $GITHUB_OUTPUT
           if echo '${{ needs.set-driver-version-matrix.outputs.exclude_build_matrix_pairs }}' | jq -e \
@@ -448,29 +468,34 @@ jobs:
             echo "run_publish=false" >> $GITHUB_OUTPUT
           fi
 
+      - name: Install skopeo and login to GitHub Container Registry
+        run: |
+          sudo apt-get update && sudo apt-get install -y skopeo
+          mkdir -p "$(dirname "${REGISTRY_AUTH_FILE}")"
+          echo "${{ secrets.GITHUB_TOKEN }}" | skopeo login ${PRIVATE_REGISTRY} -u ${{ github.actor }} --password-stdin --authfile "${REGISTRY_AUTH_FILE}"
+
       - name: Download base image artifact
         if: steps.set_image_vars.outputs.run_publish == 'true'
         uses: actions/download-artifact@v8
         with:
-          name: base-images-${{ matrix.driver_branch }}-${{ matrix.kernel_version }}
+          name: base-images-${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}
           path: ./
 
       - name: Publish base image
         if: steps.set_image_vars.outputs.run_publish == 'true'
         run: |
-          LTS_KERNEL=$(echo "${{ matrix.kernel_version }}" | sed -E 's/^([0-9]+\.[0-9]+)\..*/\1/')
-          KERNEL_FLAVOR=$(echo "${{ matrix.kernel_version }}" | sed -E 's/^[0-9]+\.[0-9]+\.[0-9]+-[0-9]+-(.*)-ubuntu[0-9]+\.[0-9]+$/\1/')
-          DIST=$(echo "${{ matrix.kernel_version }}" | sed -E 's/^.*-(ubuntu[0-9]+\.[0-9]+)$/\1/')
+          LTS_KERNEL=$(echo "${{ env.KERNEL_VERSION }}" | sed -E 's/^([0-9]+\.[0-9]+)\..*/\1/')
+          KERNEL_FLAVOR=$(echo "${{ env.KERNEL_VERSION }}" | sed -E 's/^[0-9]+\.[0-9]+\.[0-9]+-[0-9]+-(.*)-ubuntu[0-9]+\.[0-9]+$/\1/')
+          DIST=$(echo "${{ env.KERNEL_VERSION }}" | sed -E 's/^.*-(ubuntu[0-9]+\.[0-9]+)$/\1/')
           if [[ "${DIST}" == "ubuntu22.04" ]]; then
             BASE_TARGET="jammy"
           elif [[ "${DIST}" == "ubuntu24.04" ]]; then
             BASE_TARGET="noble"
           fi
-          image_path="./base-images-${{ matrix.driver_branch }}-${{ matrix.kernel_version }}.tar"
+          image_path="./base-images-${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}.tar"
           echo "uploading $image_path"
-          docker load -i $image_path
           if [[ "${{ github.ref == 'refs/heads/main' }}" == "true" ]]; then
-            docker push ${PRIVATE_REGISTRY}/nvidia/driver:base-${BASE_TARGET}-${LTS_KERNEL}-${KERNEL_FLAVOR}-${{ matrix.driver_branch }}
+            skopeo copy --authfile "${REGISTRY_AUTH_FILE}" "docker-archive:${image_path}" docker://${PRIVATE_REGISTRY}/nvidia/driver:base-${BASE_TARGET}-${LTS_KERNEL}-${KERNEL_FLAVOR}-${{ matrix.driver_branch }}
           else
             echo "Skipping base image push for non-main branch ${{ github.ref }}"
           fi
@@ -479,17 +504,16 @@ jobs:
         if: steps.set_image_vars.outputs.run_publish == 'true'
         uses: actions/download-artifact@v8
         with:
-          name: driver-images-${{ matrix.driver_branch }}-${{ matrix.kernel_version }}
+          name: driver-images-${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}
           path: ./
 
       - name: Publish image
         if: steps.set_image_vars.outputs.run_publish == 'true'
         run: |
-          image_path="./driver-images-${{ matrix.driver_branch }}-${{ matrix.kernel_version }}.tar"
+          image_path="./driver-images-${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}.tar"
           echo "uploading $image_path"
-          docker load -i $image_path
           if [[ "${{ github.ref == 'refs/heads/main' }}" == "true" ]]; then
-            docker push ${PRIVATE_REGISTRY}/nvidia/driver:${{ matrix.driver_branch }}-${{ matrix.kernel_version }}
+            skopeo copy --authfile "${REGISTRY_AUTH_FILE}" "oci-archive:${image_path}" docker://${PRIVATE_REGISTRY}/nvidia/driver:${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}
           else
             echo "Skipping image push for non-main branch ${{ github.ref }}"
           fi
diff --git a/.nvidia-ci.yml b/.nvidia-ci.yml
index f58b0ed0b..e58f425ab 100644
--- a/.nvidia-ci.yml
+++ b/.nvidia-ci.yml
@@ -335,6 +335,15 @@ scan-precompiled-ubuntu24.04-amd64:
   needs:
     - image-precompiled-ubuntu24.04
 
+scan-precompiled-ubuntu24.04-arm64:
+  variables:
+    PLATFORM: linux/arm64
+  extends:
+    - .scan-precompiled-ubuntu24.04
+    - .platform-arm64
+  needs:
+    - image-precompiled-ubuntu24.04
+
 scan-precompiled-ubuntu22.04-amd64:
   variables:
     PLATFORM: linux/amd64
diff --git a/Makefile b/Makefile
index b74bd4b5a..a54a64bb2 100644
--- a/Makefile
+++ b/Makefile
@@ -151,12 +151,13 @@ build-%: DOCKERFILE = $(CURDIR)/$(SUBDIR)/Dockerfile
 # build-ubuntu22.04-$(DRIVER_VERSION) triggers a build for a specific $(DRIVER_VERSION)
 $(DISTRIBUTIONS): %: build-%
 $(BUILD_TARGETS): %: $(foreach driver_version, $(DRIVER_VERSIONS), $(addprefix %-, $(driver_version)))
+DRIVER_BUILD_TAG = $(if $(findstring type=oci,$(DOCKER_BUILD_OPTIONS)),,--tag $(IMAGE))
 $(DRIVER_BUILD_TARGETS):
 	DOCKER_BUILDKIT=1 \
 		$(DOCKER) $(BUILDX) build --pull \
 		$(DOCKER_BUILD_OPTIONS) \
 		$(DOCKER_BUILD_PLATFORM_OPTIONS) \
-		--tag $(IMAGE) \
+		$(DRIVER_BUILD_TAG) \
 		--build-arg DRIVER_VERSION="$(DRIVER_VERSION)" \
 		--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
 		--build-arg DRIVER_BRANCH="$(DRIVER_BRANCH)" \
@@ -202,6 +203,7 @@ $(BASE_BUILD_TARGETS):
 	DOCKER_BUILDKIT=1 \
 		$(DOCKER) $(BUILDX) build --pull --no-cache \
 		$(DOCKER_BUILD_OPTIONS) \
+		$(DOCKER_BUILD_PLATFORM_OPTIONS) \
 		--tag $(IMAGE) \
 		--target $(TARGET) \
 		--build-arg CUDA_VERSION="$(CUDA_VERSION)" \
diff --git a/multi-arch.mk b/multi-arch.mk
index d5201eb70..de5851a7c 100644
--- a/multi-arch.mk
+++ b/multi-arch.mk
@@ -25,4 +25,3 @@ $(DRIVER_PUSH_TARGETS): push-%:
 
 # No multi-arch support for the following distributions
 build-signed_ubuntu22.04%: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
-build-signed_ubuntu24.04%: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
diff --git a/scripts/precompiled.sh b/scripts/precompiled.sh
index 13311ca11..efebbcac0 100755
--- a/scripts/precompiled.sh
+++ b/scripts/precompiled.sh
@@ -42,7 +42,11 @@ function buildBaseImage(){
 }
 
 function buildImage(){
-    # Build the image
+    # Build the image. Build multi-arch (amd64+arm64) for ubuntu24.04 except azure-fde
+    # (linux-objects-nvidia-*-azure-fde is not available for arm64).
+    if [[ "$DIST" == "signed_ubuntu24.04" ]] && [[ "$KERNEL_FLAVOR" != "azure-fde" ]]; then
+        export DOCKER_BUILD_PLATFORM_OPTIONS="--platform=linux/amd64,linux/arm64"
+    fi
     make DRIVER_VERSIONS=${DRIVER_VERSIONS} DRIVER_BRANCH=${DRIVER_BRANCH} build-${DIST}-${DRIVER_VERSION}
 }
 
diff --git a/tests/scripts/ci-precompiled-helpers.sh b/tests/scripts/ci-precompiled-helpers.sh
index 3050049db..bce9b55f9 100644
--- a/tests/scripts/ci-precompiled-helpers.sh
+++ b/tests/scripts/ci-precompiled-helpers.sh
@@ -1,6 +1,6 @@
 get_kernel_versions_to_test() {
-    if [[ "$#" -ne 4 ]]; then
-        echo " Error:$0 must be called with KERNEL_FLAVORS DRIVER_BRANCHES DIST LTS_KERNEL" >&2
+    if [[ "$#" -lt 4 || "$#" -gt 5 ]]; then
+        echo " Error:$0 must be called with KERNEL_FLAVORS DRIVER_BRANCHES DIST LTS_KERNEL or KERNEL_FLAVORS DRIVER_BRANCHES DIST LTS_KERNEL PLATFORM_SUFFIX" >&2
         exit 1
     fi
 
@@ -8,11 +8,11 @@ get_kernel_versions_to_test() {
     local -a DRIVER_BRANCHES=("${!2}")
     local DIST="$3"
     local LTS_KERNEL="$4"
-
+    local PLATFORM_SUFFIX="${5:-}"
     kernel_versions=()
     for kernel_flavor in "${KERNEL_FLAVORS[@]}"; do
         for DRIVER_BRANCH in "${DRIVER_BRANCHES[@]}"; do
-            source ./tests/scripts/findkernelversion.sh "${kernel_flavor}" "$DRIVER_BRANCH" "$DIST" "$LTS_KERNEL" >&2
+            source ./tests/scripts/findkernelversion.sh "${kernel_flavor}" "$DRIVER_BRANCH" "$DIST" "$LTS_KERNEL" "$PLATFORM_SUFFIX" >&2
             if [[ "$should_continue" == true ]]; then
                 break
             fi
@@ -25,7 +25,7 @@ get_kernel_versions_to_test() {
     # Remove duplicates
     kernel_versions=($(printf "%s\n" "${kernel_versions[@]}" | sort -u))
     for i in "${!kernel_versions[@]}"; do
-        kernel_versions[$i]="${kernel_versions[$i]}-$DIST"
+        kernel_versions[$i]="${kernel_versions[$i]}-$DIST$PLATFORM_SUFFIX"
     done
     echo "${kernel_versions[@]}"
 }
diff --git a/tests/scripts/findkernelversion.sh b/tests/scripts/findkernelversion.sh
index 498ae7606..d2f322c1c 100755
--- a/tests/scripts/findkernelversion.sh
+++ b/tests/scripts/findkernelversion.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
-if [[ $# -ne 4 ]]; then
-    echo " KERNEL_FLAVOR DRIVER_BRANCH DIST LTS_KERNEL are required"
+if [[ $# -lt 4 || $# -gt 5 ]]; then
+    echo " KERNEL_FLAVOR DRIVER_BRANCH DIST LTS_KERNEL or KERNEL_FLAVOR DRIVER_BRANCH DIST LTS_KERNEL PLATFORM_SUFFIX are required"
     exit 1
 fi
 
@@ -9,6 +9,7 @@ export KERNEL_FLAVOR="${1}"
 export DRIVER_BRANCH="${2}"
 export DIST="${3}"
 export LTS_KERNEL="${4}"
+export PLATFORM_SUFFIX="${5:-}"
 
 export REGCTL_VERSION=v0.7.1
 mkdir -p bin
@@ -21,21 +22,21 @@ prefix="kernel-version-${DRIVER_BRANCH}-${LTS_KERNEL}"
 suffix="${KERNEL_FLAVOR}-${DIST}"
 artifact_dir="./kernel-version-artifacts"
 
-artifact=$(find "$artifact_dir" -maxdepth 1 -type d -name "${prefix}*-${suffix}" | head -1)
-if [ -n "$artifact" ]; then
-    artifact_name=$(basename "$artifact")
-    if [ -f "$artifact/${artifact_name}.tar" ]; then
-        tar -xf "$artifact/${artifact_name}.tar" -C ./
-        export $(grep -oP 'KERNEL_VERSION=[^ ]+' ./kernel_version.txt)
-        rm -f kernel_version.txt
-    fi
+artifact_file=$(find "$artifact_dir" -maxdepth 1 -type f -name "${prefix}*-${suffix}.tar" | head -1)
+if [ -n "$artifact_file" ]; then
+    tar -xf "$artifact_file" -C ./
+    export $(grep -oE 'KERNEL_VERSION=[^ ]+' ./kernel_version.txt)
+    rm -f kernel_version.txt
 fi
 
 # calculate driver tag
 status_nvcr=0
 status_ghcr=0
-regctl tag ls nvcr.io/nvidia/driver | grep "^${DRIVER_BRANCH}-${KERNEL_VERSION}-${DIST}$" || status_nvcr=$?
-regctl tag ls ghcr.io/nvidia/driver | grep "^${DRIVER_BRANCH}-${KERNEL_VERSION}-${DIST}$" || status_ghcr=$?
+PLATFORM="${PLATFORM_SUFFIX#-}"
+PLATFORM="${PLATFORM:-amd64}"
+regctl manifest inspect nvcr.io/nvidia/driver:${DRIVER_BRANCH}-${KERNEL_VERSION}-${DIST} --platform=linux/${PLATFORM} > /dev/null 2>&1 || status_nvcr=$?
+regctl manifest inspect ghcr.io/nvidia/driver:${DRIVER_BRANCH}-${KERNEL_VERSION}-${DIST} --platform=linux/${PLATFORM} > /dev/null 2>&1 || status_ghcr=$?
+
 if [[ $status_nvcr -eq 0 || $status_ghcr -eq 0 ]]; then
     export should_continue=false
 else
diff --git a/ubuntu24.04/precompiled/Dockerfile b/ubuntu24.04/precompiled/Dockerfile
index 892ed1004..7d06765c3 100644
--- a/ubuntu24.04/precompiled/Dockerfile
+++ b/ubuntu24.04/precompiled/Dockerfile
@@ -18,7 +18,8 @@ ENV NVIDIA_VISIBLE_DEVICES=void
 
 RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections
 
-RUN dpkg --add-architecture i386 && \
+# Add i386 architecture only for amd64 builds (not available on ARM)
+RUN if [ "$TARGETARCH" = "amd64" ]; then dpkg --add-architecture i386; fi && \
     apt-get update && apt-get install -y --no-install-recommends \
         apt-utils \
         build-essential \
@@ -31,9 +32,10 @@ RUN dpkg --add-architecture i386 && \
         pkg-config && \
     rm -rf /var/lib/apt/lists/*
 
-# Fetch GPG keys for CUDA repo
-RUN rm -f /etc/apt/sources.list.d/cuda* && \
-    curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb -o cuda-keyring_1.1-1_all.deb && \
+# Fetch GPG keys for CUDA repo (architecture-specific)
+RUN CUDA_ARCH=$([ "$TARGETARCH" = "arm64" ] && echo "sbsa" || echo "x86_64") && \
+    rm -f /etc/apt/sources.list.d/cuda* && \
+    curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/${CUDA_ARCH}/cuda-keyring_1.1-1_all.deb -o cuda-keyring_1.1-1_all.deb && \
     dpkg -i cuda-keyring_1.1-1_all.deb && \
     rm -f cuda-keyring_1.1-1_all.deb
 
diff --git a/ubuntu24.04/precompiled/local-repo.sh b/ubuntu24.04/precompiled/local-repo.sh
index 48e18453d..804d5be28 100755
--- a/ubuntu24.04/precompiled/local-repo.sh
+++ b/ubuntu24.04/precompiled/local-repo.sh
@@ -68,7 +68,12 @@ download_driver_package_deps () {
   pushd ${LOCAL_REPO_DIR}
 
   download_apt_with_dep linux-objects-nvidia-${DRIVER_BRANCH}-server-${KERNEL_VERSION}
-  download_apt_with_dep linux-signatures-nvidia-${KERNEL_VERSION}
+
+  # linux-signatures-nvidia (secure boot signatures) is not available for arm64
+  if [ "${TARGETARCH:-$(dpkg --print-architecture)}" = "amd64" ]; then
+    download_apt_with_dep linux-signatures-nvidia-${KERNEL_VERSION}
+  fi
+
   download_apt_with_dep linux-modules-nvidia-${DRIVER_BRANCH}-server-${KERNEL_VERSION}
   download_apt_with_dep linux-modules-nvidia-${DRIVER_BRANCH}-server-open-${KERNEL_VERSION}
   download_apt_with_dep nvidia-utils-${DRIVER_BRANCH}-server
@@ -76,7 +81,10 @@ download_driver_package_deps () {
   download_apt_with_dep libnvidia-decode-${DRIVER_BRANCH}-server
   download_apt_with_dep libnvidia-extra-${DRIVER_BRANCH}-server
   download_apt_with_dep libnvidia-encode-${DRIVER_BRANCH}-server
-  download_apt_with_dep libnvidia-fbc1-${DRIVER_BRANCH}-server
+  # libnvidia-fbc1 (FrameBuffer Capture) is not available for arm64
+  if [ "${TARGETARCH:-$(dpkg --print-architecture)}" = "amd64" ]; then
+    download_apt_with_dep libnvidia-fbc1-${DRIVER_BRANCH}-server
+  fi
   download_apt_with_dep libnvidia-gl-${DRIVER_BRANCH}-server
 
   fabricmanager_download
diff --git a/ubuntu24.04/precompiled/nvidia-driver b/ubuntu24.04/precompiled/nvidia-driver
index eb887b2d8..af04e78cb 100755
--- a/ubuntu24.04/precompiled/nvidia-driver
+++ b/ubuntu24.04/precompiled/nvidia-driver
@@ -416,22 +416,37 @@ _install_driver() {
         nvidia-headless-no-dkms-${DRIVER_BRANCH}-server \
         libnvidia-decode-${DRIVER_BRANCH}-server \
         libnvidia-extra-${DRIVER_BRANCH}-server \
-        libnvidia-encode-${DRIVER_BRANCH}-server \
-        libnvidia-fbc1-${DRIVER_BRANCH}-server \
-        libnvidia-gl-${DRIVER_BRANCH}-server
+        libnvidia-encode-${DRIVER_BRANCH}-server
+    # libnvidia-fbc1 (FrameBuffer Capture) is not available for arm64
+    if [ "${TARGETARCH:-$(dpkg --print-architecture)}" = "amd64" ]; then
+        apt-get install -y --no-install-recommends libnvidia-fbc1-${DRIVER_BRANCH}-server
+    fi
+    apt-get install -y --no-install-recommends libnvidia-gl-${DRIVER_BRANCH}-server
 
     # Now install the precompiled kernel module packages signed by Canonical
+    # linux-signatures-nvidia (secure boot signatures) is not available for arm64
     if [ "$KERNEL_TYPE" = "kernel-open" ]; then
         echo "Installing Open NVIDIA driver kernel modules..."
-        apt-get install --no-install-recommends -y \
-            linux-signatures-nvidia-${KERNEL_VERSION} \
-            linux-modules-nvidia-${DRIVER_BRANCH}-server-open-${KERNEL_VERSION}
+        if [ "${TARGETARCH:-$(dpkg --print-architecture)}" = "amd64" ]; then
+            apt-get install --no-install-recommends -y \
+                linux-signatures-nvidia-${KERNEL_VERSION} \
+                linux-modules-nvidia-${DRIVER_BRANCH}-server-open-${KERNEL_VERSION}
+        else
+            apt-get install --no-install-recommends -y \
+                linux-modules-nvidia-${DRIVER_BRANCH}-server-open-${KERNEL_VERSION}
+        fi
     else
         echo "Installing Closed NVIDIA driver kernel modules..."
-        apt-get install --no-install-recommends -y \
-            linux-objects-nvidia-${DRIVER_BRANCH}-server-${KERNEL_VERSION} \
-            linux-signatures-nvidia-${KERNEL_VERSION} \
-            linux-modules-nvidia-${DRIVER_BRANCH}-server-${KERNEL_VERSION}
+        if [ "${TARGETARCH:-$(dpkg --print-architecture)}" = "amd64" ]; then
+            apt-get install --no-install-recommends -y \
+                linux-objects-nvidia-${DRIVER_BRANCH}-server-${KERNEL_VERSION} \
+                linux-signatures-nvidia-${KERNEL_VERSION} \
+                linux-modules-nvidia-${DRIVER_BRANCH}-server-${KERNEL_VERSION}
+        else
+            apt-get install --no-install-recommends -y \
+                linux-objects-nvidia-${DRIVER_BRANCH}-server-${KERNEL_VERSION} \
+                linux-modules-nvidia-${DRIVER_BRANCH}-server-${KERNEL_VERSION}
+        fi
     fi
 }
 