Skip to content

Commit

Permalink
AZP/RELEASE: rm cuda-compat & separate gdrcopy
Browse files: browse the repository at this point in the history
  • Loading branch information
Alexey-Rivkin committed Feb 27, 2023
1 parent f9e2f91 commit cd8182b
Show file tree
Hide file tree
Showing 6 changed files with 18 additions and 20 deletions.
3 changes: 2 additions & 1 deletion buildlib/az-distro-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,11 @@ jobs:
dpkg-buildpackage -us -uc -Pcuda
cd .. # Move back to the working directory
find . -name '*.deb'
VER="${POSTFIX#ucx-}" # Remove 'ucx' prefix from the POSTFIX string
VER="${POSTFIX#ucx-}" # Remove 'ucx-' prefix from the POSTFIX string
# Rename DEB files
find . -name "ucx*.deb" -exec bash -c 'mv "$1" "${1%%_*}-'"${VER}"'.deb"' _ {} \;
dpkg-deb -I "ucx-${VER}.deb"
dpkg-deb -I "ucx-cuda-${VER}.deb"
tar -cjf "${AZ_ARTIFACT_NAME}" *.deb # Package all DEBs
tar -tjf "${AZ_ARTIFACT_NAME}"
displayName: Build DEB package
Expand Down
2 changes: 1 addition & 1 deletion buildlib/azure-pipelines-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ resources:
- container: ubuntu20_cuda11
image: rdmz-harbor.rdmz.labs.mlnx/ucx/ubuntu20.04-mofed5-cuda11:3
- container: ubuntu22_cuda11
image: rdmz-harbor.rdmz.labs.mlnx/ucx/x86_64/ubuntu22.04-mofed5-cuda11:3
image: rdmz-harbor.rdmz.labs.mlnx/ucx/ubuntu22.04-mofed5-cuda11:3

stages:
- stage: Prepare
Expand Down
16 changes: 6 additions & 10 deletions buildlib/dockers/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
version: "3.4"

# CUDA runtime <-> driver versions compatibility per OS:
# https://developer.nvidia.com/cuda-11-7-0-download-archive?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=22.04&target_type=runfile_local

services:
centos7-mofed5-cuda11:
image: centos7-mofed5-cuda11:2
Expand Down Expand Up @@ -34,16 +37,6 @@ services:
MOFED_OS: rhel8.2
CUDA_VERSION: 11.4.0
OS_VERSION: 8
ubuntu16.04-mofed5-cuda11:
image: ubuntu16.04-mofed5-cuda11:3
build:
context: .
network: host
dockerfile: ubuntu-release.Dockerfile
args:
MOFED_VERSION: 5.0-1.0.0.0
UBUNTU_VERSION: 16.04
CUDA_VERSION: 11.2.0
ubuntu18.04-mofed5-cuda11:
image: ubuntu18.04-mofed5-cuda11:3
build:
Expand All @@ -54,6 +47,7 @@ services:
MOFED_VERSION: 5.0-1.0.0.0
UBUNTU_VERSION: 18.04
CUDA_VERSION: 11.4.0
DRIVER_VERSION: 470
ubuntu20.04-mofed5-cuda11:
image: ubuntu20.04-mofed5-cuda11:3
build:
Expand All @@ -64,6 +58,7 @@ services:
MOFED_VERSION: 5.0-1.0.0.0
UBUNTU_VERSION: 20.04
CUDA_VERSION: 11.4.0
DRIVER_VERSION: 470
ubuntu22.04-mofed5-cuda11:
image: ubuntu22.04-mofed5-cuda11:3
build:
Expand All @@ -74,3 +69,4 @@ services:
MOFED_VERSION: 5.4-3.6.8.1
UBUNTU_VERSION: 22.04
CUDA_VERSION: 11.7.0
DRIVER_VERSION: 515
4 changes: 2 additions & 2 deletions buildlib/dockers/push-release-images.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#!/bin/bash -eE
#!/bin/bash -eEx

# shellcheck disable=SC2086
basedir=$(cd "$(dirname $0)" && pwd)

registry=harbor.mellanox.com/ucx

images=$(awk '/image:/ {print $2}' "${basedir}/docker-compose.yml")
images=$(awk '!/#/ && /image:/ {print $2}' "${basedir}/docker-compose.yml")
for img in $images; do
target_name="${registry}/${img}"
docker tag ${img} ${target_name}
Expand Down
11 changes: 6 additions & 5 deletions buildlib/dockers/ubuntu-release.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
ARG CUDA_VERSION=10.1
ARG UBUNTU_VERSION=16.04
ARG CUDA_VERSION
ARG UBUNTU_VERSION
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

ARG DRIVER_VERSION
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata && \
apt-get install -y \
Expand All @@ -14,13 +15,15 @@ RUN apt-get update && \
libcap2 \
libnuma-dev \
libtool \
libnvidia-compute-${DRIVER_VERSION} \
make \
maven \
udev \
wget \
environment-modules \
pkg-config \
&& apt-get remove -y openjdk-11-* || apt-get autoremove -y \
sudo \
&& apt-get remove -y openjdk-11-* cuda-compat* || apt-get autoremove -y \
&& apt-get clean && rm -rf /var/lib/apt/lists/*

# MOFED
Expand All @@ -46,5 +49,3 @@ ENV CPATH /usr/local/cuda/include:${CPATH}
ENV LD_LIBRARY_PATH /usr/local/cuda/lib64:/usr/local/cuda/compat:${LD_LIBRARY_PATH}
ENV LIBRARY_PATH /usr/local/cuda/lib64:/usr/local/cuda/compat:${LIBRARY_PATH}
ENV PATH /usr/local/cuda/compat:${PATH}

RUN ml_stub=$(find /usr -name libnvidia-ml.so) && ln -s $ml_stub /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1
2 changes: 1 addition & 1 deletion debian/control.in
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ Package: ucx-gdrcopy
Section: libs
Depends: ${misc:Depends}, ${shlibs:Depends}
Architecture: any
Build-Profiles: <cuda>
Build-Profiles: <gdrcopy>
Description: Unified Communication X - gdrcopy support
UCX is a communication library implementing high-performance messaging.
.
Expand Down

0 comments on commit cd8182b

Please sign in to comment.