Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AZP/RELEASE: rm cuda-compat & separate gdrcopy #8907

Merged
merged 1 commit into from
Mar 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions buildlib/az-distro-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,6 @@ jobs:
centos8_cuda11:
build_container: centos8_cuda11
artifact_name: $(POSTFIX)-centos8-mofed5-cuda11.tar.bz2
ubuntu16_cuda11:
build_container: ubuntu16_cuda11
artifact_name: $(POSTFIX)-ubuntu16.04-mofed5-cuda11.tar.bz2
ubuntu18_cuda11:
build_container: ubuntu18_cuda11
artifact_name: $(POSTFIX)-ubuntu18.04-mofed5-cuda11.tar.bz2
Expand Down Expand Up @@ -62,6 +59,8 @@ jobs:

- bash: |
set -eEx

# Build
./autogen.sh
./contrib/configure-release --with-cuda --with-java=no
make dist
Expand All @@ -71,11 +70,20 @@ jobs:
echo 10 > debian/compat # https://www.debian.org/doc/manuals/maint-guide/dother.en.html#compat
dpkg-buildpackage -us -uc -Pcuda
cd .. # Move back to the working directory
find . -name '*.deb'
VER="${POSTFIX#ucx-}" # Remove 'ucx' prefix from the POSTFIX string
# Rename DEB files

# Rename DEB files
VER="${POSTFIX#ucx-}" # Remove 'ucx-' prefix from the POSTFIX string
find . -name "ucx*.deb" -exec bash -c 'mv "$1" "${1%%_*}-'"${VER}"'.deb"' _ {} \;
find . -name '*.deb' # Show new names

# Remove superfluous dependency
artemry-nv marked this conversation as resolved.
Show resolved Hide resolved
Alexey-Rivkin marked this conversation as resolved.
Show resolved Hide resolved
dpkg-deb -R "ucx-cuda-${VER}.deb" tmp # Extract
sed -i 's/libnvidia-compute-[0-9]* | libnvidia-ml1, //g' tmp/DEBIAN/control
dpkg-deb -b tmp "ucx-cuda-${VER}.deb" # Rebuild
dpkg-deb -I "ucx-cuda-${VER}.deb"
artemry-nv marked this conversation as resolved.
Show resolved Hide resolved
dpkg-deb -I "ucx-${VER}.deb"

# Package
tar -cjf "${AZ_ARTIFACT_NAME}" *.deb # Package all DEBs
tar -tjf "${AZ_ARTIFACT_NAME}"
displayName: Build DEB package
Expand Down
4 changes: 1 addition & 3 deletions buildlib/azure-pipelines-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,12 @@ resources:
options: $(DOCKER_OPT_VOLUMES)
- container: centos8_cuda11
image: rdmz-harbor.rdmz.labs.mlnx/ucx/centos8-mofed5-cuda11:2
- container: ubuntu16_cuda11
image: rdmz-harbor.rdmz.labs.mlnx/ucx/ubuntu16.04-mofed5-cuda11:3
- container: ubuntu18_cuda11
image: rdmz-harbor.rdmz.labs.mlnx/ucx/ubuntu18.04-mofed5-cuda11:3
- container: ubuntu20_cuda11
image: rdmz-harbor.rdmz.labs.mlnx/ucx/ubuntu20.04-mofed5-cuda11:3
- container: ubuntu22_cuda11
image: rdmz-harbor.rdmz.labs.mlnx/ucx/x86_64/ubuntu22.04-mofed5-cuda11:3
image: rdmz-harbor.rdmz.labs.mlnx/ucx/ubuntu22.04-mofed5-cuda11:3

stages:
- stage: Prepare
Expand Down
16 changes: 6 additions & 10 deletions buildlib/dockers/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
version: "3.4"

# Find driver version based on CUDA version, OS and CPU arch (515 in this case):
# https://developer.nvidia.com/cuda-11-7-0-download-archive?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=22.04&target_type=runfile_local
Alexey-Rivkin marked this conversation as resolved.
Show resolved Hide resolved

services:
centos7-mofed5-cuda11:
image: centos7-mofed5-cuda11:2
Expand Down Expand Up @@ -34,16 +37,6 @@ services:
MOFED_OS: rhel8.2
CUDA_VERSION: 11.4.0
OS_VERSION: 8
ubuntu16.04-mofed5-cuda11:
image: ubuntu16.04-mofed5-cuda11:3
build:
context: .
network: host
dockerfile: ubuntu-release.Dockerfile
args:
MOFED_VERSION: 5.0-1.0.0.0
UBUNTU_VERSION: 16.04
CUDA_VERSION: 11.2.0
ubuntu18.04-mofed5-cuda11:
image: ubuntu18.04-mofed5-cuda11:3
build:
Expand All @@ -54,6 +47,7 @@ services:
MOFED_VERSION: 5.0-1.0.0.0
UBUNTU_VERSION: 18.04
CUDA_VERSION: 11.4.0
NV_DRIVER_VERSION: 470
ubuntu20.04-mofed5-cuda11:
image: ubuntu20.04-mofed5-cuda11:3
build:
Expand All @@ -64,6 +58,7 @@ services:
MOFED_VERSION: 5.0-1.0.0.0
UBUNTU_VERSION: 20.04
CUDA_VERSION: 11.4.0
NV_DRIVER_VERSION: 470
ubuntu22.04-mofed5-cuda11:
image: ubuntu22.04-mofed5-cuda11:3
build:
Expand All @@ -74,3 +69,4 @@ services:
MOFED_VERSION: 5.4-3.6.8.1
UBUNTU_VERSION: 22.04
CUDA_VERSION: 11.7.0
NV_DRIVER_VERSION: 515
4 changes: 2 additions & 2 deletions buildlib/dockers/push-release-images.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#!/bin/bash -eE
#!/bin/bash -eEx

# shellcheck disable=SC2086
basedir=$(cd "$(dirname $0)" && pwd)

registry=harbor.mellanox.com/ucx

images=$(awk '/image:/ {print $2}' "${basedir}/docker-compose.yml")
images=$(awk '!/#/ && /image:/ {print $2}' "${basedir}/docker-compose.yml")
for img in $images; do
target_name="${registry}/${img}"
docker tag ${img} ${target_name}
Expand Down
18 changes: 10 additions & 8 deletions buildlib/dockers/ubuntu-release.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
ARG CUDA_VERSION=10.1
ARG UBUNTU_VERSION=16.04
ARG CUDA_VERSION
ARG UBUNTU_VERSION
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

ARG NV_DRIVER_VERSION
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata && \
apt-get install -y \
apt-file \
automake \
default-jdk \
dh-make \
Expand All @@ -14,13 +16,16 @@ RUN apt-get update && \
libcap2 \
libnuma-dev \
libtool \
# Provide the dependencies required by libnvidia-compute* instead of the cuda-compat*
libnvidia-compute-${NV_DRIVER_VERSION} \
make \
maven \
udev \
wget \
environment-modules \
pkg-config \
&& apt-get remove -y openjdk-11-* || apt-get autoremove -y \
sudo \
Alexey-Rivkin marked this conversation as resolved.
Show resolved Hide resolved
&& apt-get remove -y openjdk-11-* cuda-compat* || apt-get autoremove -y \
Alexey-Rivkin marked this conversation as resolved.
Show resolved Hide resolved
&& apt-get clean && rm -rf /var/lib/apt/lists/*

# MOFED
Expand All @@ -43,8 +48,5 @@ RUN ${MOFED_DIR}/mlnxofedinstall --all -q \
rm -rf ${MOFED_DIR} && rm -rf *.tgz

ENV CPATH /usr/local/cuda/include:${CPATH}
ENV LD_LIBRARY_PATH /usr/local/cuda/lib64:/usr/local/cuda/compat:${LD_LIBRARY_PATH}
ENV LIBRARY_PATH /usr/local/cuda/lib64:/usr/local/cuda/compat:${LIBRARY_PATH}
ENV PATH /usr/local/cuda/compat:${PATH}

RUN ml_stub=$(find /usr -name libnvidia-ml.so) && ln -s $ml_stub /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1
artemry-nv marked this conversation as resolved.
Show resolved Hide resolved
ENV LD_LIBRARY_PATH /usr/local/cuda/lib64:${LD_LIBRARY_PATH}
ENV LIBRARY_PATH /usr/local/cuda/lib64:${LIBRARY_PATH}
2 changes: 1 addition & 1 deletion debian/control.in
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ Package: ucx-gdrcopy
Section: libs
Depends: ${misc:Depends}, ${shlibs:Depends}
Architecture: any
Build-Profiles: <cuda>
Build-Profiles: <gdrcopy>
Description: Unified Communication X - gdrcopy support
UCX is a communication library implementing high-performance messaging.
.
Expand Down