Skip to content

Commit

Permalink
Merge pull request #8907 from Al3xR01/topic/rm_deps_from_debs-v14x
Browse files Browse the repository at this point in the history
AZP/RELEASE: rm cuda-compat & separate gdrcopy
  • Loading branch information
yosefe authored Mar 1, 2023
2 parents f9e2f91 + 43940c1 commit efde10f
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 30 deletions.
20 changes: 14 additions & 6 deletions buildlib/az-distro-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,6 @@ jobs:
centos8_cuda11:
build_container: centos8_cuda11
artifact_name: $(POSTFIX)-centos8-mofed5-cuda11.tar.bz2
ubuntu16_cuda11:
build_container: ubuntu16_cuda11
artifact_name: $(POSTFIX)-ubuntu16.04-mofed5-cuda11.tar.bz2
ubuntu18_cuda11:
build_container: ubuntu18_cuda11
artifact_name: $(POSTFIX)-ubuntu18.04-mofed5-cuda11.tar.bz2
Expand Down Expand Up @@ -62,6 +59,8 @@ jobs:
- bash: |
set -eEx
# Build
./autogen.sh
./contrib/configure-release --with-cuda --with-java=no
make dist
Expand All @@ -71,11 +70,20 @@ jobs:
echo 10 > debian/compat # https://www.debian.org/doc/manuals/maint-guide/dother.en.html#compat
dpkg-buildpackage -us -uc -Pcuda
cd .. # Move back to the working directory
find . -name '*.deb'
VER="${POSTFIX#ucx-}" # Remove 'ucx' prefix from the POSTFIX string
# Rename DEB files
# Rename DEB files
VER="${POSTFIX#ucx-}" # Remove 'ucx-' prefix from the POSTFIX string
find . -name "ucx*.deb" -exec bash -c 'mv "$1" "${1%%_*}-'"${VER}"'.deb"' _ {} \;
find . -name '*.deb' # Show new names
# Remove superfluous dependency
dpkg-deb -R "ucx-cuda-${VER}.deb" tmp # Extract
sed -i 's/libnvidia-compute-[0-9]* | libnvidia-ml1, //g' tmp/DEBIAN/control
dpkg-deb -b tmp "ucx-cuda-${VER}.deb" # Rebuild
dpkg-deb -I "ucx-cuda-${VER}.deb"
dpkg-deb -I "ucx-${VER}.deb"
# Package
tar -cjf "${AZ_ARTIFACT_NAME}" *.deb # Package all DEBs
tar -tjf "${AZ_ARTIFACT_NAME}"
displayName: Build DEB package
Expand Down
4 changes: 1 addition & 3 deletions buildlib/azure-pipelines-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,12 @@ resources:
options: $(DOCKER_OPT_VOLUMES)
- container: centos8_cuda11
image: rdmz-harbor.rdmz.labs.mlnx/ucx/centos8-mofed5-cuda11:2
- container: ubuntu16_cuda11
image: rdmz-harbor.rdmz.labs.mlnx/ucx/ubuntu16.04-mofed5-cuda11:3
- container: ubuntu18_cuda11
image: rdmz-harbor.rdmz.labs.mlnx/ucx/ubuntu18.04-mofed5-cuda11:3
- container: ubuntu20_cuda11
image: rdmz-harbor.rdmz.labs.mlnx/ucx/ubuntu20.04-mofed5-cuda11:3
- container: ubuntu22_cuda11
image: rdmz-harbor.rdmz.labs.mlnx/ucx/x86_64/ubuntu22.04-mofed5-cuda11:3
image: rdmz-harbor.rdmz.labs.mlnx/ucx/ubuntu22.04-mofed5-cuda11:3

stages:
- stage: Prepare
Expand Down
16 changes: 6 additions & 10 deletions buildlib/dockers/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
version: "3.4"

# Find driver version based on CUDA version, OS and CPU arch (515 in this case):
# https://developer.nvidia.com/cuda-11-7-0-download-archive?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=22.04&target_type=runfile_local

services:
centos7-mofed5-cuda11:
image: centos7-mofed5-cuda11:2
Expand Down Expand Up @@ -34,16 +37,6 @@ services:
MOFED_OS: rhel8.2
CUDA_VERSION: 11.4.0
OS_VERSION: 8
ubuntu16.04-mofed5-cuda11:
image: ubuntu16.04-mofed5-cuda11:3
build:
context: .
network: host
dockerfile: ubuntu-release.Dockerfile
args:
MOFED_VERSION: 5.0-1.0.0.0
UBUNTU_VERSION: 16.04
CUDA_VERSION: 11.2.0
ubuntu18.04-mofed5-cuda11:
image: ubuntu18.04-mofed5-cuda11:3
build:
Expand All @@ -54,6 +47,7 @@ services:
MOFED_VERSION: 5.0-1.0.0.0
UBUNTU_VERSION: 18.04
CUDA_VERSION: 11.4.0
NV_DRIVER_VERSION: 470
ubuntu20.04-mofed5-cuda11:
image: ubuntu20.04-mofed5-cuda11:3
build:
Expand All @@ -64,6 +58,7 @@ services:
MOFED_VERSION: 5.0-1.0.0.0
UBUNTU_VERSION: 20.04
CUDA_VERSION: 11.4.0
NV_DRIVER_VERSION: 470
ubuntu22.04-mofed5-cuda11:
image: ubuntu22.04-mofed5-cuda11:3
build:
Expand All @@ -74,3 +69,4 @@ services:
MOFED_VERSION: 5.4-3.6.8.1
UBUNTU_VERSION: 22.04
CUDA_VERSION: 11.7.0
NV_DRIVER_VERSION: 515
4 changes: 2 additions & 2 deletions buildlib/dockers/push-release-images.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#!/bin/bash -eE
#!/bin/bash -eEx

# shellcheck disable=SC2086
basedir=$(cd "$(dirname $0)" && pwd)

registry=harbor.mellanox.com/ucx

images=$(awk '/image:/ {print $2}' "${basedir}/docker-compose.yml")
images=$(awk '!/#/ && /image:/ {print $2}' "${basedir}/docker-compose.yml")
for img in $images; do
target_name="${registry}/${img}"
docker tag ${img} ${target_name}
Expand Down
18 changes: 10 additions & 8 deletions buildlib/dockers/ubuntu-release.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
ARG CUDA_VERSION=10.1
ARG UBUNTU_VERSION=16.04
ARG CUDA_VERSION
ARG UBUNTU_VERSION
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

ARG NV_DRIVER_VERSION
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata && \
apt-get install -y \
apt-file \
automake \
default-jdk \
dh-make \
Expand All @@ -14,13 +16,16 @@ RUN apt-get update && \
libcap2 \
libnuma-dev \
libtool \
# Provide the dependencies required by libnvidia-compute* instead of the cuda-compat*
libnvidia-compute-${NV_DRIVER_VERSION} \
make \
maven \
udev \
wget \
environment-modules \
pkg-config \
&& apt-get remove -y openjdk-11-* || apt-get autoremove -y \
sudo \
&& apt-get remove -y openjdk-11-* cuda-compat* || apt-get autoremove -y \
&& apt-get clean && rm -rf /var/lib/apt/lists/*

# MOFED
Expand All @@ -43,8 +48,5 @@ RUN ${MOFED_DIR}/mlnxofedinstall --all -q \
rm -rf ${MOFED_DIR} && rm -rf *.tgz

ENV CPATH /usr/local/cuda/include:${CPATH}
ENV LD_LIBRARY_PATH /usr/local/cuda/lib64:/usr/local/cuda/compat:${LD_LIBRARY_PATH}
ENV LIBRARY_PATH /usr/local/cuda/lib64:/usr/local/cuda/compat:${LIBRARY_PATH}
ENV PATH /usr/local/cuda/compat:${PATH}

RUN ml_stub=$(find /usr -name libnvidia-ml.so) && ln -s $ml_stub /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1
ENV LD_LIBRARY_PATH /usr/local/cuda/lib64:${LD_LIBRARY_PATH}
ENV LIBRARY_PATH /usr/local/cuda/lib64:${LIBRARY_PATH}
2 changes: 1 addition & 1 deletion debian/control.in
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ Package: ucx-gdrcopy
Section: libs
Depends: ${misc:Depends}, ${shlibs:Depends}
Architecture: any
Build-Profiles: <cuda>
Build-Profiles: <gdrcopy>
Description: Unified Communication X - gdrcopy support
UCX is a communication library implementing high-performance messaging.
.
Expand Down

0 comments on commit efde10f

Please sign in to comment.