Skip to content

Commit

Permalink
AZP/RELEASE: rm cuda-compat & separate gdrcopy
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexey-Rivkin committed Feb 28, 2023
1 parent f9e2f91 commit 395ac68
Show file tree
Hide file tree
Showing 144 changed files with 62,249 additions and 30 deletions.
20 changes: 14 additions & 6 deletions buildlib/az-distro-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,6 @@ jobs:
centos8_cuda11:
build_container: centos8_cuda11
artifact_name: $(POSTFIX)-centos8-mofed5-cuda11.tar.bz2
ubuntu16_cuda11:
build_container: ubuntu16_cuda11
artifact_name: $(POSTFIX)-ubuntu16.04-mofed5-cuda11.tar.bz2
ubuntu18_cuda11:
build_container: ubuntu18_cuda11
artifact_name: $(POSTFIX)-ubuntu18.04-mofed5-cuda11.tar.bz2
Expand Down Expand Up @@ -62,6 +59,8 @@ jobs:
- bash: |
set -eEx
# Build
./autogen.sh
./contrib/configure-release --with-cuda --with-java=no
make dist
Expand All @@ -71,11 +70,20 @@ jobs:
echo 10 > debian/compat # https://www.debian.org/doc/manuals/maint-guide/dother.en.html#compat
dpkg-buildpackage -us -uc -Pcuda
cd .. # Move back to the working directory
find . -name '*.deb'
VER="${POSTFIX#ucx-}" # Remove 'ucx' prefix from the POSTFIX string
# Rename DEB files
# Rename DEB files
VER="${POSTFIX#ucx-}" # Remove 'ucx-' prefix from the POSTFIX string
find . -name "ucx*.deb" -exec bash -c 'mv "$1" "${1%%_*}-'"${VER}"'.deb"' _ {} \;
find . -name '*.deb' # Show new names
# Remove superfluous dependency
dpkg-deb -R "ucx-cuda-${VER}.deb" tmp # Extract
sed -i 's/libnvidia-compute-[0-9]* | libnvidia-ml1, //g' tmp/DEBIAN/control
dpkg-deb -b tmp "ucx-cuda-${VER}.deb" # Rebuild
dpkg-deb -I "ucx-cuda-${VER}.deb"
dpkg-deb -I "ucx-${VER}.deb"
# Package
tar -cjf "${AZ_ARTIFACT_NAME}" *.deb # Package all DEBs
tar -tjf "${AZ_ARTIFACT_NAME}"
displayName: Build DEB package
Expand Down
4 changes: 1 addition & 3 deletions buildlib/azure-pipelines-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,12 @@ resources:
options: $(DOCKER_OPT_VOLUMES)
- container: centos8_cuda11
image: rdmz-harbor.rdmz.labs.mlnx/ucx/centos8-mofed5-cuda11:2
- container: ubuntu16_cuda11
image: rdmz-harbor.rdmz.labs.mlnx/ucx/ubuntu16.04-mofed5-cuda11:3
- container: ubuntu18_cuda11
image: rdmz-harbor.rdmz.labs.mlnx/ucx/ubuntu18.04-mofed5-cuda11:3
- container: ubuntu20_cuda11
image: rdmz-harbor.rdmz.labs.mlnx/ucx/ubuntu20.04-mofed5-cuda11:3
- container: ubuntu22_cuda11
image: rdmz-harbor.rdmz.labs.mlnx/ucx/x86_64/ubuntu22.04-mofed5-cuda11:3
image: rdmz-harbor.rdmz.labs.mlnx/ucx/ubuntu22.04-mofed5-cuda11:3

stages:
- stage: Prepare
Expand Down
16 changes: 6 additions & 10 deletions buildlib/dockers/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
version: "3.4"

# Find driver version based on CUDA version, OS and CPU arch (515 in this case):
# https://developer.nvidia.com/cuda-11-7-0-download-archive?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=22.04&target_type=runfile_local

services:
centos7-mofed5-cuda11:
image: centos7-mofed5-cuda11:2
Expand Down Expand Up @@ -34,16 +37,6 @@ services:
MOFED_OS: rhel8.2
CUDA_VERSION: 11.4.0
OS_VERSION: 8
ubuntu16.04-mofed5-cuda11:
image: ubuntu16.04-mofed5-cuda11:3
build:
context: .
network: host
dockerfile: ubuntu-release.Dockerfile
args:
MOFED_VERSION: 5.0-1.0.0.0
UBUNTU_VERSION: 16.04
CUDA_VERSION: 11.2.0
ubuntu18.04-mofed5-cuda11:
image: ubuntu18.04-mofed5-cuda11:3
build:
Expand All @@ -54,6 +47,7 @@ services:
MOFED_VERSION: 5.0-1.0.0.0
UBUNTU_VERSION: 18.04
CUDA_VERSION: 11.4.0
DRIVER_VERSION: 470
ubuntu20.04-mofed5-cuda11:
image: ubuntu20.04-mofed5-cuda11:3
build:
Expand All @@ -64,6 +58,7 @@ services:
MOFED_VERSION: 5.0-1.0.0.0
UBUNTU_VERSION: 20.04
CUDA_VERSION: 11.4.0
DRIVER_VERSION: 470
ubuntu22.04-mofed5-cuda11:
image: ubuntu22.04-mofed5-cuda11:3
build:
Expand All @@ -74,3 +69,4 @@ services:
MOFED_VERSION: 5.4-3.6.8.1
UBUNTU_VERSION: 22.04
CUDA_VERSION: 11.7.0
DRIVER_VERSION: 515
4 changes: 2 additions & 2 deletions buildlib/dockers/push-release-images.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#!/bin/bash -eE
#!/bin/bash -eEx

# shellcheck disable=SC2086
basedir=$(cd "$(dirname $0)" && pwd)

registry=harbor.mellanox.com/ucx

images=$(awk '/image:/ {print $2}' "${basedir}/docker-compose.yml")
images=$(awk '!/#/ && /image:/ {print $2}' "${basedir}/docker-compose.yml")
for img in $images; do
target_name="${registry}/${img}"
docker tag ${img} ${target_name}
Expand Down
17 changes: 9 additions & 8 deletions buildlib/dockers/ubuntu-release.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
ARG CUDA_VERSION=10.1
ARG UBUNTU_VERSION=16.04
ARG CUDA_VERSION
ARG UBUNTU_VERSION
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

ARG DRIVER_VERSION
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata && \
apt-get install -y \
apt-file \
automake \
default-jdk \
dh-make \
Expand All @@ -14,13 +16,15 @@ RUN apt-get update && \
libcap2 \
libnuma-dev \
libtool \
libnvidia-compute-${DRIVER_VERSION} \
make \
maven \
udev \
wget \
environment-modules \
pkg-config \
&& apt-get remove -y openjdk-11-* || apt-get autoremove -y \
sudo \
&& apt-get remove -y openjdk-11-* cuda-compat* || apt-get autoremove -y \
&& apt-get clean && rm -rf /var/lib/apt/lists/*

# MOFED
Expand All @@ -43,8 +47,5 @@ RUN ${MOFED_DIR}/mlnxofedinstall --all -q \
rm -rf ${MOFED_DIR} && rm -rf *.tgz

ENV CPATH /usr/local/cuda/include:${CPATH}
ENV LD_LIBRARY_PATH /usr/local/cuda/lib64:/usr/local/cuda/compat:${LD_LIBRARY_PATH}
ENV LIBRARY_PATH /usr/local/cuda/lib64:/usr/local/cuda/compat:${LIBRARY_PATH}
ENV PATH /usr/local/cuda/compat:${PATH}

RUN ml_stub=$(find /usr -name libnvidia-ml.so) && ln -s $ml_stub /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1
ENV LD_LIBRARY_PATH /usr/local/cuda/lib64:${LD_LIBRARY_PATH}
ENV LIBRARY_PATH /usr/local/cuda/lib64:${LIBRARY_PATH}
197 changes: 197 additions & 0 deletions confdefs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
/* confdefs.h */
#define PACKAGE_NAME "ucx"
#define PACKAGE_TARNAME "ucx"
#define PACKAGE_VERSION "1.14"
#define PACKAGE_STRING "ucx 1.14"
#define PACKAGE_BUGREPORT ""
#define PACKAGE_URL ""
#define HAVE_STDIO_H 1
#define HAVE_STDLIB_H 1
#define HAVE_STRING_H 1
#define HAVE_INTTYPES_H 1
#define HAVE_STDINT_H 1
#define HAVE_STRINGS_H 1
#define HAVE_SYS_STAT_H 1
#define HAVE_SYS_TYPES_H 1
#define HAVE_UNISTD_H 1
#define HAVE_WCHAR_H 1
#define STDC_HEADERS 1
#define _ALL_SOURCE 1
#define _DARWIN_C_SOURCE 1
#define _GNU_SOURCE 1
#define _HPUX_ALT_XOPEN_SOCKET_API 1
#define _NETBSD_SOURCE 1
#define _OPENBSD_SOURCE 1
#define _POSIX_PTHREAD_SEMANTICS 1
#define __STDC_WANT_IEC_60559_ATTRIBS_EXT__ 1
#define __STDC_WANT_IEC_60559_BFP_EXT__ 1
#define __STDC_WANT_IEC_60559_DFP_EXT__ 1
#define __STDC_WANT_IEC_60559_FUNCS_EXT__ 1
#define __STDC_WANT_IEC_60559_TYPES_EXT__ 1
#define __STDC_WANT_LIB_EXT2__ 1
#define __STDC_WANT_MATH_SPEC_FUNCS__ 1
#define _TANDEM_SOURCE 1
#define __EXTENSIONS__ 1
#define PACKAGE "ucx"
#define VERSION "1.14"
#define HAVE_DLFCN_H 1
#define LT_OBJDIR ".libs/"
#define restrict __restrict__
#define HAVE_DECL_STRERROR_R 1
#define HAVE_STRERROR_R 1
#define STRERROR_R_CHAR_P 1
#define UCX_CONFIGURE_FLAGS "--disable-logging --disable-debug --disable-assertions --disable-params-check --with-cuda --with-java=no"
#define UCX_MODULE_SUBDIR "ucx"
#define HAVE_ATTRIBUTE_NOOPTIMIZE 1
#define HAVE_ALLOCA_H 1
#define HAVE_ALLOCA 1
#define HAVE_LIBRT 1
#define HAVE_LIBRT 1
#define HAVE_LIBGEN_H 1
#define HAVE_DECL_ASPRINTF 1
#define HAVE_DECL_BASENAME 1
#define HAVE_DECL_FMEMOPEN 1
#define HAVE_DECL_CPU_ZERO 1
#define HAVE_DECL_CPU_ISSET 1
#define HAVE_CPU_SET_T 1
#define HAVE_SIGHANDLER_T 1
#define HAVE___SIGHANDLER_T 1
#define HAVE_CLEARENV 1
#define HAVE_MALLOC_TRIM 1
#define HAVE_MEMALIGN 1
#define HAVE_POSIX_MEMALIGN 1
#define HAVE_MREMAP 1
#define HAVE_SCHED_SETAFFINITY 1
#define HAVE_SCHED_GETAFFINITY 1
#define HAVE_DECL_F_SETOWN_EX 1
#define HAVE_DECL_ETHTOOL_CMD_SPEED 1
#define HAVE_DECL_SPEED_UNKNOWN 1
#define HAVE_DECL___PPC_GET_TIMEBASE_FREQ 0
#define HAVE_DECL___PPC_GET_TIMEBASE 0
#define NVALGRIND 1
#define HAVE_NUMA_H 1
#define HAVE_NUMAIF_H 1
#define HAVE_STRUCT_BITMASK 1
#define HAVE_NUMA 1
#define HAVE_DECL_PR_SET_PTRACER 1
#define HAVE_IN6_ADDR_S6_ADDR32 1
#define HAVE_IP_IP_DST 1
#define HAVE_SIGEVENT_SIGEV_UN_TID 1
#define HAVE_SIGACTION_SA_RESTORER 1
#define HAVE_SYS_EPOLL_H 1
#define HAVE_SYS_EVENTFD_H 1
#define HAVE_MALLOC_H 1
#define HAVE_LINUX_MMAN_H 1
#define HAVE_LINUX_IP_H 1
#define HAVE_LINUX_FUTEX_H 1
#define HAVE_NET_ETHERNET_H 1
#define HAVE_NETINET_IP_H 1
#define HAVE_UCM_PTMALLOC286 1
#define HAVE_DECL_MADV_FREE 1
#define HAVE_DECL_MADV_REMOVE 1
#define HAVE_DECL_POSIX_MADV_DONTNEED 1
#define HAVE_DECL_GETAUXVAL 1
#define HAVE_DECL_SYS_MMAP 1
#define HAVE_DECL_SYS_MUNMAP 1
#define HAVE_DECL_SYS_MREMAP 1
#define HAVE_DECL_SYS_BRK 1
#define HAVE_DECL_SYS_MADVISE 1
#define HAVE_DECL_SYS_SHMAT 1
#define HAVE_DECL_SYS_SHMDT 1
#define HAVE_DECL_SYS_IPC 0
#define UCM_BISTRO_HOOKS 1
#define HAVE___CURBRK 1
#define HAVE_DECL_FUSE_OPEN_CHANNEL 0
#define HAVE_DECL_FUSE_MOUNT 0
#define HAVE_DECL_FUSE_UNMOUNT 0
#define HAVE_CUDA_H 1
#define HAVE_CUDA_RUNTIME_H 1
#define HAVE_NVML_H 1
#define HAVE_CUDA 1
#define ucm_MODULES ":cuda"
#define HAVE_DECL_INOTIFY_INIT 1
#define HAVE_INOTIFY 1
#define HAVE_DECL_INOTIFY_ADD_WATCH 1
#define HAVE_INOTIFY 1
#define HAVE_DECL_IN_ATTRIB 1
#define HAVE_INOTIFY 1
#define ucs_MODULES ""
#define HAVE_STRUCT_DL_PHDR_INFO 1
#define UCS_MAX_LOG_LEVEL UCS_LOG_LEVEL_DEBUG
#define HAVE_HW_TIMER 1
#define ENABLE_BUILTIN_MEMCPY 1
#define HAVE___CLEAR_CACHE 1
#define uct_cuda_MODULES ""
#define HAVE_DECL_IBV_WC_STATUS_STR 1
#define HAVE_DECL_IBV_EVENT_TYPE_STR 1
#define HAVE_DECL_IBV_QUERY_GID 1
#define HAVE_DECL_IBV_GET_DEVICE_NAME 1
#define HAVE_DECL_IBV_CREATE_SRQ 1
#define HAVE_DECL_IBV_GET_ASYNC_EVENT 1
#define HAVE_DECL_IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN 1
#define HAVE_INFINIBAND_MLX5DV_H 1
#define HAVE_DECL_MLX5DV_INIT_OBJ 1
#define HAVE_DECL_MLX5DV_CREATE_QP 1
#define HAVE_DECL_MLX5DV_IS_SUPPORTED 1
#define HAVE_DECL_MLX5DV_DEVX_SUBSCRIBE_DEVX_EVENT 1
#define HAVE_DECL_MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE 1
#define HAVE_DECL_MLX5DV_CQ_INIT_ATTR_MASK_CQE_SIZE 1
#define HAVE_DECL_MLX5DV_QP_CREATE_ALLOW_SCATTER_TO_CQE 1
#define HAVE_DECL_MLX5DV_UAR_ALLOC_TYPE_BF 1
#define HAVE_DECL_MLX5DV_UAR_ALLOC_TYPE_NC 1
#define HAVE_DECL_MLX5DV_DEVX_UMEM_REG_EX 1
#define HAVE_STRUCT_MLX5DV_CQ_CQ_UAR 1
#define HAVE_DECL_MLX5DV_OBJ_AH 1
#define HAVE_DECL_MLX5DV_DCTYPE_DCT 1
#define HAVE_DECL_IBV_ALLOC_TD 1
#define HAVE_DEVX 1
#define HAVE_MLX5_DV 1
#define HAVE_MLX5_HW_UD 1
#define HAVE_DECL_IBV_LINK_LAYER_INFINIBAND 1
#define HAVE_DECL_IBV_LINK_LAYER_ETHERNET 1
#define HAVE_DECL_IBV_EVENT_GID_CHANGE 1
#define HAVE_DECL_IBV_TRANSPORT_USNIC 1
#define HAVE_DECL_IBV_TRANSPORT_USNIC_UDP 1
#define HAVE_DECL_IBV_TRANSPORT_UNSPECIFIED 1
#define HAVE_DECL_IBV_CREATE_QP_EX 1
#define HAVE_DECL_IBV_CREATE_CQ_EX 1
#define HAVE_DECL_IBV_CREATE_SRQ_EX 1
#define HAVE_DECL_IBV_REG_DMABUF_MR 1
#define HAVE_DECL_IBV_SET_ECE 1
#define HAVE_DECL_IBV_QUERY_DEVICE_EX 1
#define HAVE_STRUCT_IBV_DEVICE_ATTR_EX_PCI_ATOMIC_CAPS 1
#define HAVE_DECL_IBV_ACCESS_ON_DEMAND 1
#define HAVE_ODP 1
#define HAVE_DECL_IBV_ODP_SUPPORT_IMPLICIT 1
#define HAVE_ODP_IMPLICIT 1
#define HAVE_DECL_IBV_ACCESS_RELAXED_ORDERING 1
#define HAVE_DECL_IBV_QPF_GRH_REQUIRED 1
#define HAVE_DECL_IBV_ADVISE_MR 1
#define HAVE_PREFETCH 1
#define HAVE_IB 1
#define HAVE_TL_DC 1
#define HAVE_DC_DV 1
#define HAVE_TL_RC 1
#define HAVE_TL_UD 1
#define IBV_HW_TM 1
#define HAVE_STRUCT_IBV_TM_CAPS_FLAGS 1
#define HAVE_DECL_IBV_ALLOC_DM 1
#define HAVE_IBV_DM 1
#define HAVE_DECL_IBV_CMD_MODIFY_QP 0
#define uct_ib_MODULES ""
#define uct_rocm_MODULES ""
#define HAVE_SYS_UIO_H 1
#define uct_MODULES ":cuda:ib:rdmacm:cma"
#define HAVE_DECL_IPPROTO_TCP 1
#define HAVE_DECL_SOL_SOCKET 1
#define HAVE_DECL_SO_KEEPALIVE 1
#define HAVE_DECL_TCP_KEEPCNT 1
#define HAVE_DECL_TCP_KEEPIDLE 1
#define HAVE_DECL_TCP_KEEPINTVL 1
#define UCT_TCP_EP_KEEPALIVE 1
#define ucx_perftest_MODULES ":cuda"
#define test_MODULES ":module"
#define ENABLE_PARAMS_CHECK 0
#define ENABLE_DEBUG_DATA 0
#define UCT_UD_EP_DEBUG_HOOKS 0
#define ENABLE_MT 0
Loading

0 comments on commit 395ac68

Please sign in to comment.