Skip to content

Commit

Permalink
Add gpu benchmark (#330)
Browse files Browse the repository at this point in the history
Added GPU benchmarking to the CI pipeline.

---------

Co-authored-by: Rico Haeuselmann <r.haeuselmann@gmx.ch>
  • Loading branch information
samkellerhals and DropD authored Dec 13, 2023
1 parent 598e434 commit 46f6a5f
Show file tree
Hide file tree
Showing 23 changed files with 163 additions and 82 deletions.
50 changes: 39 additions & 11 deletions ci/cscs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ stages:
- test
- benchmark

.py310: &py310
PYVERSION_PREFIX: py310
PYVERSION: 3.10.9

variables:
PERSIST_IMAGE_NAME: $CSCS_REGISTRY_PATH/icon4py:$CI_COMMIT_SHORT_SHA
PYTHON_VERSION: "3.10"
Expand All @@ -15,39 +19,63 @@ variables:
extends: .container-builder
variables:
DOCKERFILE: ci/docker/Dockerfile.build
DOCKER_BUILD_ARGS: '["PYVERSION=$PYVERSION"]'
<<: *py310

.test_template:
extends: .container-runner-clariden-a100
timeout: 2h
extends: .container-runner-daint-gpu
timeout: 8h
image: $PERSIST_IMAGE_NAME
before_script:
- apt-get update
- python3 -m pip install --upgrade pip setuptools wheel
- cd /icon4py
- pyversion_no_dot="${PYTHON_VERSION//./}"
- pip install tox clang-format
- python -c "import cupy"
variables:
SLURM_JOB_NUM_NODES: 2
SLURM_NTASKS: 2
SLURM_TIMELIMIT: '02:00:00'
SLURM_JOB_NUM_NODES: 1
SLURM_NTASKS: 1
SLURM_TIMELIMIT: '06:00:00'
CRAY_CUDA_MPS: 1
NUM_PROCESSES: auto
VIRTUALENV_SYSTEM_SITE_PACKAGES: 1

build_job:
extends: .build_template

test_model_job_embedded_simple_grid:
test_model_job_roundtrip_simple_grid:
extends: .test_template
stage: test
script:
- tox -r -e py${pyversion_no_dot} -c model/ --verbose -- --benchmark-skip -n auto
- tox -r -c model/ --verbose -- --benchmark-skip -n auto

benchmark_model_gtfn_cpu_simple_grid:
test_model_job_gtfn_cpu_simple_grid:
extends: .test_template
stage: benchmark
stage: test
script:
- tox -r -e py${pyversion_no_dot} -c model/ --verbose -- --benchmark-only --backend=gtfn_cpu --grid=simple_grid
- tox -r -e stencil_tests -c model/ --verbose -- --benchmark-skip -n auto --backend=gtfn_cpu

test_model_job_gtfn_gpu_simple_grid:
extends: .test_template
stage: test
script:
- tox -r -e stencil_tests -c model/ --verbose -- --benchmark-skip -n auto --backend=gtfn_gpu

test_tools_job:
extends: .test_template
stage: test
script:
- tox -r -e py${pyversion_no_dot} -c tools/ --verbose
- tox -r -c tools/ --verbose

benchmark_model_gtfn_cpu_simple_grid:
extends: .test_template
stage: benchmark
script:
- tox -r -e stencil_tests -c model/ --verbose -- --benchmark-only --backend=gtfn_cpu --grid=simple_grid

benchmark_model_gtfn_gpu_simple_grid:
extends: .test_template
stage: benchmark
script:
- tox -r -e stencil_tests -c model/ --verbose -- --benchmark-only --backend=gtfn_gpu --grid=simple_grid
83 changes: 55 additions & 28 deletions ci/docker/Dockerfile.build
Original file line number Diff line number Diff line change
@@ -1,31 +1,58 @@
FROM ubuntu:22.04 as builder
FROM docker.io/nvidia/cuda:11.2.2-devel-ubuntu20.04
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8

COPY . /icon4py

# Update and install necessary utilities and libraries
RUN apt-get update && \
apt-get install -y \
software-properties-common \
ARG DEBIAN_FRONTEND=noninteractive
RUN apt-get update -qq && apt-get install -qq -y --no-install-recommends \
strace \
build-essential \
cmake \
git \
libboost-all-dev \
pkg-config \
tar \
wget \
curl \
gdb

# Install Python 3.10
RUN add-apt-repository -y ppa:deadsnakes/ppa && \
apt-get update && \
apt-get install -y python3.10 python3.10-distutils python3.10-venv && \
update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1

# Install pip
RUN apt-get install -y curl && \
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
python3.10 get-pip.py

# Cleanup
RUN apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
rm get-pip.py
ca-certificates \
zlib1g-dev \
libssl-dev \
libbz2-dev \
libsqlite3-dev \
llvm \
libncurses5-dev \
libncursesw5-dev \
xz-utils \
tk-dev \
libffi-dev \
liblzma-dev \
python-openssl \
libreadline-dev \
git \
rustc \
htop && \
rm -rf /var/lib/apt/lists/*

RUN wget --quiet https://boostorg.jfrog.io/artifactory/main/release/1.72.0/source/boost_1_72_0.tar.gz && \
echo c66e88d5786f2ca4dbebb14e06b566fb642a1a6947ad8cc9091f9f445134143f boost_1_72_0.tar.gz > boost_hash.txt && \
sha256sum -c boost_hash.txt && \
tar xzf boost_1_72_0.tar.gz && \
mv boost_1_72_0/boost /usr/local/include/ && \
rm boost_1_72_0.tar.gz boost_hash.txt

ENV BOOST_ROOT /usr/local/
ENV CUDA_HOME /usr/local/cuda

ARG PYVERSION

RUN curl https://pyenv.run | bash

ENV PYENV_ROOT /root/.pyenv
ENV PATH="/root/.pyenv/bin:${PATH}"

RUN pyenv update && \
pyenv install ${PYVERSION} && \
echo 'eval "$(pyenv init -)"' >> /root/.bashrc && \
eval "$(pyenv init -)" && \
pyenv global ${PYVERSION}

ENV PATH="/root/.pyenv/shims:${PATH}"

COPY . /icon4py

RUN pip install --upgrade pip setuptools wheel tox cupy-cuda11x clang-format
2 changes: 1 addition & 1 deletion model/atmosphere/advection/.pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ repos:
args: [--config-root, model/atmosphere/advection/, --resolve-all-configs]

- repo: https://github.com/PyCQA/flake8
rev: '4.0.1'
rev: '6.1.0'
hooks:
- id: flake8
name: flake8 code style checks
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

import numpy as np
from gt4py.next.ffront.fbuiltins import int32
from gt4py.next.iterator.embedded import StridedNeighborOffsetProvider

from icon4py.model.atmosphere.advection.btraj_dreg_stencil_02 import btraj_dreg_stencil_02
from icon4py.model.common.dimension import E2CDim, ECDim, EdgeDim, KDim
Expand Down Expand Up @@ -66,7 +65,7 @@ def test_btraj_dreg_stencil_02(backend):
opt_famask_dsl,
offset_provider={
"E2C": grid.get_offset_provider("E2C"),
"E2EC": StridedNeighborOffsetProvider(EdgeDim, ECDim, grid.size[E2CDim]),
"E2EC": grid.get_offset_provider("E2EC"),
},
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import numpy as np
import pytest
from gt4py.next.ffront.fbuiltins import int32
from gt4py.next.iterator.embedded import StridedNeighborOffsetProvider

from icon4py.model.atmosphere.advection.divide_flux_area_list_stencil_01 import (
divide_flux_area_list_stencil_01,
Expand Down Expand Up @@ -767,7 +766,7 @@ def test_divide_flux_area_list_stencil_01(backend):
dreg_patch2_4_lat_vmask,
offset_provider={
"E2C": grid.get_offset_provider("E2C"),
"E2EC": StridedNeighborOffsetProvider(EdgeDim, ECDim, grid.size[E2CDim]),
"E2EC": grid.get_offset_provider("E2EC"),
},
)
assert np.allclose(dreg_patch0_1_lon_dsl, ref_1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

import numpy as np
from gt4py.next.ffront.fbuiltins import int32
from gt4py.next.iterator.embedded import StridedNeighborOffsetProvider

from icon4py.model.atmosphere.advection.divide_flux_area_list_stencil_02 import (
divide_flux_area_list_stencil_02,
Expand Down Expand Up @@ -308,7 +307,7 @@ def test_divide_flux_area_list_stencil_02(backend):
patch2_cell_blk_vmask,
offset_provider={
"E2C": grid.get_offset_provider("E2C"),
"E2EC": StridedNeighborOffsetProvider(EdgeDim, ECDim, grid.size[E2CDim]),
"E2EC": grid.get_offset_provider("E2EC"),
},
)
assert np.allclose(dreg_patch1_1_lon_vmask.asnumpy(), ref_1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# SPDX-License-Identifier: GPL-3.0-or-later

import numpy as np
from gt4py.next.iterator.embedded import StridedNeighborOffsetProvider

from icon4py.model.atmosphere.advection.hflx_limiter_mo_stencil_01b import (
hflx_limiter_mo_stencil_01b,
Expand Down Expand Up @@ -111,7 +110,7 @@ def test_hflx_limiter_mo_stencil_01b(backend):
z_tracer_min,
offset_provider={
"C2E": grid.get_offset_provider("C2E"),
"C2CE": StridedNeighborOffsetProvider(CellDim, CEDim, grid.size[C2EDim]),
"C2CE": grid.get_offset_provider("C2CE"),
},
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# SPDX-License-Identifier: GPL-3.0-or-later

import numpy as np
from gt4py.next.iterator.embedded import StridedNeighborOffsetProvider

from icon4py.model.atmosphere.advection.hflx_limiter_pd_stencil_01 import hflx_limiter_pd_stencil_01
from icon4py.model.common.dimension import C2EDim, CEDim, CellDim, EdgeDim, KDim
Expand Down Expand Up @@ -69,7 +68,7 @@ def test_hflx_limiter_pd_stencil_01(backend):
p_dtime,
dbl_eps,
offset_provider={
"C2CE": StridedNeighborOffsetProvider(CellDim, CEDim, grid.size[C2EDim]),
"C2CE": grid.get_offset_provider("C2CE"),
"C2E": grid.get_offset_provider("C2E"),
},
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# SPDX-License-Identifier: GPL-3.0-or-later

import numpy as np
from gt4py.next.iterator.embedded import StridedNeighborOffsetProvider

from icon4py.model.atmosphere.advection.hor_adv_stencil_01 import hor_adv_stencil_01
from icon4py.model.common.dimension import C2EDim, CEDim, CellDim, EdgeDim, KDim
Expand Down Expand Up @@ -74,7 +73,7 @@ def test_hor_adv_stencil_01(backend):
p_dtime,
offset_provider={
"C2E": grid.get_offset_provider("C2E"),
"C2CE": StridedNeighborOffsetProvider(CellDim, CEDim, grid.size[C2EDim]),
"C2CE": grid.get_offset_provider("C2CE"),
},
)
assert np.allclose(tracer_new.asnumpy(), ref)
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

import numpy as np
import pytest
from gt4py.next.iterator.embedded import StridedNeighborOffsetProvider

from icon4py.model.atmosphere.advection.recon_lsq_cell_c_svd_stencil import (
recon_lsq_cell_c_svd_stencil,
Expand Down Expand Up @@ -314,7 +313,7 @@ def test_recon_lsq_cell_c_svd_stencil(backend):
p_coeff_10_dsl,
offset_provider={
"C2E2C2E2C": grid.get_offset_provider("C2E2C2E2C"),
"C2CECEC": StridedNeighborOffsetProvider(CellDim, CECECDim, grid.size[C2E2C2E2CDim]),
"C2CECEC": grid.get_offset_provider("C2CECEC"),
},
)
co1 = p_coeff_1_dsl.asnumpy()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# SPDX-License-Identifier: GPL-3.0-or-later

import numpy as np
from gt4py.next.iterator.embedded import StridedNeighborOffsetProvider

from icon4py.model.atmosphere.advection.recon_lsq_cell_l_svd_stencil import (
recon_lsq_cell_l_svd_stencil,
Expand Down Expand Up @@ -66,7 +65,7 @@ def test_recon_lsq_cell_l_svd_stencil(backend):
p_coeff_3,
offset_provider={
"C2E2C": grid.get_offset_provider("C2E2C"),
"C2CEC": StridedNeighborOffsetProvider(CellDim, CECDim, grid.size[C2E2CDim]),
"C2CEC": grid.get_offset_provider("C2CEC"),
},
)
co1 = p_coeff_1.asnumpy()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,11 @@

import numpy as np
from gt4py.next.ffront.fbuiltins import int32
from gt4py.next.iterator.embedded import StridedNeighborOffsetProvider

from icon4py.model.atmosphere.advection.upwind_hflux_miura_cycl_stencil_02 import (
upwind_hflux_miura_cycl_stencil_02,
)
from icon4py.model.common.dimension import C2EDim, CEDim, CellDim, EdgeDim, KDim
from icon4py.model.common.dimension import C2EDim, CellDim, EdgeDim, KDim
from icon4py.model.common.grid.simple import SimpleGrid
from icon4py.model.common.test_utils.helpers import random_field

Expand Down Expand Up @@ -91,7 +90,7 @@ def test_upwind_hflux_miura_cycl_stencil_02(backend):
z_rho_new_dsl,
z_tracer_new_dsl,
offset_provider={
"C2CE": StridedNeighborOffsetProvider(CellDim, CEDim, grid.size[C2EDim]),
"C2CE": grid.get_offset_provider("C2CE"),
"C2E": grid.get_offset_provider("C2E"),
},
)
Expand Down
2 changes: 1 addition & 1 deletion model/atmosphere/diffusion/.pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ repos:
args: [--config-root, model/atmosphere/diffusion/, --resolve-all-configs]

- repo: https://github.com/PyCQA/flake8
rev: '4.0.1'
rev: '6.1.0'
hooks:
- id: flake8
name: flake8 code style checks
Expand Down
2 changes: 1 addition & 1 deletion model/atmosphere/dycore/.pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ repos:
args: [--config-root, model/atmosphere/dycore/, --resolve-all-configs]

- repo: https://github.com/PyCQA/flake8
rev: '4.0.1'
rev: '6.1.0'
hooks:
- id: flake8
name: flake8 code style checks
Expand Down
2 changes: 1 addition & 1 deletion model/common/.pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ repos:
args: [--config-root, model/common/, --resolve-all-configs]

- repo: https://github.com/PyCQA/flake8
rev: '4.0.1'
rev: '6.1.0'
hooks:
- id: flake8
name: flake8 code style checks
Expand Down
Loading

0 comments on commit 46f6a5f

Please sign in to comment.