Skip to content

Commit

Permalink
build rocm wheels (#1081)
Browse files Browse the repository at this point in the history
Co-authored-by: dan_the_3rd <43445237+danthe3rd@users.noreply.github.com>
  • Loading branch information
tenpercent and danthe3rd authored Aug 22, 2024
1 parent e3900ba commit e639746
Show file tree
Hide file tree
Showing 5 changed files with 124 additions and 26 deletions.
46 changes: 46 additions & 0 deletions .github/workflows/rocm_build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Builds ROCm wheels by delegating to the reusable wheels_build.yml workflow,
# then prunes dangling Docker images left on the runner.
name: rocm-build

on:
  push:
    branches:
      - develop
  pull_request:
    # Only rebuild on pull requests that touch packaging, these workflows,
    # or the HIP / composable_kernel sources.
    paths:
      - "packaging/compute_wheel_version.sh"
      - ".github/workflows/rocm_build.yml"
      - ".github/workflows/wheels_build.yml"
      - "setup.py"
      - "requirements*.txt"
      - "xformers/csrc/attention/hip_fmha/**"
      - "third_party/composable_kernel_tiled/**"
  workflow_dispatch:

jobs:
  build:
    strategy:
      # Let every ROCm version finish even if another matrix entry fails.
      fail-fast: false
      matrix:
        # Versions stay quoted so YAML keeps them as strings ('6.0', not 6).
        os: ['ubuntu-alola']
        python: ['3.11']
        torch_version: ['2.4.0']
        toolkit_type: ['rocm']
        toolkit_short_version: ['6.0', '6.1']

    uses: ./.github/workflows/wheels_build.yml
    # Restrict the build to the rocm/xformers repository.
    if: github.repository == 'rocm/xformers'
    with:
      os: ${{ matrix.os }}
      python: ${{ matrix.python }}
      torch_version: ${{ matrix.torch_version }}
      toolkit_type: ${{ matrix.toolkit_type }}
      toolkit_short_version: ${{ matrix.toolkit_short_version }}
      artifact_tag: ${{ github.run_id }}

  clean:
    runs-on: 'ubuntu-alola'
    # always() is required: without a status-check function GitHub applies an
    # implicit success(), so this job would be skipped whenever a build matrix
    # entry fails and the images from that build would never be pruned. The
    # cleanup is still skipped when the build itself was skipped.
    if: ${{ always() && needs.build.result != 'skipped' }}
    needs: [build]
    steps:
      - name: Remove dangling Docker images
        run: |
          docker images -q -f dangling=true | xargs --no-run-if-empty docker rmi
4 changes: 2 additions & 2 deletions .github/workflows/rocm_ci.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
name: ROCM_CI
name: rocm-ci

on:
pull_request:
types: [labeled, synchronize, reopened]

jobs:
build:
if: contains(github.event.label.name, 'rocm')
if: github.repository == 'rocm/xformers'
runs-on: rocm

steps:
Expand Down
43 changes: 34 additions & 9 deletions .github/workflows/wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,18 @@ jobs:
for os in ['8-core-ubuntu', 'windows-8-core']:
for python in PY_VERSIONS:
for torch_version in ['2.4.0']:
for cuda_short_version in ["118", "121"]:
include.append(dict(
os=os,
python=python,
torch_version=torch_version,
cuda_short_version=cuda_short_version,
))
print(include[-1])
for toolkit_type, toolkit_short_versions in {'cuda': ["118", "121"], 'rocm': ["6.0", "6.1"]}.items():
if os == 'windows-8-core' and toolkit_type == 'rocm':
continue
for toolkit_short_version in toolkit_short_versions:
include.append(dict(
os=os,
python=python,
torch_version=torch_version,
toolkit_type=toolkit_type,
toolkit_short_version=toolkit_short_version,
))
print(include[-1])
matrix = {'include': include}
print(json.dumps(matrix))
with open(environ["GITHUB_OUTPUT"], "a") as fd:
Expand All @@ -60,7 +64,8 @@ jobs:
os: ${{ matrix.os }}
python: ${{ matrix.python }}
torch_version: ${{ matrix.torch_version }}
cuda_short_version: ${{ matrix.cuda_short_version }}
toolkit_type: ${{ matrix.toolkit_type }}
toolkit_short_version: ${{ matrix.toolkit_short_version }}

upload_pip:
needs: build
Expand Down Expand Up @@ -92,3 +97,23 @@ jobs:
filter: "*torch2.4.0+cu121*"
execute: ${{ github.repository == 'facebookresearch/xformers' && github.ref_type == 'tag' }}

upload_pt_rocm6_0:
needs: build
uses: ./.github/workflows/wheels_upload_s3.yml
with:
aws_role: "arn:aws:iam::749337293305:role/pytorch_bot_uploader_role"
s3_path: s3://pytorch/whl/rocm6.0/
aws_s3_cp_extra_args: --acl public-read
filter: "*torch2.4.0+rocm6.0*"
execute: ${{ github.repository == 'facebookresearch/xformers' && github.ref_type == 'tag' }}

upload_pt_rocm6_1:
needs: build
uses: ./.github/workflows/wheels_upload_s3.yml
with:
aws_role: "arn:aws:iam::749337293305:role/pytorch_bot_uploader_role"
s3_path: s3://pytorch/whl/rocm6.1/
aws_s3_cp_extra_args: --acl public-read
filter: "*torch2.4.0+rocm6.1*"
execute: ${{ github.repository == 'facebookresearch/xformers' && github.ref_type == 'tag' }}

42 changes: 33 additions & 9 deletions .github/workflows/wheels_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,11 @@ on:
required: true
type: string
description: "Example: 1.13.1"
cuda_short_version:
toolkit_type:
required: true
type: string
description: "Example: cuda for cuda, rocm for rocm"
toolkit_short_version:
required: true
type: string
description: "Example: 117 for 11.7"
Expand All @@ -26,18 +30,20 @@ on:

env:
# you need at least cuda 5.0 for some of the stuff compiled here.
TORCH_CUDA_ARCH_LIST: ${{ join('6.0+PTX 7.0 7.5 8.0+PTX', fromJSON(inputs.cuda_short_version) >= 118 && ' 9.0a' || '') }}
TORCH_CUDA_ARCH_LIST: ${{ contains(inputs.toolkit_type, 'cuda') && join('6.0+PTX 7.0 7.5 8.0+PTX', fromJSON(inputs.toolkit_short_version) >= 118 && ' 9.0a' || '') || '' }}
HIP_ARCHITECTURES: ${{ contains(inputs.toolkit_type, 'rocm') && 'gfx90a gfx942' || '' }}
MAX_JOBS: 4
DISTUTILS_USE_SDK: 1 # otherwise distutils will complain on windows about multiple versions of msvc
XFORMERS_BUILD_TYPE: "Release"
TWINE_USERNAME: __token__
XFORMERS_PACKAGE_FROM: "wheel-${{ github.ref_name }}"
# https://github.blog/changelog/2024-03-07-github-actions-all-actions-will-run-on-node20-instead-of-node16-by-default/
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: "true"
PYTORCH_INDEX_URL: "https://download.pytorch.org/whl/${{ contains(inputs.toolkit_type, 'cuda') && 'cu' || 'rocm' }}${{ inputs.toolkit_short_version }}"

jobs:
build:
name: ${{ contains(inputs.os, 'ubuntu') && 'ubuntu' || 'win' }}-py${{ inputs.python }}-pt${{ inputs.torch_version }}+cu${{ inputs.cuda_short_version }}
name: ${{ contains(inputs.os, 'ubuntu') && 'ubuntu' || 'win' }}-py${{ inputs.python }}-pt${{ inputs.torch_version }}+${{ contains(inputs.toolkit_type, 'cuda') && 'cu' || 'rocm' }}${{ inputs.toolkit_short_version }}
runs-on: ${{ inputs.os }}
env:
# alias for the current python version
Expand All @@ -59,14 +65,17 @@ jobs:
import os
import sys
print(sys.version)
cushort = "${{ inputs.cuda_short_version }}"
cushort = "${{ inputs.toolkit_short_version }}"
TORCH_CUDA_DEFAULT = "121" # pytorch 2.1.0
# https://github.com/Jimver/cuda-toolkit/blob/master/src/links/linux-links.ts
full_version, install_script = {
"121": ("12.1.0", "https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda_12.1.0_530.30.02_linux.run"),
"118": ("11.8.0", "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run"),
"117": ("11.7.1", "https://developer.download.nvidia.com/compute/cuda/11.7.1/local_installers/cuda_11.7.1_515.65.01_linux.run"),
"116": ("11.6.2", "https://developer.download.nvidia.com/compute/cuda/11.6.2/local_installers/cuda_11.6.2_510.47.03_linux.run"),
"6.0": ("6.0.2", "https://repo.radeon.com/amdgpu-install/6.0.2/rhel/7.9/amdgpu-install-6.0.60002-1.el7.noarch.rpm"),
"6.1": ("6.1.2", "https://repo.radeon.com/amdgpu-install/6.1.2/el/7/amdgpu-install-6.1.60102-1.el7.noarch.rpm"),
}[cushort]
with open(os.environ['GITHUB_OUTPUT'], "r+") as fp:
fp.write("CUDA_VERSION=" + full_version + "\n")
Expand All @@ -75,7 +84,7 @@ jobs:
fp.write("TORCH_ORG_S3_PATH=s3://pytorch/whl\n")
fp.write("PUBLISH_PYPI=1\n")
else:
fp.write("CUDA_VERSION_SUFFIX=+cu" + cushort + "\n")
fp.write("CUDA_VERSION_SUFFIX=+" + ("cu" if "cuda" == "${{ inputs.toolkit_type }}" else "rocm") + cushort + "\n")
fp.write("TORCH_ORG_S3_PATH=s3://pytorch/whl/" + cushort + "\n")
fp.write("PUBLISH_PYPI=0\n")
fp.write("CUDA_INSTALL_SCRIPT=" + install_script + "\n")
Expand Down Expand Up @@ -131,17 +140,32 @@ jobs:
cuda: ${{ steps.cuda_info.outputs.CUDA_VERSION }}
python: ${{ inputs.python }}

- name: Install dependencies
run: $PY -m pip install wheel setuptools twine -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu${{ inputs.cuda_short_version }}

- if: runner.os == 'Linux'
name: (Linux) list installed packages
run: |
yum list installed
- if: runner.os == 'Linux' && contains(inputs.toolkit_type, 'cuda')
name: (Linux) install cuda
run: >
yum install wget git prename -y &&
wget -q "${{ steps.cuda_info.outputs.CUDA_INSTALL_SCRIPT }}" -O cuda.run &&
sh ./cuda.run --silent --toolkit &&
rm ./cuda.run
- if: runner.os == 'Linux' && contains(inputs.toolkit_type, 'rocm')
name: (Linux) install rocm
run: |
yum install -y libzstd
yum install -y ${{ steps.cuda_info.outputs.CUDA_INSTALL_SCRIPT }}
amdgpu-install -y --usecase=rocm --no-dkms
echo "ROCM_PATH=/opt/rocm" >> ${GITHUB_ENV}
echo "PATH=$PATH:/opt/rocm/bin" >> ${GITHUB_ENV}
echo "MAX_JOBS=7" >> ${GITHUB_ENV}
- name: Install dependencies
run: $PY -m pip install wheel setuptools twine -r requirements.txt --extra-index-url $PYTORCH_INDEX_URL

- name: Build wheel
run: |
$PY setup.py bdist_wheel -d dist/ -k $PLAT_ARG
Expand All @@ -151,6 +175,6 @@ jobs:
- run: du -h dist/*
- uses: actions/upload-artifact@v3
with:
name: ${{ inputs.os }}-py${{ inputs.python }}-torch${{ inputs.torch_version }}+cu${{ inputs.cuda_short_version }}_${{ inputs.artifact_tag }}
name: ${{ inputs.os }}-py${{ inputs.python }}-torch${{ inputs.torch_version }}+${{ contains(inputs.toolkit_type, 'cuda') && 'cu' || 'rocm' }}${{ inputs.toolkit_short_version }}_${{ inputs.artifact_tag }}
path: dist/*.whl
# Note: it might be helpful to have additional steps that test if the built wheels actually work
15 changes: 9 additions & 6 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import torch
from torch.utils.cpp_extension import (
CUDA_HOME,
ROCM_HOME,
BuildExtension,
CppExtension,
CUDAExtension,
Expand Down Expand Up @@ -417,7 +418,9 @@ def get_extensions():
"--ptxas-options=-O2",
"--ptxas-options=-allow-expensive-optimizations=true",
]
elif torch.cuda.is_available() and torch.version.hip:
elif torch.version.hip and (
torch.cuda.is_available() or os.getenv("HIP_ARCHITECTURES", "") != ""
):
disable_hd256_hip_fmha = os.getenv("DISABLE_HD256_HIP_FMHA", "0")
if disable_hd256_hip_fmha == "1":
source_hip_maxk_256 = []
Expand All @@ -427,8 +430,7 @@ def get_extensions():
source_hip = list(set(source_hip) - set(source_hip_maxk_256))

rename_cpp_cu(source_hip)
rocm_home = os.getenv("ROCM_PATH")
hip_version = get_hip_version(rocm_home)
hip_version = get_hip_version(ROCM_HOME)

source_hip_cu = []
for ff in source_hip:
Expand All @@ -444,22 +446,23 @@ def get_extensions():
Path(this_dir) / "third_party" / "composable_kernel_tiled" / "include"
]

use_rtn_bf16_convert = os.getenv("ENABLE_HIP_FMHA_RTN_BF16_CONVERT", "0")

generator_flag = []
if disable_hd256_hip_fmha == "1":
generator_flag += ["-DFMHA_SUPPORT_MAX_HEADDIM_128=1"]

cc_flag = ["-DBUILD_PYTHON_PACKAGE"]
use_rtn_bf16_convert = os.getenv("ENABLE_HIP_FMHA_RTN_BF16_CONVERT", "0")
if use_rtn_bf16_convert == "1":
cc_flag += ["-DCK_TILE_FLOAT_TO_BFLOAT16_DEFAULT=0"]

arch_list = os.getenv("HIP_ARCHITECTURES", "native").split()

extra_compile_args = {
"cxx": ["-O3", "-std=c++17"] + generator_flag,
"nvcc": [
"-O3",
"-std=c++17",
f"--offload-arch={os.getenv('HIP_ARCHITECTURES', 'native')}",
*[f"--offload-arch={arch}" for arch in arch_list],
"-U__CUDA_NO_HALF_OPERATORS__",
"-U__CUDA_NO_HALF_CONVERSIONS__",
"-DCK_TILE_FMHA_FWD_FAST_EXP2=1",
Expand Down

0 comments on commit e639746

Please sign in to comment.