Enable CUDA 12.4.1 (pytorch#132202)

Trying to keep a record of the steps before I lose track of them.

- 1st commit: similar to pytorch/builder#1720.
- 2nd commit: update the CUDA 12.4 CI CUDA versions from 12.4.0 to 12.4.1, mapping to the changes in https://github.com/pytorch/pytorch/pull/125944/files.
- 3rd commit: update the aarch64 install_cuda_aarch64.sh docker step.
- 4th commit: pytorch@aaa456e.

Related: pytorch#121684

- Synchronization point: Meta helps upload the PyPI CUDA dependencies specified in .github/scripts/generate_binary_build_matrix.py.
- The above PyPI upload is done (thanks Andrey!); restarted jobs like https://github.com/pytorch/pytorch/actions/runs/10188203670/job/28369471321.
- pytorch@7753234: use temporary docker containers (generated from a previous successful container build). If merged, these containers would be rebuilt, so they are being tested now. (5th commit)
- 6th commit, pytorch@5f93c62: revert the 5th commit.

Update: done, but have to debug seemingly irrelevant failures (rocm/xpu/mps).

Pull Request resolved: pytorch#132202
Approved by: https://github.com/Skylion007, https://github.com/eqy, https://github.com/atalman
aditew01 · Sep 13, 2024 · c3eddca · c3eddca
1 parent b94b8e4
commit c3eddca
Show file tree

Hide file tree

Showing 7 changed files with 41 additions and 42 deletions.
diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh
@@ -92,7 +92,7 @@ _UCC_COMMIT=20eae37090a4ce1b32bcce6144ccad0b49943e0b
 # from scratch
 case "$image" in
   pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9)
-    CUDA_VERSION=12.4.0
+    CUDA_VERSION=12.4.1
     CUDNN_VERSION=9
     ANACONDA_PYTHON_VERSION=3.10
     GCC_VERSION=9
@@ -120,7 +120,7 @@ case "$image" in
     TRITON=yes
     ;;
   pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks)
-    CUDA_VERSION=12.4.0
+    CUDA_VERSION=12.4.1
     CUDNN_VERSION=9
     ANACONDA_PYTHON_VERSION=3.10
     GCC_VERSION=9
@@ -165,7 +165,7 @@ case "$image" in
     INDUCTOR_BENCHMARKS=yes
     ;;
   pytorch-linux-focal-cuda12.4-cudnn9-py3.12-gcc9-inductor-benchmarks)
-    CUDA_VERSION=12.4.0
+    CUDA_VERSION=12.4.1
     CUDNN_VERSION=9
     ANACONDA_PYTHON_VERSION=3.12
     GCC_VERSION=9
@@ -194,7 +194,7 @@ case "$image" in
     TRITON=yes
     ;;
   pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9)
-    CUDA_VERSION=12.4.0
+    CUDA_VERSION=12.4.1
     CUDNN_VERSION=9
     ANACONDA_PYTHON_VERSION=3.10
     GCC_VERSION=9
@@ -222,7 +222,7 @@ case "$image" in
     TRITON=yes
     ;;
   pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9)
-    CUDA_VERSION=12.4.0
+    CUDA_VERSION=12.4.1
     CUDNN_VERSION=9
     ANACONDA_PYTHON_VERSION=3.10
     GCC_VERSION=9

diff --git a/.ci/docker/common/install_cuda.sh b/.ci/docker/common/install_cuda.sh
@@ -94,13 +94,13 @@ function install_121 {
 }
 
 function install_124 {
-  echo "Installing CUDA 12.4 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
+  echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
   rm -rf /usr/local/cuda-12.4 /usr/local/cuda
-  # install CUDA 12.4.0 in the same container
-  wget -q https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux.run
-  chmod +x cuda_12.4.0_550.54.14_linux.run
-  ./cuda_12.4.0_550.54.14_linux.run --toolkit --silent
-  rm -f cuda_12.4.0_550.54.14_linux.run
+  # install CUDA 12.4.1 in the same container
+  wget -q https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux.run
+  chmod +x cuda_12.4.1_550.54.15_linux.run
+  ./cuda_12.4.1_550.54.15_linux.run --toolkit --silent
+  rm -f cuda_12.4.1_550.54.15_linux.run
   rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.4 /usr/local/cuda
 
   # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement

diff --git a/.ci/docker/common/install_cuda_aarch64.sh b/.ci/docker/common/install_cuda_aarch64.sh
@@ -17,13 +17,13 @@ function install_cusparselt_052 {
 }
 
 function install_124 {
-  echo "Installing CUDA 12.4 and cuDNN 9.1 and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
+  echo "Installing CUDA 12.4.1 and cuDNN 9.1 and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
   rm -rf /usr/local/cuda-12.4 /usr/local/cuda
-  # install CUDA 12.4.0 in the same container
-  wget -q https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux_sbsa.run
-  chmod +x cuda_12.4.0_550.54.14_linux_sbsa.run
-  ./cuda_12.4.0_550.54.14_linux_sbsa.run --toolkit --silent
-  rm -f cuda_12.4.0_550.54.14_linux_sbsa.run
+  # install CUDA 12.4.1 in the same container
+  wget -q https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux_sbsa.run
+  chmod +x cuda_12.4.1_550.54.15_linux_sbsa.run
+  ./cuda_12.4.1_550.54.15_linux_sbsa.run --toolkit --silent
+  rm -f cuda_12.4.1_550.54.15_linux_sbsa.run
   rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.4 /usr/local/cuda
 
   # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement

diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh
@@ -657,8 +657,7 @@ test_inductor_torchbench_smoketest_perf() {
   # https://github.com/pytorch/pytorch/actions/runs/7158691360/job/19491437314,
   # and thus we lower its threshold to reduce flakiness. If this continues to be a problem,
   # we switch to use some other model.
-  # lowering threshold from 4.9 to 4.7 for cu124. Will bump it up after cuda 12.4.0->12.4.1 update
-  python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv" -t 4.7
+  python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv" -t 4.9
 
   # Check memory compression ratio for a few models
   for test in hf_Albert timm_vision_transformer; do

diff --git a/.github/scripts/generate_binary_build_matrix.py b/.github/scripts/generate_binary_build_matrix.py
@@ -18,7 +18,7 @@
 CUDA_ARCHES = ["11.8", "12.1", "12.4"]
 
 
-CUDA_ARCHES_FULL_VERSION = {"11.8": "11.8.0", "12.1": "12.1.1", "12.4": "12.4.0"}
+CUDA_ARCHES_FULL_VERSION = {"11.8": "11.8.0", "12.1": "12.1.1", "12.4": "12.4.1"}
 
 
 CUDA_ARCHES_CUDNN_VERSION = {"11.8": "9", "12.1": "9", "12.4": "9"}
@@ -68,18 +68,18 @@
         "nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'"
     ),
     "12.4": (
-        "nvidia-cuda-nvrtc-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | "
-        "nvidia-cuda-runtime-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | "
-        "nvidia-cuda-cupti-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | "
+        "nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | "
+        "nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | "
+        "nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | "
         "nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | "
-        "nvidia-cublas-cu12==12.4.2.65; platform_system == 'Linux' and platform_machine == 'x86_64' | "
-        "nvidia-cufft-cu12==11.2.0.44; platform_system == 'Linux' and platform_machine == 'x86_64' | "
-        "nvidia-curand-cu12==10.3.5.119; platform_system == 'Linux' and platform_machine == 'x86_64' | "
-        "nvidia-cusolver-cu12==11.6.0.99; platform_system == 'Linux' and platform_machine == 'x86_64' | "
-        "nvidia-cusparse-cu12==12.3.0.142; platform_system == 'Linux' and platform_machine == 'x86_64' | "
+        "nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | "
+        "nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | "
+        "nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | "
+        "nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | "
+        "nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | "
         "nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | "
-        "nvidia-nvtx-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | "
-        "nvidia-nvjitlink-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64'"
+        "nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | "
+        "nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64'"
     ),
 }
 

diff --git a/.github/workflows/generated-linux-binary-manywheel-main.yml b/.github/workflows/generated-linux-binary-manywheel-main.yml