Update default target ROCm architectures in OSS build (#5219)

q10 · meta-codesync[bot] · commit 4e03118ecc68 · 2025-12-12T01:58:28.000-08:00
Summary: X-link: https://github.com/facebookresearch/FBGEMM/pull/2216 Pull Request resolved: #5219 - Update default target ROCm architectures in OSS builds Reviewed By: jwfromm Differential Revision: D88923679 fbshipit-source-id: 811f6a9c6a1eb22e5601bfb5865352c18a7b2b7e
diff --git a/.github/scripts/fbgemm_gpu_build.bash b/.github/scripts/fbgemm_gpu_build.bash
@@ -222,13 +222,17 @@ __configure_fbgemm_gpu_build_rocm () {
     # the value set to 0), we are building in Nova.  Nova machines take much
     # longer time to build FBGEMM_GPU for ROCm, so we have to limit to just the
     # latest model.
-    echo "[BUILD] Building in Nova environment, ignoring the provided PYTORCH_ROCM_ARCH list and limiting ROCm targets ..."
+    echo "[BUILD] Building in Nova environment, which is resource-constrained - will be ignoring the provided PYTORCH_ROCM_ARCH list and limiting ROCm targets ..."
     local arch_list="gfx942"
 
   else
     # If BUILD_FROM_NOVA is unset, then we are building from a compute host with
     # sufficient resources, so we can build for more AMD Instinct architectures.
-    local arch_list="gfx908,gfx90a,gfx942"
+    if [[ ${rocm_version_arr[0]} -ge 7 ]]; then
+      local arch_list="gfx908,gfx90a,gfx942,gfx950"
+    else
+      local arch_list="gfx908,gfx90a,gfx942"
+    fi
   fi
 
   echo "[BUILD] Setting the following ROCm targets: ${arch_list}"
diff --git a/.github/scripts/utils_base.bash b/.github/scripts/utils_base.bash
@@ -298,3 +298,36 @@ set_clang_symlinks () {
   print_exec ln -sf "${cxx_path}" "$(dirname "$cxx_path")/c++"
   print_exec ln -sf "${cxx_path}" "$(dirname "$cxx_path")/g++"
 }
+
+__fetch_cuda_version_array () {
+  # Export cuda_version_arr (e.g., "12.4" -> [12, 4]), parsed from the
+  # "release X.Y" text printed by nvcc inside the given Conda environment.
+  local env_name="$1"
+  # shellcheck disable=SC2155
+  local conda_target=$(env_name_or_prefix "${env_name}")
+  # shellcheck disable=SC2155,SC2086
+  local nvcc_release=$(conda run ${conda_target} nvcc --version | sed -n 's/^.*release \([0-9]\+\.[0-9]\+\).*$/\1/p')
+  echo "[INFO] Extracted CUDA version: ${nvcc_release}"
+  # shellcheck disable=SC2206
+  export cuda_version_arr=(${nvcc_release//./ })
+}
+
+__fetch_rocm_version_array () {
+  # Export rocm_version_arr (e.g., "5.7.0" -> [5, 7, 0]) from torch.version.hip
+  # as printed by Python in the given Conda environment; returns 1 if unavailable.
+  local env_name="$1"
+  # shellcheck disable=SC2155
+  local env_prefix=$(env_name_or_prefix "${env_name}")
+
+  # shellcheck disable=SC2155,SC2086
+  local rocm_version=$(conda run ${env_prefix} python -c "import torch; print(torch.version.hip)" 2>/dev/null)
+  if [ -z "$rocm_version" ] || [ "$rocm_version" = "None" ]; then
+    echo "[INFO] Could not extract ROCm version!"
+    return 1
+  fi
+
+  echo "[INFO] Extracted ROCm version: ${rocm_version}"
+  # Split on dots exactly once; read -a is immune to globbing and ambient IFS
+  IFS='.' read -ra rocm_version_arr <<< "$rocm_version"
+  export rocm_version_arr
+}