[CI] Fix SOTA runs

vmoens · vmoens · commit cb51271f068a · 2025-12-12T15:59:45.000Z
ghstack-source-id: 29be2b7 Pull-Request: #3252
diff --git a/.github/unittest/linux/scripts/environment.yml b/.github/unittest/linux/scripts/environment.yml
@@ -26,11 +26,9 @@ dependencies:
     - tensorboard
     - imageio==2.26.0
     - wandb
-    - mujoco<3.3.6
     - mlflow
     - av
     - coverage
-    - ray
     - transformers
     - ninja
     - timm
diff --git a/.github/unittest/linux/scripts/run_all.sh b/.github/unittest/linux/scripts/run_all.sh
@@ -119,8 +119,23 @@ if [[ "$PYTHON_VERSION" != "3.13" && "$PYTHON_VERSION" != "3.14" ]]; then
   pip3 install dm_control
 fi
 
+# Install ray on every Python version except 3.14 (ray doesn't support Python 3.14 yet)
+if [[ "$PYTHON_VERSION" != "3.14" ]]; then
+  echo "installing ray"
+  pip3 install ray
+fi
+
+# Install mujoco on every Python version except 3.14 (mujoco doesn't have Python 3.14 wheels yet)
+if [[ "$PYTHON_VERSION" != "3.14" ]]; then
+  echo "installing mujoco"
+  pip3 install "mujoco<3.3.6"
+fi
+
 echo "installing gymnasium"
-if [[ "$PYTHON_VERSION" == "3.12" ]]; then
+if [[ "$PYTHON_VERSION" == "3.14" ]]; then
+  # Python 3.14: no mujoco wheels available
+  pip3 install "gymnasium[atari]>=1.1"
+elif [[ "$PYTHON_VERSION" == "3.12" ]]; then
   pip3 install ale-py
   pip3 install sympy
   pip3 install "gymnasium[mujoco]>=1.1" mo-gymnasium[mujoco]
diff --git a/.github/unittest/linux_sota/scripts/environment.yml b/.github/unittest/linux_sota/scripts/environment.yml
@@ -1,6 +1,6 @@
 channels:
-  - pytorch
   - defaults
+  - pytorch
 dependencies:
   - pip
   - protobuf
diff --git a/.github/unittest/linux_sota/scripts/run_all.sh b/.github/unittest/linux_sota/scripts/run_all.sh
@@ -19,8 +19,8 @@ dpkg-reconfigure -f noninteractive tzdata || true
 apt-get upgrade -y
 apt-get install -y vim git wget cmake
 
-apt-get install -y libglfw3 libgl1-mesa-glx libosmesa6 libglew-dev libosmesa6-dev
-apt-get install -y libglvnd0 libgl1 libglx0 libegl1 libgles2
+apt-get install -y libglfw3 libosmesa6 libglew-dev libosmesa6-dev
+apt-get install -y libglvnd0 libgl1 libglx0 libglx-mesa0 libegl1 libgles2
 apt-get install -y g++ gcc patchelf
 
 this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
@@ -36,7 +36,6 @@ git config --global --add safe.directory '*'
 root_dir="$(git rev-parse --show-toplevel)"
 conda_dir="${root_dir}/conda"
 env_dir="${root_dir}/env"
-lib_dir="${env_dir}/lib"
 
 cd "${root_dir}"
 
@@ -57,29 +56,15 @@ eval "$(${conda_dir}/bin/conda shell.bash hook)"
 printf "python: ${PYTHON_VERSION}\n"
 if [ ! -d "${env_dir}" ]; then
     printf "* Creating a test environment\n"
-    conda create --prefix "${env_dir}" -y python="$PYTHON_VERSION"
+    # Resolve python only from the defaults and pytorch channels so we get CPython (avoid GraalPy).
+    conda create --override-channels -c defaults -c pytorch --prefix "${env_dir}" -y python="$PYTHON_VERSION"
 fi
 conda activate "${env_dir}"
 
-# Verify we have CPython, not PyPy
-python_impl=$(python -c "import platform; print(platform.python_implementation())")
-if [ "$python_impl" != "CPython" ]; then
-    echo "ERROR: Expected CPython but got $python_impl"
-    echo "Python executable: $(which python)"
-    echo "Python version: $(python --version)"
-    exit 1
-fi
-printf "* Verified Python implementation: %s\n" "$python_impl"
+# Verify we're running CPython (wheels won't work on GraalPy)
+python -c "import sys; assert sys.implementation.name == 'cpython', f'Expected CPython, got {sys.implementation.name}'"
 
 # 3. Install mujoco
-printf "* Installing mujoco and related\n"
-mkdir -p $root_dir/.mujoco
-cd $root_dir/.mujoco/
-#wget https://github.com/deepmind/mujoco/releases/download/2.1.1/mujoco-2.1.1-linux-x86_64.tar.gz
-#tar -xf mujoco-2.1.1-linux-x86_64.tar.gz
-wget https://mujoco.org/download/mujoco210-linux-x86_64.tar.gz
-tar -xf mujoco210-linux-x86_64.tar.gz
-cd "${root_dir}"
 
 # 4. Install Conda dependencies
 printf "* Installing dependencies (except PyTorch)\n"
@@ -89,9 +74,6 @@ if ! grep -q "python=${PYTHON_VERSION}" "${this_dir}/environment.yml"; then
 fi
 cat "${this_dir}/environment.yml"
 
-export MUJOCO_PY_MUJOCO_PATH=$root_dir/.mujoco/mujoco210
-#export MJLIB_PATH=$root_dir/.mujoco/mujoco-2.1.1/lib/libmujoco.so.2.1.1
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$root_dir/.mujoco/mujoco210/bin
 export SDL_VIDEODRIVER=dummy
 export MUJOCO_GL=egl
 export PYOPENGL_PLATFORM=egl
@@ -100,26 +82,21 @@ export COMPOSITE_LP_AGGREGATE=0
 
 conda env config vars set \
   MAX_IDLE_COUNT=1000 \
-  MUJOCO_PY_MUJOCO_PATH=$root_dir/.mujoco/mujoco210 \
   DISPLAY=:99 \
-  LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$root_dir/.mujoco/mujoco210/bin \
   SDL_VIDEODRIVER=dummy \
   MUJOCO_GL=egl \
   PYOPENGL_PLATFORM=egl \
   BATCHED_PIPE_TIMEOUT=60 \
   TOKENIZERS_PARALLELISM=true
 
-pip install pip --upgrade
+# Use python -m pip to ensure we use conda's Python, not system GraalPy
+python -m pip install pip --upgrade
 
 conda env update --file "${this_dir}/environment.yml" --prune
 
 conda deactivate
 conda activate "${env_dir}"
 
-# install d4rl
-pip install free-mujoco-py
-pip install git+https://github.com/Farama-Foundation/d4rl@master#egg=d4rl
-
 # TODO: move this down -- will break torchrl installation
 conda install -y -c conda-forge libstdcxx-ng=12
 ## find libstdc - search in the env's lib directory first, then fall back to conda packages
@@ -144,12 +121,6 @@ fi
 conda deactivate
 conda activate "${env_dir}"
 
-# compile mujoco-py (bc it's done at runtime for whatever reason someone thought it was a good idea)
-python -c """import gym;import d4rl"""
-
-# install ale-py: manylinux names are broken for CentOS so we need to manually download and
-# rename them
-
 # ============================================================================================ #
 # ================================ PyTorch & TorchRL ========================================= #
 
@@ -160,26 +131,24 @@ elif [[ ${#CU_VERSION} -eq 5 ]]; then
     CUDA_VERSION="${CU_VERSION:2:2}.${CU_VERSION:4:1}"
 fi
 echo "Using CUDA $CUDA_VERSION as determined by CU_VERSION ($CU_VERSION)"
-version="$(python -c "print('.'.join(\"${CUDA_VERSION}\".split('.')[:2]))")"
-
 # submodules
 git submodule sync && git submodule update --init --recursive
 
-pip3 install ale-py -U
-pip3 install "gym[atari,accept-rom-license]" "gymnasium>=1.1.0" -U
+# The gymnasium extras pull in ale-py (+ ROMs) for Atari and the mujoco bindings as needed.
+python -m pip install -U "gymnasium[atari,accept-rom-license,mujoco]>=1.1.0"
 
 printf "Installing PyTorch with %s\n" "${CU_VERSION}"
 if [[ "$TORCH_VERSION" == "nightly" ]]; then
   if [ "${CU_VERSION:-}" == cpu ] ; then
-      pip3 install --pre torch torchvision numpy==1.26.4 --index-url https://download.pytorch.org/whl/nightly/cpu -U
+      python -m pip install --pre torch torchvision numpy==1.26.4 --index-url https://download.pytorch.org/whl/nightly/cpu -U
   else
-      pip3 install --pre torch torchvision numpy==1.26.4 --index-url https://download.pytorch.org/whl/nightly/$CU_VERSION
+      python -m pip install --pre torch torchvision numpy==1.26.4 --index-url https://download.pytorch.org/whl/nightly/$CU_VERSION
   fi
 elif [[ "$TORCH_VERSION" == "stable" ]]; then
     if [ "${CU_VERSION:-}" == cpu ] ; then
-      pip3 install torch torchvision numpy==1.26.4 --index-url https://download.pytorch.org/whl/cpu
+      python -m pip install torch torchvision numpy==1.26.4 --index-url https://download.pytorch.org/whl/cpu
   else
-      pip3 install torch torchvision numpy==1.26.4 --index-url https://download.pytorch.org/whl/$CU_VERSION
+      python -m pip install torch torchvision numpy==1.26.4 --index-url https://download.pytorch.org/whl/$CU_VERSION
   fi
 else
   printf "Failed to install pytorch"
@@ -194,9 +163,9 @@ python -c "import functorch"
 
 # install tensordict
 if [[ "$RELEASE" == 0 ]]; then
-  pip3 install git+https://github.com/pytorch/tensordict.git
+  python -m pip install git+https://github.com/pytorch/tensordict.git
 else
-  pip3 install tensordict
+  python -m pip install tensordict
 fi
 
 printf "* Installing torchrl\n"
diff --git a/.github/unittest/linux_sota/scripts/test_sota.py b/.github/unittest/linux_sota/scripts/test_sota.py
@@ -15,21 +15,6 @@
 ), "Composite LP must be set to False. Run this test with COMPOSITE_LP_AGGREGATE=0"
 
 commands = {
-    "dt": """python sota-implementations/decision_transformer/dt.py \
-  optim.pretrain_gradient_steps=55 \
-  optim.updates_per_episode=3 \
-  optim.warmup_steps=10 \
-  logger.backend= \
-  env.backend=gymnasium \
-  env.name=HalfCheetah-v4
-""",
-    "online_dt": """python sota-implementations/decision_transformer/online_dt.py \
-  optim.pretrain_gradient_steps=55 \
-  optim.updates_per_episode=3 \
-  optim.warmup_steps=10 \
-  env.backend=gymnasium \
-  logger.backend=
-""",
     "td3_bc": """python sota-implementations/td3_bc/td3_bc.py \
   optim.gradient_steps=55 \
   logger.backend=
@@ -39,7 +24,7 @@
   collector.frames_per_batch=20 \
   collector.num_workers=1 \
   logger.backend= \
-  env.backend=gym \
+  env.backend=gymnasium \
   logger.test_interval=10
 """,
     "ppo_mujoco": """python sota-implementations/ppo/ppo_mujoco.py \
@@ -57,7 +42,7 @@
   loss.mini_batch_size=20 \
   loss.ppo_epochs=2 \
   logger.backend= \
-  env.backend=gym \
+  env.backend=gymnasium \
   logger.test_interval=10
 """,
     "ddpg": """python sota-implementations/ddpg/ddpg.py \
@@ -84,7 +69,7 @@
   collector.frames_per_batch=20 \
   loss.mini_batch_size=20 \
   logger.backend= \
-  env.backend=gym \
+  env.backend=gymnasium \
   logger.test_interval=40
 """,
     "dqn_atari": """python sota-implementations/dqn/dqn_atari.py \
@@ -94,7 +79,7 @@
   buffer.batch_size=10 \
   loss.num_updates=1 \
   logger.backend= \
-  env.backend=gym \
+  env.backend=gymnasium \
   buffer.buffer_size=120
 """,
     "discrete_cql_online": """python sota-implementations/cql/discrete_cql_online.py \
diff --git a/.github/workflows/test-linux-sota.yml b/.github/workflows/test-linux-sota.yml
@@ -26,14 +26,14 @@ jobs:
   tests:
     strategy:
       matrix:
-        python_version: ["3.9"]
-        cuda_arch_version: ["12.8"]
+        python_version: ["3.10"]
+        cuda_arch_version: ["13.0"]
       fail-fast: false
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.g5.4xlarge.nvidia.gpu
       repository: pytorch/rl
-      docker-image: "nvidia/cuda:12.2.0-devel-ubuntu22.04"
+      docker-image: "nvidia/cuda:13.0.2-cudnn-devel-ubuntu24.04"
       gpu-arch-type: cuda
       gpu-arch-version: ${{ matrix.cuda_arch_version }}
       timeout: 90
diff --git a/.github/workflows/test-linux.yml b/.github/workflows/test-linux.yml
@@ -173,7 +173,7 @@ jobs:
       docker-image: "nvidia/cuda:13.0.2-cudnn-devel-ubuntu24.04"
       gpu-arch-type: cuda
       gpu-arch-version: ${{ matrix.cuda_arch_version }}
-      timeout: 90
+      timeout: 120
       script: |
         # Set env vars from matrix
         export PYTHON_VERSION=${{ matrix.python_version }}