Skip to content

Commit b95dce9

Browse files
committed
[CI] Fix SOTA runs
ghstack-source-id: 04c47af Pull-Request: #3252
1 parent 8736ce0 commit b95dce9

File tree

7 files changed

+86
-151
lines changed

7 files changed

+86
-151
lines changed

.github/unittest/linux/scripts/environment.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,9 @@ dependencies:
2626
- tensorboard
2727
- imageio==2.26.0
2828
- wandb
29-
- mujoco<3.3.6
3029
- mlflow
3130
- av
3231
- coverage
33-
- ray
3432
- transformers
3533
- ninja
3634
- timm

.github/unittest/linux/scripts/run_all.sh

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,23 @@ if [[ "$PYTHON_VERSION" != "3.13" && "$PYTHON_VERSION" != "3.14" ]]; then
119119
pip3 install dm_control
120120
fi
121121

122+
# Install ray for Python < 3.14 (ray doesn't support Python 3.14 yet)
123+
if [[ "$PYTHON_VERSION" != "3.14" ]]; then
124+
echo "installing ray"
125+
pip3 install ray
126+
fi
127+
128+
# Install mujoco for Python < 3.14 (mujoco doesn't have Python 3.14 wheels yet)
129+
if [[ "$PYTHON_VERSION" != "3.14" ]]; then
130+
echo "installing mujoco"
131+
pip3 install "mujoco<3.3.6"
132+
fi
133+
122134
echo "installing gymnasium"
123-
if [[ "$PYTHON_VERSION" == "3.12" ]]; then
135+
if [[ "$PYTHON_VERSION" == "3.14" ]]; then
136+
# Python 3.14: no mujoco wheels available
137+
pip3 install "gymnasium[atari]>=1.1"
138+
elif [[ "$PYTHON_VERSION" == "3.12" ]]; then
124139
pip3 install ale-py
125140
pip3 install sympy
126141
pip3 install "gymnasium[mujoco]>=1.1" mo-gymnasium[mujoco]

.github/unittest/linux_sota/scripts/environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
channels:
2-
- pytorch
32
- defaults
3+
- pytorch
44
dependencies:
55
- pip
66
- protobuf

.github/unittest/linux_sota/scripts/run_all.sh

Lines changed: 61 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ set -v
66
# ==================================================================================== #
77
# ================================ Init ============================================== #
88

9-
109
export DEBIAN_FRONTEND=noninteractive
1110
export TZ="${TZ:-Etc/UTC}"
1211
ln -snf "/usr/share/zoneinfo/${TZ}" /etc/localtime || true
@@ -17,10 +16,10 @@ apt-get install -y --no-install-recommends tzdata
1716
dpkg-reconfigure -f noninteractive tzdata || true
1817

1918
apt-get upgrade -y
20-
apt-get install -y vim git wget cmake
19+
apt-get install -y vim git wget cmake curl
2120

22-
apt-get install -y libglfw3 libgl1-mesa-glx libosmesa6 libglew-dev libosmesa6-dev
23-
apt-get install -y libglvnd0 libgl1 libglx0 libegl1 libgles2
21+
apt-get install -y libglfw3 libosmesa6 libglew-dev libosmesa6-dev
22+
apt-get install -y libglvnd0 libgl1 libglx0 libglx-mesa0 libegl1 libgles2
2423
apt-get install -y g++ gcc patchelf
2524

2625
this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
@@ -34,152 +33,94 @@ cp $this_dir/10_nvidia.json /usr/share/glvnd/egl_vendor.d/10_nvidia.json
3433
# Avoid error: "fatal: unsafe repository"
3534
git config --global --add safe.directory '*'
3635
root_dir="$(git rev-parse --show-toplevel)"
37-
conda_dir="${root_dir}/conda"
38-
env_dir="${root_dir}/env"
39-
lib_dir="${env_dir}/lib"
36+
env_dir="${root_dir}/venv"
4037

4138
cd "${root_dir}"
4239

43-
case "$(uname -s)" in
44-
Darwin*) os=MacOSX;;
45-
*) os=Linux
46-
esac
47-
48-
# 1. Install conda at ./conda
49-
if [ ! -d "${conda_dir}" ]; then
50-
printf "* Installing conda\n"
51-
wget -O miniconda.sh "http://repo.continuum.io/miniconda/Miniconda3-latest-${os}-x86_64.sh"
52-
bash ./miniconda.sh -b -f -p "${conda_dir}"
53-
fi
54-
eval "$(${conda_dir}/bin/conda shell.bash hook)"
40+
# Install uv
41+
curl -LsSf https://astral.sh/uv/install.sh | sh
42+
export PATH="$HOME/.local/bin:$PATH"
5543

56-
# 2. Create test environment at ./env
57-
printf "python: ${PYTHON_VERSION}\n"
58-
if [ ! -d "${env_dir}" ]; then
59-
printf "* Creating a test environment\n"
60-
conda create --prefix "${env_dir}" -y python="$PYTHON_VERSION"
61-
fi
62-
conda activate "${env_dir}"
63-
64-
# Verify we have CPython, not PyPy
65-
python_impl=$(python -c "import platform; print(platform.python_implementation())")
66-
if [ "$python_impl" != "CPython" ]; then
67-
echo "ERROR: Expected CPython but got $python_impl"
68-
echo "Python executable: $(which python)"
69-
echo "Python version: $(python --version)"
70-
exit 1
71-
fi
72-
printf "* Verified Python implementation: %s\n" "$python_impl"
73-
74-
# 3. Install mujoco
75-
printf "* Installing mujoco and related\n"
76-
mkdir -p $root_dir/.mujoco
77-
cd $root_dir/.mujoco/
78-
#wget https://github.com/deepmind/mujoco/releases/download/2.1.1/mujoco-2.1.1-linux-x86_64.tar.gz
79-
#tar -xf mujoco-2.1.1-linux-x86_64.tar.gz
80-
wget https://mujoco.org/download/mujoco210-linux-x86_64.tar.gz
81-
tar -xf mujoco210-linux-x86_64.tar.gz
82-
cd "${root_dir}"
44+
# Create venv with uv
45+
printf "* Creating venv with Python ${PYTHON_VERSION}\n"
46+
uv venv --python "${PYTHON_VERSION}" "${env_dir}"
47+
source "${env_dir}/bin/activate"
8348

84-
# 4. Install Conda dependencies
85-
printf "* Installing dependencies (except PyTorch)\n"
86-
# Add python version to environment.yml if not already present (idempotent)
87-
if ! grep -q "python=${PYTHON_VERSION}" "${this_dir}/environment.yml"; then
88-
echo " - python=${PYTHON_VERSION}" >> "${this_dir}/environment.yml"
89-
fi
90-
cat "${this_dir}/environment.yml"
49+
# Verify CPython
50+
python -c "import sys; assert sys.implementation.name == 'cpython', f'Expected CPython, got {sys.implementation.name}'"
9151

92-
export MUJOCO_PY_MUJOCO_PATH=$root_dir/.mujoco/mujoco210
93-
#export MJLIB_PATH=$root_dir/.mujoco/mujoco-2.1.1/lib/libmujoco.so.2.1.1
94-
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$root_dir/.mujoco/mujoco210/bin
52+
# Set environment variables
9553
export SDL_VIDEODRIVER=dummy
9654
export MUJOCO_GL=egl
9755
export PYOPENGL_PLATFORM=egl
9856
export LAZY_LEGACY_OP=False
9957
export COMPOSITE_LP_AGGREGATE=0
58+
export MAX_IDLE_COUNT=1000
59+
export DISPLAY=:99
60+
export BATCHED_PIPE_TIMEOUT=60
61+
export TOKENIZERS_PARALLELISM=true
10062

101-
conda env config vars set \
102-
MAX_IDLE_COUNT=1000 \
103-
MUJOCO_PY_MUJOCO_PATH=$root_dir/.mujoco/mujoco210 \
104-
DISPLAY=:99 \
105-
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$root_dir/.mujoco/mujoco210/bin \
106-
SDL_VIDEODRIVER=dummy \
107-
MUJOCO_GL=egl \
108-
PYOPENGL_PLATFORM=egl \
109-
BATCHED_PIPE_TIMEOUT=60 \
110-
TOKENIZERS_PARALLELISM=true
111-
112-
pip install pip --upgrade
113-
114-
conda env update --file "${this_dir}/environment.yml" --prune
115-
116-
conda deactivate
117-
conda activate "${env_dir}"
118-
119-
# install d4rl
120-
pip install free-mujoco-py
121-
pip install git+https://github.com/Farama-Foundation/d4rl@master#egg=d4rl
122-
123-
# TODO: move this down -- will break torchrl installation
124-
conda install -y -c conda-forge libstdcxx-ng=12
125-
## find libstdc - search in the env's lib directory first, then fall back to conda packages
126-
STDC_LOC=$(find "${env_dir}/lib" -name "libstdc++.so.6" 2>/dev/null | head -1)
127-
if [ -z "$STDC_LOC" ]; then
128-
# Fall back to searching in conda packages for libstdcxx-ng specifically
129-
STDC_LOC=$(find conda/pkgs -path "*libstdcxx*" -name "libstdc++.so.6" 2>/dev/null | head -1)
130-
fi
131-
if [ -z "$STDC_LOC" ]; then
132-
echo "WARNING: Could not find libstdc++.so.6, skipping LD_PRELOAD"
133-
conda env config vars set \
134-
MAX_IDLE_COUNT=1000 \
135-
TOKENIZERS_PARALLELISM=true
136-
else
137-
echo "Found libstdc++ at: $STDC_LOC"
138-
conda env config vars set \
139-
MAX_IDLE_COUNT=1000 \
140-
LD_PRELOAD=${STDC_LOC} TOKENIZERS_PARALLELISM=true
141-
fi
142-
143-
# Reactivate environment to apply the new env vars
144-
conda deactivate
145-
conda activate "${env_dir}"
146-
147-
# compile mujoco-py (bc it's done at runtime for whatever reason someone thought it was a good idea)
148-
python -c """import gym;import d4rl"""
149-
150-
# install ale-py: manylinux names are broken for CentOS so we need to manually download and
151-
# rename them
63+
# ==================================================================================== #
64+
# ================================ Install dependencies ============================== #
65+
66+
printf "* Installing dependencies\n"
67+
68+
# Install base dependencies
69+
uv pip install \
70+
hypothesis \
71+
future \
72+
cloudpickle \
73+
pygame \
74+
"moviepy<2.0.0" \
75+
tqdm \
76+
pytest \
77+
pytest-cov \
78+
pytest-mock \
79+
pytest-instafail \
80+
pytest-rerunfailures \
81+
expecttest \
82+
pybind11 \
83+
pyyaml \
84+
scipy \
85+
hydra-core \
86+
"imageio==2.26.0" \
87+
dm_control \
88+
"mujoco<3.3.6" \
89+
mlflow \
90+
av \
91+
coverage \
92+
vmas \
93+
transformers \
94+
minari
95+
96+
# Install gymnasium with atari and mujoco support
97+
uv pip install "gymnasium[atari,mujoco]>=1.1.0"
15298

15399
# ============================================================================================ #
154100
# ================================ PyTorch & TorchRL ========================================= #
155101

156-
157102
if [[ ${#CU_VERSION} -eq 4 ]]; then
158103
CUDA_VERSION="${CU_VERSION:2:1}.${CU_VERSION:3:1}"
159104
elif [[ ${#CU_VERSION} -eq 5 ]]; then
160105
CUDA_VERSION="${CU_VERSION:2:2}.${CU_VERSION:4:1}"
161106
fi
162107
echo "Using CUDA $CUDA_VERSION as determined by CU_VERSION ($CU_VERSION)"
163-
version="$(python -c "print('.'.join(\"${CUDA_VERSION}\".split('.')[:2]))")"
164108

165109
# submodules
166110
git submodule sync && git submodule update --init --recursive
167111

168-
pip3 install ale-py -U
169-
pip3 install "gym[atari,accept-rom-license]" "gymnasium>=1.1.0" -U
170-
171112
printf "Installing PyTorch with %s\n" "${CU_VERSION}"
172113
if [[ "$TORCH_VERSION" == "nightly" ]]; then
173114
if [ "${CU_VERSION:-}" == cpu ] ; then
174-
pip3 install --pre torch torchvision numpy==1.26.4 --index-url https://download.pytorch.org/whl/nightly/cpu -U
115+
uv pip install --pre torch torchvision "numpy==1.26.4" --index-url https://download.pytorch.org/whl/nightly/cpu
175116
else
176-
pip3 install --pre torch torchvision numpy==1.26.4 --index-url https://download.pytorch.org/whl/nightly/$CU_VERSION
117+
uv pip install --pre torch torchvision "numpy==1.26.4" --index-url https://download.pytorch.org/whl/nightly/$CU_VERSION
177118
fi
178119
elif [[ "$TORCH_VERSION" == "stable" ]]; then
179-
if [ "${CU_VERSION:-}" == cpu ] ; then
180-
pip3 install torch torchvision numpy==1.26.4 --index-url https://download.pytorch.org/whl/cpu
120+
if [ "${CU_VERSION:-}" == cpu ] ; then
121+
uv pip install torch torchvision "numpy==1.26.4" --index-url https://download.pytorch.org/whl/cpu
181122
else
182-
pip3 install torch torchvision numpy==1.26.4 --index-url https://download.pytorch.org/whl/$CU_VERSION
123+
uv pip install torch torchvision "numpy==1.26.4" --index-url https://download.pytorch.org/whl/$CU_VERSION
183124
fi
184125
else
185126
printf "Failed to install pytorch"
@@ -189,23 +130,19 @@ fi
189130
# smoke test
190131
python -c "import functorch"
191132

192-
## install snapshot
193-
#pip install git+https://github.com/pytorch/torchsnapshot
194-
195133
# install tensordict
196134
if [[ "$RELEASE" == 0 ]]; then
197-
pip3 install git+https://github.com/pytorch/tensordict.git
135+
uv pip install git+https://github.com/pytorch/tensordict.git
198136
else
199-
pip3 install tensordict
137+
uv pip install tensordict
200138
fi
201139

202140
printf "* Installing torchrl\n"
203-
python -m pip install -e . --no-build-isolation
141+
uv pip install -e . --no-build-isolation
204142

205143
# ==================================================================================== #
206144
# ================================ Run tests ========================================= #
207145

208-
209146
bash ${this_dir}/run_test.sh
210147

211148
# ==================================================================================== #

.github/unittest/linux_sota/scripts/test_sota.py

Lines changed: 4 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -15,21 +15,6 @@
1515
), "Composite LP must be set to False. Run this test with COMPOSITE_LP_AGGREGATE=0"
1616

1717
commands = {
18-
"dt": """python sota-implementations/decision_transformer/dt.py \
19-
optim.pretrain_gradient_steps=55 \
20-
optim.updates_per_episode=3 \
21-
optim.warmup_steps=10 \
22-
logger.backend= \
23-
env.backend=gymnasium \
24-
env.name=HalfCheetah-v4
25-
""",
26-
"online_dt": """python sota-implementations/decision_transformer/online_dt.py \
27-
optim.pretrain_gradient_steps=55 \
28-
optim.updates_per_episode=3 \
29-
optim.warmup_steps=10 \
30-
env.backend=gymnasium \
31-
logger.backend=
32-
""",
3318
"td3_bc": """python sota-implementations/td3_bc/td3_bc.py \
3419
optim.gradient_steps=55 \
3520
logger.backend=
@@ -39,7 +24,7 @@
3924
collector.frames_per_batch=20 \
4025
collector.num_workers=1 \
4126
logger.backend= \
42-
env.backend=gym \
27+
env.backend=gymnasium \
4328
logger.test_interval=10
4429
""",
4530
"ppo_mujoco": """python sota-implementations/ppo/ppo_mujoco.py \
@@ -57,7 +42,7 @@
5742
loss.mini_batch_size=20 \
5843
loss.ppo_epochs=2 \
5944
logger.backend= \
60-
env.backend=gym \
45+
env.backend=gymnasium \
6146
logger.test_interval=10
6247
""",
6348
"ddpg": """python sota-implementations/ddpg/ddpg.py \
@@ -84,7 +69,7 @@
8469
collector.frames_per_batch=20 \
8570
loss.mini_batch_size=20 \
8671
logger.backend= \
87-
env.backend=gym \
72+
env.backend=gymnasium \
8873
logger.test_interval=40
8974
""",
9075
"dqn_atari": """python sota-implementations/dqn/dqn_atari.py \
@@ -94,7 +79,7 @@
9479
buffer.batch_size=10 \
9580
loss.num_updates=1 \
9681
logger.backend= \
97-
env.backend=gym \
82+
env.backend=gymnasium \
9883
buffer.buffer_size=120
9984
""",
10085
"discrete_cql_online": """python sota-implementations/cql/discrete_cql_online.py \

.github/workflows/test-linux-sota.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,14 @@ jobs:
2626
tests:
2727
strategy:
2828
matrix:
29-
python_version: ["3.9"]
30-
cuda_arch_version: ["12.8"]
29+
python_version: ["3.10"]
30+
cuda_arch_version: ["13.0"]
3131
fail-fast: false
3232
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
3333
with:
3434
runner: linux.g5.4xlarge.nvidia.gpu
3535
repository: pytorch/rl
36-
docker-image: "nvidia/cuda:12.2.0-devel-ubuntu22.04"
36+
docker-image: "nvidia/cuda:13.0.2-cudnn-devel-ubuntu24.04"
3737
gpu-arch-type: cuda
3838
gpu-arch-version: ${{ matrix.cuda_arch_version }}
3939
timeout: 90

.github/workflows/test-linux.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ jobs:
173173
docker-image: "nvidia/cuda:13.0.2-cudnn-devel-ubuntu24.04"
174174
gpu-arch-type: cuda
175175
gpu-arch-version: ${{ matrix.cuda_arch_version }}
176-
timeout: 90
176+
timeout: 120
177177
script: |
178178
# Set env vars from matrix
179179
export PYTHON_VERSION=${{ matrix.python_version }}

0 commit comments

Comments
 (0)