alan-turing-institute · Iain-S · Jul 25, 2025 · Jul 25, 2025 · Jul 25, 2025 · Jul 25, 2025
diff --git a/.flake8 b/.flake8
@@ -0,0 +1,14 @@
+[flake8]
+extend_exclude =
+    .git,
+    __pycache__,
+    .venv,
+    venv,
+    .mypy_cache,
+    .pytest_cache,
+    baskerville/*,
+    dawn/*,
+    train/*,
+    macos/scripts/try_sacred.py,
+    macos/scripts/wandb_mt.py
+max_line_length = 120
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
@@ -0,0 +1,34 @@
+---
+
+name: Lint
+
+on:
+  pull_request:
+    branches:
+      - main
+  push:
+    branches:
+      - main
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.12'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r macos/environments/requirements.txt
+
+      - uses: pre-commit/action@v3.0.1
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -0,0 +1,29 @@
+name: Unit Tests
+on:
+  pull_request:
+    branches:
+      - main
+  push:
+    branches:
+      - main
+jobs:
+  unit-tests:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.head_ref || github.ref_name }}
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.12'
+      - name: Install dependencies
+        run: |
+            python -m pip install --upgrade pip
+            pip install -r macos/environments/requirements.txt
+      - name: Run tests
+        run: |
+            python test_utils.py
+        working-directory: macos/scripts
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -23,3 +23,15 @@ repos:
       - id: isort
         name: isort (python)
         exclude: baskerville
+  - repo: https://github.com/pycqa/flake8
+    rev: '7.3.0'
+    hooks:
+      - id: flake8
+        args: ["--config=.flake8"]
+  - repo: local
+    hooks:
+      - id: mypy
+        name: MyPy
+        entry: mypy --install-types --non-interactive --config-file=mypy.ini .
+        language: system
+        pass_filenames: false
diff --git a/macos/environments/requirements.txt b/macos/environments/requirements.txt
@@ -0,0 +1,94 @@
+annotated-types==0.7.0
+asttokens==3.0.0
+attrs==25.3.0
+azure-core==1.34.0
+azure-storage-blob==12.25.1
+black==25.1.0
+cdsapi==0.7.6
+certifi==2025.6.15
+cffi==1.17.1
+cftime==1.6.4.post1
+charset-normalizer==3.4.2
+click==8.2.1
+colorama==0.4.6
+cryptography==45.0.4
+decorator==5.2.1
+dnspython==2.7.0
+docopt-ng==0.9.0
+ecmwf-datastores-client==0.2.0
+einops==0.8.1
+executing==2.2.0
+filelock==3.18.0
+flake8==7.3.0
+fsspec==2025.5.1
+gitdb==4.0.12
+GitPython==3.1.45
+hf-xet==1.1.5
+huggingface-hub==0.33.1
+idna==3.10
+ipython==9.3.0
+ipython_pygments_lexers==1.1.1
+isodate==0.7.2
+isort==6.0.1
+jedi==0.19.2
+Jinja2==3.1.6
+jsonpickle==4.1.1
+MarkupSafe==3.0.2
+matplotlib-inline==0.1.7
+mccabe==0.7.0
+-e git+https://git@github.com/microsoft/aurora.git@924680d5c74d4ddb61ca6fcbed7676a0ec282b71#egg=microsoft_aurora
+mpmath==1.3.0
+multiurl==0.3.5
+munch==4.0.0
+mypy==1.17.0
+mypy_extensions==1.1.0
+netCDF4==1.7.2
+networkx==3.5
+numpy==2.3.1
+nvidia-ml-py==12.575.51
+packaging==25.0
+pandas==2.3.0
+parso==0.8.4
+pathspec==0.12.1
+pexpect==4.9.0
+pillow==11.2.1
+platformdirs==4.3.8
+prompt_toolkit==3.0.51
+protobuf==6.31.1
+ptyprocess==0.7.0
+pure_eval==0.2.3
+py-cpuinfo==9.0.0
+pycodestyle==2.14.0
+pycparser==2.22
+pydantic==2.11.7
+pydantic_core==2.33.2
+pyflakes==3.4.0
+Pygments==2.19.2
+pymongo==4.13.2
+python-dateutil==2.9.0.post0
+pytz==2025.2
+PyYAML==6.0.2
+requests==2.32.4
+sacred==0.8.7
+scipy==1.16.0
+sentry-sdk==2.33.2
+setuptools==80.9.0
+six==1.17.0
+smmap==5.0.2
+snakeviz==2.2.2
+stack-data==0.6.3
+sympy==1.14.0
+timm==0.6.13
+torch==2.7.1
+torchvision==0.22.1
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+typing-inspection==0.4.1
+typing_extensions==4.14.0
+tzdata==2025.2
+urllib3==2.5.0
+wandb==0.21.0
+wcwidth==0.2.13
+wrapt==1.17.2
+xarray==2025.6.1
diff --git a/macos/scripts/test_utils.py b/macos/scripts/test_utils.py
@@ -0,0 +1,146 @@
+import os
+import pathlib
+import unittest
+from unittest.mock import patch
+
+import utils
+
+
+class TestCudaMetrics(unittest.TestCase):
+    """Exercise utils.CudaMetrics with the relevant torch.cuda calls patched."""
+
+    def test_metrics_are_available(self):
+        """are_available() is truthy when torch.cuda.is_available returns True."""
+        cuda_present = patch(
+            "torch.cuda.is_available", autospec=True, return_value=True
+        )
+        with cuda_present:
+            available = utils.CudaMetrics.are_available()
+            self.assertTrue(available)
+
+    def test_get_metrics(self):
+        """The first metrics entry reports the patched utilisation as gpu_util."""
+        collector = utils.CudaMetrics()
+        with patch("torch.cuda.utilization", autospec=True, return_value=75):
+            first_entry = collector.get_metrics()[0]
+        self.assertEqual(75, first_entry["gpu_util"])
+
+
+class TestXpuMetricsV1(unittest.TestCase):
+    """Tests for utils.XpuMetricsV1, which is given a metrics file to read."""
+
+    @staticmethod
+    def _fake_which(cmd, *args, **kwargs):
+        """Stand-in for ``which``: only ``xpu-smi`` resolves to a path."""
+        return "/path/to/xpu-smi" if cmd == "xpu-smi" else None
+
+    def setUp(self):
+        """Write a small metrics file holding two rows for each tile."""
+        # Fixtures live in setUp rather than an overridden __init__, which is
+        # the hook unittest provides for per-test preparation.
+        self.test_file = pathlib.Path("dummy_xpu_metrics.txt")
+        # Note that xpu-smi files don't end in a newline.
+        text = "gpu,tile,util\n0,0,5\n0,1,3\n0,0,50\n0,1,30"
+        self.test_file.write_text(text, encoding="utf-8")
+
+    def tearDown(self):
+        """Remove the metrics file written by setUp."""
+        self.test_file.unlink(missing_ok=True)
+
+    def test_metrics_are_available(self):
+        """are_available() is truthy when ``which`` resolves ``xpu-smi``."""
+        # Route the patched lookup through the stub so the code under test
+        # sees a real path (or None) instead of an always-truthy MagicMock.
+        # NOTE(review): assumes are_available() looks up "xpu-smi" -- confirm.
+        with patch("utils.which", autospec=True, side_effect=self._fake_which):
+            self.assertTrue(utils.XpuMetricsV1.are_available())
+
+    def test_get_metrics(self):
+        """get_metrics() returns the last row written for each tile."""
+        metrics = utils.XpuMetricsV1(metrics_file=self.test_file).get_metrics()
+        self.assertEqual({"gpu": "0", "tile": "0", "util": "50"}, metrics[0])
+        self.assertEqual({"gpu": "0", "tile": "1", "util": "30"}, metrics[1])
+
+    def test_get_metrics_invalid_device(self):
+        """An id that is absent from the file raises DeviceIdError."""
+        with self.assertRaises(utils.DeviceIdError) as context:
+            utils.XpuMetricsV1(metrics_file=self.test_file).get_last_device_entry(99)
+        self.assertEqual(
+            "Device ID 99 not found in the metrics file.", str(context.exception)
+        )
+
+    def test_previous_line(self):
+        """Successive get_previous_line() calls walk the file backwards."""
+        xpu_metrics = utils.XpuMetricsV1(metrics_file=self.test_file)
+        with self.test_file.open("rb") as f:
+            # Start from the final byte; the file has no trailing newline.
+            f.seek(-1, os.SEEK_END)
+            for expected in ("0,1,30", "0,0,50", "0,1,3", "0,0,5"):
+                self.assertEqual(expected, xpu_metrics.get_previous_line(f))
+
+
+class TestXpuMetricsV2(unittest.TestCase):
+    """Tests for utils.XpuMetricsV2, which runs an ``xpu-smi`` command."""
+
+    @classmethod
+    def setUpClass(cls):
+        """Put ``xpu_smi_dir/`` at the front of PATH for the whole class."""
+        # Keep the raw string so tearDownClass can restore PATH exactly, and
+        # use os.pathsep instead of a hard-coded ":" so the separator is
+        # right on every platform.
+        cls.old_path = os.environ["PATH"]
+        os.environ["PATH"] = os.pathsep.join(["xpu_smi_dir/", cls.old_path])
+
+    @classmethod
+    def tearDownClass(cls):
+        """Restore the PATH value saved by setUpClass."""
+        os.environ["PATH"] = cls.old_path
+
+    def test_metrics_are_available(self):
+        """are_available() is truthy while ``utils.which`` is patched out."""
+        with patch("utils.which", autospec=True):
+            self.assertTrue(utils.XpuMetricsV2.are_available())
+
+    def test_get_metrics(self):
+        """get_metrics() yields one entry per tile with the expected fields.
+
+        NOTE(review): the expected values presumably come from a stub
+        ``xpu-smi`` under ``xpu_smi_dir/`` (relative to the working
+        directory) -- confirm that fixture exists.
+        """
+        wanted = ("Timestamp", "DeviceId", "TileId", "Compute Engine 0 (%)")
+        metrics = utils.XpuMetricsV2().get_metrics()
+        # Compare only a stable subset of the columns.
+        reduced_metrics = [
+            {k: v for k, v in metric.items() if k in wanted} for metric in metrics
+        ]
+
+        self.assertListEqual(
+            [
+                {
+                    "Timestamp": "16:53:07.186",
+                    "DeviceId": "0",
+                    "TileId": "0",
+                    "Compute Engine 0 (%)": "0.00",
+                },
+                {
+                    "Timestamp": "16:53:07.187",
+                    "DeviceId": "0",
+                    "TileId": "1",
+                    "Compute Engine 0 (%)": "0.00",
+                },
+            ],
+            reduced_metrics,
+        )
+
+    def test_get_metrics_two(self):
+        """get_metrics() calls ``utils.run`` once with the xpu-smi dump command."""
+        with patch("utils.run", autospec=True) as mock_run:
+            utils.XpuMetricsV2().get_metrics()
+            mock_run.assert_called_once_with(
+                ["xpu-smi", "dump", "--metrics", "0,1", "--device", "-1", "-n", "1"],
+                capture_output=True,
+                text=True,
+                check=True,
+            )
+
+
+# Run the suite when this file is executed directly (``python test_utils.py``).
+if __name__ == "__main__":
+    unittest.main()
diff --git a/macos/scripts/test_wandb_mp.py b/macos/scripts/test_wandb_mp.py
diff --git a/macos/scripts/try_sacred.py b/macos/scripts/try_sacred.py
@@ -0,0 +1,25 @@
+"""Minimal Sacred experiment that prints a greeting, observed via MongoDB."""
+from sacred import Experiment
+from sacred.observers import FileStorageObserver, MongoObserver
+
+ex = Experiment()
+# Alternative observer that stores runs in a local directory instead:
+# ex.observers.append(FileStorageObserver('my_runs'))
+ex.observers.append(
+    MongoObserver(db_name="sacred_test", url="mongodb://localhost:27017/")
+)
+
+
+# The local variables assigned in a config function become the experiment's
+# configuration entries; ``message`` is injected into ``my_main`` by name.
+@ex.config
+def my_config():
+    recipient = "world"
+    message = "Hello %s!" % recipient
+
+
+# The automain function must stay the last definition in the file: Sacred runs
+# the experiment as soon as the decorator is applied, so nothing may follow it.
+@ex.automain
+def my_main(message):
+    """See https://sacred.readthedocs.io/en/stable/quickstart.html"""
+    print(message)
diff --git a/macos/scripts/try_wandb.py b/macos/scripts/try_wandb.py
@@ -0,0 +1,31 @@
+import random
+
+import wandb
+
+# Single source for the epoch count: recorded in the run config and used to
+# bound the simulated training loop.
+epochs = 10
+
+# Start a new wandb run to track this script. Using the run as a context
+# manager finishes it (uploading any remaining data) even if the loop raises.
+with wandb.init(
+    # Set the wandb entity where your project will be logged (generally your team name).
+    entity="iain_",
+    # Set the wandb project where this run will be logged.
+    project="my-awesome-project",
+    # Track hyperparameters and run metadata.
+    config={
+        "learning_rate": 0.02,
+        "architecture": "CNN",
+        "dataset": "CIFAR-100",
+        "epochs": epochs,
+    },
+) as run:
+    # Simulate training.
+    offset = random.random() / 5
+    for epoch in range(2, epochs):
+        acc = 1 - 2**-epoch - random.random() / epoch - offset
+        loss = 2**-epoch + random.random() / epoch + offset
+
+        # Log metrics to wandb.
+        run.log({"acc": acc, "loss": loss})