diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 36a8b6e3..198b41ad 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -7,7 +7,7 @@
   "workspaceFolder": "/ws",
   "containerEnv": {
     "ROS_DOMAIN_ID": "42",
-    "ROS_LOCALHOST_ONLY": "1",
+    "ROS_LOCALHOST_ONLY": "0",
     "CYCLONEDDS_URI": "file:///ws/docker/cyclonedds.xml"
   },
   "customizations": {
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 0f7c8237..dee58592 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -51,6 +51,12 @@ jobs:
           cp -a "${GITHUB_WORKSPACE}/ws-src/requirements.txt" /ws/requirements.txt || true
           ls /ws
 
+      - name: Install Python requirements
+        shell: bash
+        run: |
+          set -exo pipefail
+          pip install --no-cache-dir -r /ws/requirements.txt
+
       - name: Restore colcon build cache
         id: colcon-cache
         uses: actions/cache@v4
diff --git a/.gitignore b/.gitignore
index 2734fac9..a054c13f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -176,4 +176,6 @@ cython_debug/
 # Ros build ignored
 install
 log
+logs
+kart_logs
 build
diff --git a/compose/docker-compose.yml b/compose/docker-compose.yml
index e2530e7c..c4d88d88 100644
--- a/compose/docker-compose.yml
+++ b/compose/docker-compose.yml
@@ -3,7 +3,7 @@ services:
   dev:
     image: ghcr.io/evc-purdue/ros2-humble-dev:main
     container_name: ros2-dev
-    hostname: ros2-dev
+    hostname: "${TARGET_DEVICE:-jetson}-dev"
     volumes:
       - ../:/ws
       - ros_cache:/root/.cache
@@ -11,20 +11,19 @@ services:
     environment:
       - ROS_DISTRO=humble
       - ROS_DOMAIN_ID=42
-      - ROS_LOCALHOST_ONLY=1
+      - ROS_LOCALHOST_ONLY=0
       - CYCLONEDDS_URI=file:///ws/docker/cyclonedds.xml
-    ports:
-      - "8000:8000"
+      - TARGET_DEVICE=${TARGET_DEVICE:-jetson}
+      - ROS_NETWORK_INTERFACE=${ROS_NETWORK_INTERFACE:-enP8p1s0}
 #    devices:
-#      - "/dev/ttyACM0:/dev/ttyACM0"
-#      - "/dev/ttyTHS1:/dev/ttyTHS1"         # GPS
-#      - "/dev/i2c-7:/dev/i2c-7"
-#      - "/dev/spidev12.0:/dev/spidev12.0" # SPI bus 12, CS0
-#      - "/dev/video0:/dev/video0"
+#      - "/dev/ttyACM0:/dev/ttyACM0" # canable
+#      - "/dev/ttyTHS1:/dev/ttyTHS1" # GPS
+#      - "/dev/i2c-7:/dev/i2c-7"     # IMU
+#      - "/dev/video0:/dev/video0"   # camera
     stdin_open: true
     tty: true
     restart: always
     command: bash /ws/scripts/start.bash
     network_mode: host
 volumes:
-  ros_cache:
+  ros_cache:
\ No newline at end of file
diff --git a/docker/Dockerfile b/docker/Dockerfile
index eca954a4..5b5141f7 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -20,6 +20,7 @@ RUN apt-get -o Acquire::http::Pipeline-Depth="0" \
       ros-${ROS_DISTRO}-rclpy ros-${ROS_DISTRO}-ros2bag \
       ros-${ROS_DISTRO}-tf2-tools ros-${ROS_DISTRO}-image-transport \
       ros-${ROS_DISTRO}-cv-bridge \
+      openssh-client \
       git curl nano \
       socat rtklib i2c-tools && \
     rm -rf /var/lib/apt/lists/*
diff --git a/docker/cyclonedds.xml b/docker/cyclonedds.xml
index 22c864df..37b993e5 100644
--- a/docker/cyclonedds.xml
+++ b/docker/cyclonedds.xml
@@ -3,6 +3,14 @@
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="https://cdds.io/config https://raw.githubusercontent.com/eclipse-cyclonedds/cyclonedds/master/etc/cyclonedds.xsd">
   <Domain Id="any">
+    <General>
+      <AllowMulticast>true</AllowMulticast>
+
+      <Interfaces>
+        <NetworkInterface name="${ROS_NETWORK_INTERFACE}"/>
+      </Interfaces>
+    </General>
+
     <Discovery>
       <ParticipantIndex>auto</ParticipantIndex>
       <MaxAutoParticipantIndex>32</MaxAutoParticipantIndex>
diff --git a/docs/README/quickstart.md b/docs/README/quickstart.md
index d8890383..97625582 100644
--- a/docs/README/quickstart.md
+++ b/docs/README/quickstart.md
@@ -24,12 +24,19 @@ sudo docker compose -f compose/docker-compose.yml up -d dev
 ```
 3) Exec into container
 ```bash
-docker exec -it <container_name> bash
+docker exec -it ros2-dev bash
 ```
 4) Setup ROS
 ```bash
 source /opt/ros/humble/setup.bash
 colcon build
 source install/setup.bash
-ros2 launch autonomous_kart bringup_pi.launch.py
-```
\ No newline at end of file
+ros2 launch autonomous_kart bringup_${TARGET_DEVICE:-jetson}.launch.py
+```
+
+
+```bash
+ros2 bag record -s mcap -o /ws/bags/$(date +%Y%m%d_%H%M%S) /mpc/status /mpc/residual_mode /mpc/residual_revert /cmd_drive /odom /gps /imu /imu/calibration_status /e_comms/kart_speed_m_per_s /e_comms/throttle_pwm /e_comms/steering_pwm /e_comms/adcb_state /e_comms/rc_mode /system_state /track_angles /manual_commands /pathfinder_params
+```
+
+docker cp ros2-dev:/ws/bags .
diff --git a/requirements.txt b/requirements.txt
index 4aaaf627..3c286242 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,4 +9,6 @@ python-can
 flask
 flask-cors
 pyserial
-smbus2==0.6.1
\ No newline at end of file
+smbus2==0.6.1
+joblib
+scikit-learn
\ No newline at end of file
diff --git a/scripts/start.bash b/scripts/start.bash
index af0722e6..9ca04e74 100755
--- a/scripts/start.bash
+++ b/scripts/start.bash
@@ -7,24 +7,25 @@ source /opt/ros/humble/setup.bash
 colcon build
 pkill -f master_api || true
 
-# Lat/long are approx west lafayette, not important to be highly accurate
 mkdir -p /ws/logs
-str2str -in ntrip://shayman1:shayman1@108.59.49.226:9000/MSM4_VRS -out tcpsvr://:9195 -b 1 -p 40.4376975222 -86.9444409756 200 >> /ws/logs/str2str.log 2>&1 &
-
-# Logs supervisor - crash tolerant
-(
-  while true; do
-    RUN_NAME="run_$(date -u +%Y%m%d_%H%M%S)"
-    ros2 bag record -a \
-      --storage mcap \
-      --storage-preset-profile zstd_fast \
-      --max-bag-duration 60 \
-      --exclude '/camera/.*' \
-      --output "/ws/logs/$RUN_NAME" \
-      >> /ws/logs/recorder.log 2>&1
-    echo "[$(date -u +%FT%TZ)] recorder exited, restarting in 1s" >> /ws/logs/recorder.log
-    sleep 1
-  done
-) &
 
+if [ "$TARGET_DEVICE" == "jetson" ]; then
+    # Lat/long are approx west lafayette, not important to be highly accurate
+    str2str -in ntrip://shayman1:shayman1@108.59.49.226:9000/MSM4_VRS -out tcpsvr://:9195 -b 1 -p 40.4376975222 -86.9444409756 200 >> /ws/logs/str2str.log 2>&1 &
+elif [ "$TARGET_DEVICE" == "rubik" ]; then
+    (
+    while true; do
+        RUN_NAME="run_$(date -u +%Y%m%d_%H%M%S)"
+        ros2 bag record -a \
+        --storage mcap \
+        --storage-preset-profile zstd_fast \
+        --max-bag-duration 60 \
+        --exclude '/camera/.*' \
+        --output "/ws/logs/$RUN_NAME" \
+        >> /ws/logs/recorder.log 2>&1
+        echo "[$(date -u +%FT%TZ)] recorder exited, restarting in 1s" >> /ws/logs/recorder.log
+        sleep 1
+    done
+    ) &
+fi
 sleep infinity
\ No newline at end of file
diff --git a/sim/__init__.py b/sim/__init__.py
new file mode 100644
index 00000000..78df633a
--- /dev/null
+++ b/sim/__init__.py
@@ -0,0 +1,2 @@
+"""Offline data-driven kart simulator. See
+docs/superpowers/specs/2026-05-18-data-driven-sim-design.md."""
diff --git a/sim/clamp.py b/sim/clamp.py
new file mode 100644
index 00000000..eccbf385
--- /dev/null
+++ b/sim/clamp.py
@@ -0,0 +1,166 @@
+"""Off-distribution Mahalanobis clamp for the residual MLP."""
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass
+
+import numpy as np
+
+
+@dataclass
+class Clamp:
+    mean: np.ndarray
+    cov_inv: np.ndarray
+    d_warn: float
+    d_max: float
+
+    def _alpha_from_d(self, d: float) -> float:
+        if d <= self.d_warn:
+            return 1.0
+        if d >= self.d_max:
+            return 0.0
+        return 1.0 - (d - self.d_warn) / (self.d_max - self.d_warn)
+
+    def alpha(self, feat_normalised: np.ndarray) -> float:
+        delta = feat_normalised - self.mean
+        d = float(np.sqrt(delta @ self.cov_inv @ delta))
+        return self._alpha_from_d(d)
+
+    def save(self, path: str) -> None:
+        payload = {
+            "mean": self.mean.tolist(),
+            "cov_inv": self.cov_inv.tolist(),
+            "d_warn": self.d_warn,
+            "d_max": self.d_max,
+        }
+        with open(path, "w") as f:
+            json.dump(payload, f, indent=2)
+
+    @classmethod
+    def load(cls, path: str) -> "Clamp":
+        with open(path) as f:
+            d = json.load(f)
+        return cls(
+            mean=np.asarray(d["mean"], dtype=np.float64),
+            cov_inv=np.asarray(d["cov_inv"], dtype=np.float64),
+            d_warn=float(d["d_warn"]),
+            d_max=float(d["d_max"]),
+        )
+
+
+def fit_clamp(feats_normalised: np.ndarray, d_warn_pct=95, d_max_pct=99.5) -> Clamp:
+    mean = feats_normalised.mean(0)
+    cov = np.cov(feats_normalised.T)
+    cov += np.eye(cov.shape[0]) * 1e-6
+    cov_inv = np.linalg.inv(cov)
+    delta = feats_normalised - mean
+    d = np.sqrt(np.einsum("ni,ij,nj->n", delta, cov_inv, delta))
+    d_warn = float(np.percentile(d, d_warn_pct))
+    d_max = float(np.percentile(d, d_max_pct))
+    return Clamp(mean=mean, cov_inv=cov_inv, d_warn=d_warn, d_max=d_max)
+
+
+def fit_and_write(
+    train_npz_paths,
+    params_path: str,
+    mlp_path: str,
+    norm_path: str,
+    out_clamp_path: str,
+    out_noise_post_path: str,
+    out_residual_emp_post: str,
+) -> None:
+    import json as _json
+    import torch
+    from sim.identify_bicycle import BicycleParams, predict_derivatives
+    from sim.load_bags import AlignedBag
+    from sim.noise_model import NoiseModel, estimate_noise
+    from sim.residual_mlp import (
+        FEATURE_DIM,
+        Normaliser,
+        ResidualMLP,
+        build_features,
+    )
+
+    with open(params_path) as f:
+        params = BicycleParams(**_json.load(f)["params"])
+    with open(norm_path) as fn:
+        norm = Normaliser.from_json(_json.load(fn))
+    net = ResidualMLP()
+    net.load_state_dict(torch.load(mlp_path, map_location="cpu"))
+    net.eval()
+
+    feats_all, dv_res_post, psi_res_post = [], [], []
+    ct_all, cs_all, v_all = [], [], []
+    is_straight_all, is_steady_all = [], []
+    for path in train_npz_paths:
+        bag = AlignedBag.from_npz(path)
+        mask = bag.autonomous & (bag.v > 0.5)
+        if mask.sum() < 100:
+            continue
+        dt = float(np.median(np.diff(bag.t)))
+        dv_m, psi_m = predict_derivatives(params, bag.v, bag.cmd_throttle, bag.cmd_steer, dt)
+        feats = build_features(bag.v, bag.psi_dot, bag.cmd_throttle, bag.cmd_steer,
+                               bag.cmd_throttle_hist, bag.cmd_steer_hist)
+        feats_n = norm.apply(feats)
+        with torch.no_grad():
+            res = net(torch.from_numpy(feats_n.astype(np.float32))).numpy()
+        dv_res_post.append((bag.dv_dt - dv_m - res[:, 0])[mask])
+        psi_res_post.append((bag.psi_dot - psi_m - res[:, 1])[mask])
+        feats_all.append(feats_n[mask])
+        ct_all.append(bag.cmd_throttle[mask])
+        cs_all.append(bag.cmd_steer[mask])
+        v_all.append(bag.v[mask])
+        is_straight_all.append(np.abs(bag.cmd_steer[mask]) < 2.0)
+        d_throttle = np.abs(np.diff(bag.cmd_throttle[mask], prepend=bag.cmd_throttle[mask][0]))
+        is_steady_all.append(d_throttle < 2.0)
+
+    feats_concat = np.concatenate(feats_all).astype(np.float32)
+    clamp = fit_clamp(feats_concat)
+    clamp.save(out_clamp_path)
+    print(f"wrote {out_clamp_path}: d_warn={clamp.d_warn:.2f}, d_max={clamp.d_max:.2f}")
+
+    dv_res = np.concatenate(dv_res_post)
+    psi_res = np.concatenate(psi_res_post)
+    nm = estimate_noise(
+        dv_res, psi_res,
+        np.concatenate(ct_all), np.concatenate(cs_all),
+        np.concatenate(is_straight_all), np.concatenate(is_steady_all),
+    )
+    nm.save(out_noise_post_path)
+    print(f"wrote {out_noise_post_path}: sigma_v={nm.sigma_v:.3f}, sigma_psidot={nm.sigma_psidot:.3f}")
+
+    v_arr = np.concatenate(v_all)
+    cs = np.concatenate(cs_all)
+    v_edges = np.linspace(0.0, 15.0, 6)
+    cs_edges = np.linspace(0.0, 30.0, 4)
+    v_bin = np.clip(np.digitize(v_arr, v_edges) - 1, 0, v_edges.size - 2)
+    cs_bin = np.clip(np.digitize(np.abs(cs), cs_edges) - 1, 0, cs_edges.size - 2)
+    np.savez_compressed(out_residual_emp_post,
+                        dv_res=dv_res, psi_res=psi_res,
+                        v_bin=v_bin, cs_bin=cs_bin,
+                        v_edges=v_edges, cs_edges=cs_edges)
+    print(f"wrote {out_residual_emp_post}: {dv_res.size} samples")
+
+
+def main():
+    import argparse
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--train", nargs="+", required=True)
+    ap.add_argument("--params", required=True)
+    ap.add_argument("--mlp", required=True)
+    ap.add_argument("--norm", required=True)
+    ap.add_argument("--out-clamp", required=True)
+    ap.add_argument("--out-noise-post", required=True)
+    ap.add_argument("--out-residual-emp-post", required=True)
+    args = ap.parse_args()
+    fit_and_write(args.train, args.params, args.mlp, args.norm,
+                  args.out_clamp, args.out_noise_post, args.out_residual_emp_post)
+
+
+if __name__ == "__main__":
+    import os
+    import sys
+    REPO = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    if REPO not in sys.path:
+        sys.path.insert(0, REPO)
+    main()
diff --git a/sim/closed_loop.py b/sim/closed_loop.py
new file mode 100644
index 00000000..8e142fb3
--- /dev/null
+++ b/sim/closed_loop.py
@@ -0,0 +1,499 @@
+"""Standalone closed-loop simulator for the autonomous kart MPC planner.
+
+This module is a library. Use sim/runner.py for the CLI.
+"""
+from __future__ import annotations
+
+import math
+import os
+import sys
+import types
+from dataclasses import dataclass
+from typing import List, Optional, Tuple
+
+import numpy as np
+
+# ---------------------------------------------------------------------------
+# ROS stubs — MPCPlanner imports std_msgs.msg.Float32MultiArray at module level.
+# ---------------------------------------------------------------------------
+def _install_ros_stubs() -> None:
+    if "std_msgs" not in sys.modules:
+        std_msgs = types.ModuleType("std_msgs")
+        msg_mod = types.ModuleType("std_msgs.msg")
+
+        class Float32MultiArray:  # noqa: D401
+            def __init__(self, data=None):
+                self.data = list(data) if data is not None else []
+
+        msg_mod.Float32MultiArray = Float32MultiArray
+        std_msgs.msg = msg_mod
+        sys.modules["std_msgs"] = std_msgs
+        sys.modules["std_msgs.msg"] = msg_mod
+
+
+_install_ros_stubs()
+
+# Add the package source to sys.path so we can import the MPC planner directly.
+HERE = os.path.dirname(os.path.abspath(__file__))
+REPO = os.path.dirname(HERE)
+PKG_SRC = os.path.join(REPO, "src", "autonomous_kart")
+if PKG_SRC not in sys.path:
+    sys.path.insert(0, PKG_SRC)
+if REPO not in sys.path:
+    sys.path.insert(0, REPO)
+
+from autonomous_kart.nodes.localization.sim_bicycle import BicycleModel  # noqa: E402
+from autonomous_kart.nodes.pathfinder.planners.base import (  # noqa: E402
+    KartConstants,
+    PlannerInputs,
+)
+from autonomous_kart.nodes.pathfinder.planners.mpc import MPCPlanner  # noqa: E402
+
+
+# ---------------------------------------------------------------------------
+# Mock node — MPCPlanner needs node.get_parameter and node.create_publisher.
+# ---------------------------------------------------------------------------
+class _MockParam:
+    def __init__(self, value):
+        self.value = value
+
+
+class _MockPub:
+    def publish(self, _msg):
+        pass
+
+
+class MockNode:
+    def __init__(self, system_frequency: float):
+        self._params = {"system_frequency": system_frequency}
+
+    def get_parameter(self, name: str) -> _MockParam:
+        return _MockParam(self._params[name])
+
+    def create_publisher(self, _msg_type, _topic: str, _qos: int) -> _MockPub:
+        return _MockPub()
+
+
+# ---------------------------------------------------------------------------
+# SimResult
+# ---------------------------------------------------------------------------
+@dataclass
+class SimResult:
+    completed_laps: int
+    total_time_s: float
+    avg_lap_time_s: float
+    max_abs_d: float
+    avg_speed: float
+    safe: bool
+    aborted: bool
+    xs: np.ndarray       # (N,) m
+    ys: np.ndarray       # (N,) m
+    yaws: np.ndarray     # (N,) rad
+    vs: np.ndarray       # (N,) m/s
+    ds: np.ndarray       # (N,) signed lateral offset from racing line
+    times: np.ndarray    # (N,) seconds
+    # Diagnostics captured from the planner's residual learner at end of run.
+    # Empty dict if planner/learner missing or capture failed.
+    residual_stats: dict = None  # type: ignore[assignment]
+
+
+# ---------------------------------------------------------------------------
+# Default MPC params (from pathfinder.yaml).
+# ---------------------------------------------------------------------------
+DEFAULT_MPC = {
+    "horizon_steps": 20,
+    "dt_s": 0.05,
+    "num_samples": 48,
+    "steer_sigma_deg": 6.0,
+    "accel_sigma_mps2": 1.0,
+    "proj_window_back": 5,
+    "proj_window_fwd": 60,
+    "frenet_v_window_s": 0.3,
+    # NOTE: the planner reads `target_speed_mps` (a literal m/s cap), NOT a
+    # percentage. The kart's actual speed is min(line's per-tick vx_mps,
+    # target_speed_mps). For line6.csv vx_mps=6 throughout, the line drives
+    # the speed unless target_speed_mps is set lower.
+    "target_speed_mps": 12.0,
+    "target_speed_pct": 1.0,  # unused by planner; kept for backward compat
+    "track_half_width_m": 2.0,
+    "safety_margin_m": 0.5,
+    "kart_half_width_m": 0.5025,
+    "w_d": 100.0,
+    "w_heading": 30.0,
+    "w_speed": 5.0,
+    "w_delta": 5.0,
+    "w_delta_rate": 50.0,
+    "w_accel": 2.0,
+    "w_boundary": 1000.0,
+    "w_progress": 1.0,
+    "w_terminal_d": 200.0,
+    "w_terminal_heading": 100.0,
+    "w_a_lat": 50.0,
+    "w_edge": 200.0,
+    "edge_inner_frac": 0.5,
+    "rejoin_cte_activate": 2.5,
+    "rejoin_cte_deactivate": 1.0,
+    "rejoin_merge_lookahead_m": 15.0,
+    "rejoin_min_turning_radius": 3.0,
+    "max_consecutive_solver_failures": 3,
+    "infeasible_cost": 1.0e9,
+    "feasibility_threshold": 5.0e8,
+    # residual learner runs in shadow mode by default.
+    # cache disabled so the script runs without a writable /root/.cache.
+    "residual.mode": "shadow",
+    "residual.cache_enabled": False,
+    "residual.target_horizon_s": 0.5,
+    "residual.forgetting_factor": 0.99,
+    "residual.initial_cov": 1000.0,
+    "residual.min_train_speed_mps": 0.5,
+    "residual.error_window": 200,
+}
+
+
+# ---------------------------------------------------------------------------
+# Internal helpers
+# ---------------------------------------------------------------------------
+def _line_xy(line: List[Tuple[float, ...]]) -> Tuple[np.ndarray, np.ndarray]:
+    arr = np.asarray([[r[1], r[2]] for r in line], dtype=np.float64)
+    return arr[:, 0], arr[:, 1]
+
+
+def _line_tangent_normal(
+    lx: np.ndarray, ly: np.ndarray,
+) -> Tuple[np.ndarray, np.ndarray]:
+    dx = np.empty_like(lx)
+    dy = np.empty_like(ly)
+    dx[1:-1] = lx[2:] - lx[:-2]
+    dy[1:-1] = ly[2:] - ly[:-2]
+    dx[0] = lx[1] - lx[0]
+    dy[0] = ly[1] - ly[0]
+    dx[-1] = lx[-1] - lx[-2]
+    dy[-1] = ly[-1] - ly[-2]
+    L = np.hypot(dx, dy)
+    L = np.where(L < 1e-9, 1.0, L)
+    tx = dx / L
+    ty = dy / L
+    # Left-hand normal (positive d side, matches MPC Frenet convention).
+    nx = -ty
+    ny = tx
+    return nx, ny
+
+
+def _line_length(line: List[Tuple[float, ...]]) -> float:
+    s_last = float(line[-1][0])
+    x0, y0 = float(line[0][1]), float(line[0][2])
+    xn, yn = float(line[-1][1]), float(line[-1][2])
+    seg = math.hypot(xn - x0, yn - y0)
+    return s_last + seg
+
+
+def _signed_d_at(
+    x: float, y: float, j: int,
+    lx: np.ndarray, ly: np.ndarray,
+    nx: np.ndarray, ny: np.ndarray,
+) -> float:
+    return float((x - lx[j]) * nx[j] + (y - ly[j]) * ny[j])
+
+
+def _kart_from_yaml() -> KartConstants:
+    """KartConstants for the MPC planner — sourced from KartPhysics."""
+    from sim.kart_params import KartPhysics
+    return KartPhysics().to_kart_constants()
+
+
+# ---------------------------------------------------------------------------
+# Main simulate() function
+# ---------------------------------------------------------------------------
+def simulate(
+    line: List[Tuple[float, ...]],
+    mpc_params: dict,
+    *,
+    sim_backend: str = "datasim",
+    datasim_model_dir: str = "sim/model",
+    datasim_use_mlp: bool = True,
+    noise_mode: str = "none",
+    rng_seed: int = 0,
+    n_laps: int = 2,
+    dt: float = 1.0 / 60.0,
+    max_steps: int = 30000,
+    track_half_width: float = 2.0,
+    kart_half_width: float = 0.5025,
+    safety_margin: float = 0.2,
+) -> SimResult:
+    """Run a closed-loop MPC simulation over `n_laps` of the racing line.
+
+    Parameters
+    ----------
+    line:
+        Racing line as a list of tuples ``(s, x, y, ...)``.
+    mpc_params:
+        MPC parameter dict (see DEFAULT_MPC for available keys).
+    sim_backend:
+        ``"bicycle"`` for BicycleModel, ``"datasim"`` for DataSim.
+    datasim_model_dir:
+        Directory containing the DataSim model files
+        (``bicycle_params.json``, ``noise.json``, ``mlp.pt``, etc.).
+    noise_mode:
+        Passed through to DataSim (``"none"``, ``"gaussian"``, ``"bootstrap"``).
+    rng_seed:
+        RNG seed for both MPC and DataSim noise.
+    n_laps:
+        Number of laps to complete before stopping.
+    dt:
+        Simulation timestep in seconds.
+    max_steps:
+        Hard cap on simulation steps.
+    track_half_width:
+        Half-width of the track in metres.
+    kart_half_width:
+        Half-width of the kart in metres.
+    safety_margin:
+        Additional margin from track edge (safety check).
+    """
+    kart = _kart_from_yaml()
+
+    # Build sim backend
+    if sim_backend == "bicycle":
+        model = BicycleModel(kart.wheelbase_m, kart.v_max_mps, kart.steer_max_deg)
+    elif sim_backend == "datasim":
+        from sim.data_sim import DataSim
+        # Resolve relative model dir from repo root
+        mdir = datasim_model_dir if os.path.isabs(datasim_model_dir) else os.path.join(REPO, datasim_model_dir)
+        # Bicycle physics come from KartPhysics (yaml-aligned), not from a
+        # bag-fit JSON. The MLP residual / clamp files (if present) layer on
+        # top to model anything the structured bicycle can't explain. Disable
+        # with datasim_use_mlp=False — recommended in closed loop since the
+        # currently-trained MLP has a positive dv bias that compounds.
+        mlp_path = os.path.join(mdir, "mlp.pt") if datasim_use_mlp else None
+        norm_path = os.path.join(mdir, "norm.json") if datasim_use_mlp else None
+        clamp_path = os.path.join(mdir, "clamp.json") if datasim_use_mlp else None
+        # Optional: a user-supplied kart_params.json overlay in the model dir
+        # bypasses the code-level defaults.  Format: same as KartPhysics fields.
+        kart_params_json = os.path.join(mdir, "kart_params.json")
+        model = DataSim(
+            params_path=os.path.join(mdir, "bicycle_params.json"),
+            noise_path=os.path.join(mdir, "noise.json"),
+            mlp_path=mlp_path,
+            norm_path=norm_path,
+            clamp_path=clamp_path,
+            residual_emp_path=os.path.join(mdir, "residual_emp.npz"),
+            noise_mode=noise_mode,
+            rng_seed=rng_seed,
+        )
+    else:
+        raise ValueError(f"unknown sim_backend {sim_backend!r}")
+
+    node = MockNode(system_frequency=1.0 / dt)
+    planner = MPCPlanner(mpc_params, kart, line, logger=None, node=node)
+    planner._rng = np.random.default_rng(rng_seed)
+
+    x0, y0 = float(line[0][1]), float(line[0][2])
+    yaw0 = math.atan2(float(line[1][2]) - y0, float(line[1][1]) - x0)
+    model.reset(x0, y0, yaw0)
+
+    lx, ly = _line_xy(line)
+    nx_arr, ny_arr = _line_tangent_normal(lx, ly)
+    line_L = _line_length(line)
+    s_arr = np.asarray([r[0] for r in line], dtype=np.float64)
+    n_line = len(line)
+
+    xs: List[float] = []
+    ys: List[float] = []
+    yaws: List[float] = []
+    vs: List[float] = []
+    ds_list: List[float] = []
+    times: List[float] = []
+    speeds: List[float] = []
+
+    laps = 0
+    safety_limit = track_half_width - kart_half_width - safety_margin
+
+    # Forward-only progress tracker (wrap-aware).
+    progress_idx = 0
+    cumulative_s = 0.0
+    s_prev = float(s_arr[0])
+    win_back = 3
+    win_fwd = 25
+
+    t0_ns = 0
+    completion_step = max_steps
+
+    for step in range(max_steps):
+        now_ns = t0_ns + int(step * dt * 1e9)
+        inputs = PlannerInputs(
+            pose_xy=(model.x, model.y),
+            yaw_rad=model.yaw,
+            speed_mps=model.speed,
+            track_angles=None,
+            now_ns=now_ns,
+        )
+        try:
+            cmd = planner.plan(inputs)
+        except Exception:
+            return SimResult(
+                completed_laps=laps,
+                total_time_s=step * dt,
+                avg_lap_time_s=float("inf"),
+                max_abs_d=float(max(abs(d) for d in ds_list)) if ds_list else 99.0,
+                avg_speed=float(np.mean(speeds) if speeds else 0.0),
+                safe=False,
+                aborted=True,
+                xs=np.asarray(xs),
+                ys=np.asarray(ys),
+                yaws=np.asarray(yaws),
+                vs=np.asarray(vs),
+                ds=np.asarray(ds_list),
+                times=np.asarray(times),
+                residual_stats=_capture_residual_stats(planner),
+            )
+
+        motor_pct, steer_deg = cmd
+        model.step(motor_pct, steer_deg, dt)
+
+        # Forward-windowed nearest-point search
+        idxs = (progress_idx + np.arange(-win_back, win_fwd + 1)) % n_line
+        wx = lx[idxs] - model.x
+        wy = ly[idxs] - model.y
+        wj = int(np.argmin(wx * wx + wy * wy))
+        j = int(idxs[wj])
+        progress_idx = j
+
+        d_signed = _signed_d_at(model.x, model.y, j, lx, ly, nx_arr, ny_arr)
+        t_now = step * dt
+
+        xs.append(model.x)
+        ys.append(model.y)
+        yaws.append(model.yaw)
+        vs.append(model.speed)
+        ds_list.append(d_signed)
+        times.append(t_now)
+        speeds.append(model.speed)
+
+        # Bail if completely off the rails.
+        if abs(d_signed) > 10.0:
+            return SimResult(
+                completed_laps=laps,
+                total_time_s=step * dt,
+                avg_lap_time_s=float("inf"),
+                max_abs_d=float(max(abs(d) for d in ds_list)),
+                avg_speed=float(np.mean(speeds)),
+                safe=False,
+                aborted=True,
+                xs=np.asarray(xs),
+                ys=np.asarray(ys),
+                yaws=np.asarray(yaws),
+                vs=np.asarray(vs),
+                ds=np.asarray(ds_list),
+                times=np.asarray(times),
+                residual_stats=_capture_residual_stats(planner),
+            )
+
+        # Cumulative arc-length with wrap-aware delta.
+        s_now = float(s_arr[j])
+        ds_step = s_now - s_prev
+        if ds_step > line_L * 0.5:
+            ds_step -= line_L
+        elif ds_step < -line_L * 0.5:
+            ds_step += line_L
+        cumulative_s += ds_step
+        s_prev = s_now
+        new_laps = int(cumulative_s // line_L)
+        if new_laps > laps:
+            laps = new_laps
+            if laps >= n_laps:
+                completion_step = step + 1
+                break
+
+    total_time = completion_step * dt
+    avg_lap_time = total_time / max(1, laps)
+    xs_a = np.asarray(xs)
+    ys_a = np.asarray(ys)
+    yaws_a = np.asarray(yaws)
+    vs_a = np.asarray(vs)
+    ds_a = np.asarray(ds_list)
+    times_a = np.asarray(times)
+    max_abs_d = float(np.max(np.abs(ds_a))) if ds_a.size else 99.0
+    avg_speed = float(np.mean(speeds)) if speeds else 0.0
+    safe = (laps >= n_laps) and (max_abs_d <= safety_limit)
+    return SimResult(
+        completed_laps=laps,
+        total_time_s=total_time,
+        avg_lap_time_s=avg_lap_time if laps >= n_laps else float("inf"),
+        max_abs_d=max_abs_d,
+        avg_speed=avg_speed,
+        safe=safe,
+        aborted=False,
+        xs=xs_a,
+        ys=ys_a,
+        yaws=yaws_a,
+        vs=vs_a,
+        ds=ds_a,
+        times=times_a,
+        residual_stats=_capture_residual_stats(planner),
+    )
+
+
+def _capture_residual_stats(planner) -> dict:
+    """Snapshot the planner's residual learner at end of run.
+
+    Returns a dict suitable for JSON serialisation. Best-effort: any attribute
+    that's missing on the user's local fork is silently skipped. Empty dict if
+    the planner has no residual learner at all.
+    """
+    out: dict = {}
+    r = getattr(planner, "residual", None)
+    if r is None:
+        return out
+    for attr in (
+        "mode", "use_gbm", "samples_trained", "samples_accepted_this_run",
+        "outliers_dropped", "off_line_skipped", "divergence_resets",
+        "revert_count", "rls_warmup_samples", "apply_min_samples_this_run",
+        "gbm_enabled", "last_active_model", "last_pred_clipped",
+    ):
+        if hasattr(r, attr):
+            v = getattr(r, attr)
+            try:
+                out[attr] = bool(v) if isinstance(v, (bool, np.bool_)) else (
+                    int(v) if isinstance(v, (int, np.integer)) else
+                    float(v) if isinstance(v, (float, np.floating)) else
+                    str(v)
+                )
+            except Exception:
+                pass
+    # Effective mode (gated)
+    if hasattr(r, "effective_mode"):
+        try:
+            out["effective_mode"] = str(r.effective_mode())
+        except Exception:
+            pass
+    # Theta norms
+    try:
+        import numpy as _np
+        if hasattr(r, "theta_s"):
+            out["theta_s_norm"] = float(_np.linalg.norm(r.theta_s))
+        if hasattr(r, "theta_d"):
+            out["theta_d_norm"] = float(_np.linalg.norm(r.theta_d))
+    except Exception:
+        pass
+    # Mean error window (nominal vs residual)
+    if hasattr(r, "mean_error"):
+        try:
+            nom_s, nom_d, res_s, res_d = r.mean_error()
+            out["mean_err_nom_s"] = float(nom_s)
+            out["mean_err_nom_d"] = float(nom_d)
+            out["mean_err_res_s"] = float(res_s)
+            out["mean_err_res_d"] = float(res_d)
+        except Exception:
+            pass
+    # Last-prediction snapshot (helpful to see what magnitude residual is firing)
+    for attr in ("last_pred_s", "last_pred_d"):
+        if hasattr(r, attr):
+            try:
+                out[attr] = float(getattr(r, attr))
+            except Exception:
+                pass
+    return out
+
+
+if __name__ == "__main__":
+    print("sim/closed_loop.py is a library. Use sim/runner.py.")
diff --git a/sim/data_sim.py b/sim/data_sim.py
new file mode 100644
index 00000000..da02f42a
--- /dev/null
+++ b/sim/data_sim.py
@@ -0,0 +1,205 @@
+"""Runtime: identified-bicycle simulator with the same step() interface as
+sim_bicycle.BicycleModel. MLP residual is added in Task 12."""
+from __future__ import annotations
+
+import json
+import math
+import os
+from collections import deque
+from typing import Deque, Tuple
+
+import numpy as np
+
+from sim.identify_bicycle import BicycleParams
+from sim.load_bags import NUM_STEER_HIST, NUM_THROTTLE_HIST
+from sim.noise_model import NoiseModel
+
+
+class DataSim:
+    def __init__(
+        self,
+        params_path: str | None = None,
+        noise_path: str | None = None,
+        mlp_path: str | None = None,
+        norm_path: str | None = None,
+        clamp_path: str | None = None,
+        residual_emp_path: str | None = None,
+        noise_mode: str = "none",
+        rng_seed: int = 0,
+        bicycle_params: BicycleParams | None = None,
+    ):
+        # Bicycle physics — preferred order:
+        #   1. explicit bicycle_params= (a KartPhysics-derived dataclass)
+        #   2. params_path JSON (legacy bag-fit; kept for back-compat)
+        #   3. yaml-aligned KartPhysics defaults
+        if bicycle_params is not None:
+            self.params = bicycle_params
+        elif params_path is not None and os.path.isfile(params_path):
+            with open(params_path) as f:
+                self.params = BicycleParams(**json.load(f)["params"])
+        else:
+            from sim.kart_params import KartPhysics
+            self.params = KartPhysics().to_bicycle_params()
+        # Noise model: optional (synth ticks if no file).
+        if noise_path is not None and os.path.isfile(noise_path):
+            self.noise = NoiseModel.load(noise_path)
+        else:
+            self.noise = NoiseModel(sigma_v=0.5, sigma_psidot=0.05,
+                                    sigma_steer_cmd=0.2, sigma_throttle_cmd=0.5)
+        if noise_mode not in ("none", "gaussian", "bootstrap"):
+            raise ValueError(f"unknown noise_mode {noise_mode!r}")
+        self.noise_mode = noise_mode
+        self.rng = np.random.default_rng(rng_seed)
+
+        self._mlp = None
+        self._norm = None
+        self._clamp = None
+        self._emp = None
+        if mlp_path is not None and norm_path is not None and clamp_path is not None:
+            self._load_mlp(mlp_path, norm_path, clamp_path)
+        if residual_emp_path is not None:
+            self._emp = np.load(residual_emp_path)
+
+        self.x = self.y = self.yaw = self.v = 0.0
+        self._delta_actual_rad = 0.0
+        self._cmd_throttle_hist: Deque[float] = deque(maxlen=NUM_THROTTLE_HIST)
+        self._cmd_steer_hist: Deque[float] = deque(maxlen=NUM_STEER_HIST)
+        for _ in range(NUM_THROTTLE_HIST):
+            self._cmd_throttle_hist.append(0.0)
+        for _ in range(NUM_STEER_HIST):
+            self._cmd_steer_hist.append(0.0)
+
+    @property
+    def speed(self) -> float:
+        """Alias for v — matches BicycleModel.speed attribute used by run_sim."""
+        return self.v
+
+    def reset(self, x: float, y: float, yaw: float) -> None:
+        self.x, self.y, self.yaw = float(x), float(y), float(yaw)
+        self.v = 0.0
+        self._delta_actual_rad = 0.0
+
+    def _load_mlp(self, mlp_path, norm_path, clamp_path):
+        import torch
+        import json
+        from sim.clamp import Clamp
+        from sim.residual_mlp import Normaliser, ResidualMLP
+        self._mlp = ResidualMLP()
+        self._mlp.load_state_dict(torch.load(mlp_path, map_location="cpu"))
+        self._mlp.train(False)  # equivalent to net.eval() — sets inference mode
+        with open(norm_path) as f:
+            self._norm = Normaliser.from_json(json.load(f))
+        self._clamp = Clamp.load(clamp_path)
+        self._torch = torch
+
+    def _draw_noise(self) -> Tuple[float, float]:
+        if self.noise_mode == "none":
+            return 0.0, 0.0
+        if self.noise_mode == "gaussian" or self._emp is None:
+            return (
+                self.rng.normal(0.0, self.noise.sigma_v),
+                self.rng.normal(0.0, self.noise.sigma_psidot),
+            )
+        v_edges = self._emp["v_edges"]
+        cs_edges = self._emp["cs_edges"]
+        last_steer_abs = abs(self._cmd_steer_hist[-1])
+        vb = int(np.clip(np.digitize(self.v, v_edges) - 1, 0, v_edges.size - 2))
+        cb = int(np.clip(np.digitize(last_steer_abs, cs_edges) - 1, 0, cs_edges.size - 2))
+        mask = (self._emp["v_bin"] == vb) & (self._emp["cs_bin"] == cb)
+        if mask.sum() < 32:
+            return (
+                self.rng.normal(0.0, self.noise.sigma_v),
+                self.rng.normal(0.0, self.noise.sigma_psidot),
+            )
+        idx = self.rng.integers(0, mask.sum())
+        return (
+            float(self._emp["dv_res"][mask][idx]),
+            float(self._emp["psi_res"][mask][idx]),
+        )
+
+    def step(self, target_mps: float, steer_deg: float, dt: float) -> Tuple[float, float, float, float]:
+        p = self.params
+        target_v = max(0.0, min(p.v_max_mps, float(target_mps)))
+
+        tau = p.accel_tau if target_v >= self.v else p.brake_tau
+        alpha = dt / (tau + dt)
+        dv_struct = alpha * (target_v - self.v) / max(dt, 1e-6)
+
+        delta_cmd_rad = math.radians(p.steer_gain * float(steer_deg))
+        slop = math.radians(max(0.0, p.steer_slop_deg))
+        if delta_cmd_rad > self._delta_actual_rad + slop:
+            target_delta = delta_cmd_rad - slop
+        elif delta_cmd_rad < self._delta_actual_rad - slop:
+            target_delta = delta_cmd_rad + slop
+        else:
+            target_delta = self._delta_actual_rad
+        rate_lim = math.radians(p.steer_rate_max_degps) * dt
+        step_delta = max(-rate_lim, min(rate_lim, target_delta - self._delta_actual_rad))
+        self._delta_actual_rad += step_delta
+        delta = self._delta_actual_rad
+
+        if p.wheelbase_m > 1e-6:
+            psi_dot_struct = self.v * math.tan(delta) / p.wheelbase_m
+        else:
+            psi_dot_struct = 0.0
+        psi_dot_struct -= p.slip_angle_at_v * self.v * (1.0 if delta >= 0 else -1.0) * delta
+
+        if self._mlp is not None:
+            from sim.residual_mlp import build_features
+            feat = build_features(
+                np.array([self.v]),
+                np.array([psi_dot_struct]),
+                np.array([float(target_mps)]),
+                np.array([float(steer_deg)]),
+                np.asarray(self._cmd_throttle_hist)[None, :],
+                np.asarray(self._cmd_steer_hist)[None, :],
+            )
+            feat_n = self._norm.apply(feat)
+            alpha = self._clamp.alpha(feat_n[0])
+            with self._torch.no_grad():
+                pred = self._mlp(self._torch.from_numpy(feat_n.astype(np.float32))).numpy()[0]
+            d_dv = float(alpha * pred[0])
+            d_psi = float(alpha * pred[1])
+        else:
+            d_dv = 0.0
+            d_psi = 0.0
+
+        # Noise — see _draw_noise.
+        n_dv, n_psi = self._draw_noise()
+
+        # --- tire grip saturation ---
+        # The kinematic bicycle has infinite grip — it'll happily turn at any
+        # a_lat. Real karts can't. Above tire_a_lat_max, the tire saturates:
+        # actual a_lat is capped (kart understeers) and forward speed bleeds
+        # toward the grip-limited cap v_grip = sqrt(grip·L / |tan(δ)|).
+        # Calibrated from bag 231452: at κ=0.14 kart steady-states at v≈4.38,
+        # giving a_lat ≈ 2.7 m/s².  Set tire_a_lat_max_mps2=inf to disable.
+        tan_d_abs = abs(math.tan(delta))
+        grip_slip_decel = 0.0
+        if (p.tire_a_lat_max_mps2 != float("inf")
+                and tan_d_abs > 1e-4
+                and p.wheelbase_m > 1e-6):
+            a_lat_demand = self.v * self.v * tan_d_abs / p.wheelbase_m
+            if a_lat_demand > p.tire_a_lat_max_mps2:
+                v_grip = math.sqrt(p.tire_a_lat_max_mps2 * p.wheelbase_m / tan_d_abs)
+                if self.v > v_grip:
+                    # Bleed excess speed toward v_grip with tau_slip.
+                    grip_slip_decel = (self.v - v_grip) / max(p.tire_slip_tau_s, 1e-6)
+                # Cap actual yaw rate so a_lat doesn't exceed grip: ω·v ≤ grip
+                psi_dot_grip = p.tire_a_lat_max_mps2 / max(self.v, 0.1)
+                psi_dot_struct = math.copysign(
+                    min(abs(psi_dot_struct), psi_dot_grip), psi_dot_struct
+                )
+
+        self.v += (dv_struct + d_dv + n_dv - grip_slip_decel) * dt
+        self.v = max(0.0, min(p.v_max_mps, self.v))
+        psi_dot = psi_dot_struct + d_psi + n_psi
+        self.yaw += psi_dot * dt
+        self.yaw = math.atan2(math.sin(self.yaw), math.cos(self.yaw))
+        self.x += self.v * math.cos(self.yaw) * dt
+        self.y += self.v * math.sin(self.yaw) * dt
+
+        self._cmd_throttle_hist.append(float(target_mps))
+        self._cmd_steer_hist.append(float(steer_deg))
+
+        return self.x, self.y, self.yaw, self.v
diff --git a/sim/identify_bicycle.py b/sim/identify_bicycle.py
new file mode 100644
index 00000000..584f73ab
--- /dev/null
+++ b/sim/identify_bicycle.py
@@ -0,0 +1,236 @@
+"""Identify kinematic-bicycle + actuator parameters from aligned bag data.
+
+See docs/superpowers/specs/2026-05-18-data-driven-sim-design.md section 5.
+"""
+from __future__ import annotations
+
+from dataclasses import asdict, dataclass
+from typing import Tuple
+
+import numpy as np
+
+
+@dataclass
+class BicycleParams:
+    accel_tau: float = 0.5
+    brake_tau: float = 0.3
+    throttle_delay_s: float = 0.0
+    steer_delay_s: float = 0.0
+    steer_gain: float = 1.0      # cmd_steer_deg * steer_gain -> wheel angle deg
+    steer_slop_deg: float = 0.0
+    wheelbase_m: float = 1.05
+    steer_rate_max_degps: float = 180.0
+    slip_angle_at_v: float = 0.0  # rad/(m/s); negative reduces yaw rate at high v
+    v_max_mps: float = 12.0
+    # Lateral grip: max a_lat the tires can sustain.  Above this the kart
+    # slips (understeers) and speed bleeds toward sqrt(grip · L / |tan(δ)|).
+    # Default inf preserves the old kinematic-bicycle (infinite-grip) behavior.
+    tire_a_lat_max_mps2: float = float("inf")
+    tire_slip_tau_s: float = 0.2
+
+
+def _delay_shift(cmd: np.ndarray, delay_s: float, dt: float) -> np.ndarray:
+    """Shift cmd backward in time by delay_s (positive delay = use older cmd)."""
+    k = max(0, int(round(delay_s / dt)))
+    if k == 0:
+        return cmd
+    out = np.empty_like(cmd)
+    out[:k] = cmd[0]
+    out[k:] = cmd[:-k]
+    return out
+
+
+def predict_derivatives(
+    p: BicycleParams,
+    v: np.ndarray,
+    cmd_throttle_pct: np.ndarray,
+    cmd_steer_deg: np.ndarray,
+    dt: float,
+) -> Tuple[np.ndarray, np.ndarray]:
+    """Vectorised one-step bicycle prediction. Returns (dv_dt_model, psi_dot_model)."""
+    throttle = _delay_shift(cmd_throttle_pct, p.throttle_delay_s, dt)
+    steer = _delay_shift(cmd_steer_deg, p.steer_delay_s, dt)
+
+    target_v = np.clip(throttle / 100.0, 0.0, 1.0) * p.v_max_mps
+    tau = np.where(target_v >= v, p.accel_tau, p.brake_tau)
+    dv_dt_model = (target_v - v) / np.maximum(tau, 1e-3)
+
+    slop = np.deg2rad(max(0.0, p.steer_slop_deg))
+    delta_cmd_rad = np.deg2rad(p.steer_gain * steer)
+    delta = np.empty_like(delta_cmd_rad)
+    actual = 0.0
+    rate_lim = np.deg2rad(p.steer_rate_max_degps) * dt
+    for i in range(delta_cmd_rad.size):
+        c = delta_cmd_rad[i]
+        if c > actual + slop:
+            target = c - slop
+        elif c < actual - slop:
+            target = c + slop
+        else:
+            target = actual
+        delta_step = np.clip(target - actual, -rate_lim, rate_lim)
+        actual += delta_step
+        delta[i] = actual
+
+    psi_dot_kin = v * np.tan(delta) / max(p.wheelbase_m, 1e-3)
+    psi_dot_model = psi_dot_kin - p.slip_angle_at_v * v * np.sign(delta) * delta
+    return dv_dt_model, psi_dot_model
+
+
+def fit_params(
+    v: np.ndarray,
+    cmd_throttle: np.ndarray,
+    cmd_steer: np.ndarray,
+    dv_dt_obs: np.ndarray,
+    psi_dot_obs: np.ndarray,
+    dt: float,
+    prior: BicycleParams,
+    speed_floor_mps: float = 0.5,
+) -> Tuple[BicycleParams, dict]:
+    """Fit bicycle params by minimising weighted squared residuals on
+    (dv_dt, psi_dot). Returns (fitted_params, info_dict)."""
+    from scipy.optimize import least_squares
+
+    mask = v > speed_floor_mps
+
+    order = ("accel_tau", "brake_tau", "throttle_delay_s", "steer_delay_s",
+             "steer_gain", "steer_slop_deg", "wheelbase_m",
+             "steer_rate_max_degps", "slip_angle_at_v")
+    x0 = np.array([getattr(prior, k) for k in order], dtype=np.float64)
+    lo = np.array([0.05, 0.05, 0.0, 0.0, 0.1, 0.0, 0.95 * prior.wheelbase_m, 30.0, -0.5])
+    hi = np.array([5.0,  5.0,  0.5, 0.5, 5.0, 8.0, 1.05 * prior.wheelbase_m, 500.0, 0.5])
+
+    sigma_v_rough = max(np.std(dv_dt_obs[mask]), 0.5)
+    sigma_p_rough = max(np.std(psi_dot_obs[mask]), 0.1)
+
+    def residuals(x):
+        p = BicycleParams(**{k: float(val) for k, val in zip(order, x)},
+                          v_max_mps=prior.v_max_mps)
+        dv_m, psi_m = predict_derivatives(p, v, cmd_throttle, cmd_steer, dt)
+        r_dv = (dv_dt_obs - dv_m)[mask] / sigma_v_rough
+        r_psi = (psi_dot_obs - psi_m)[mask] / sigma_p_rough
+        return np.concatenate([r_dv, r_psi])
+
+    sol = least_squares(residuals, x0, bounds=(lo, hi), method="trf", max_nfev=500)
+    fitted = BicycleParams(**{k: float(val) for k, val in zip(order, sol.x)},
+                           v_max_mps=prior.v_max_mps)
+    info = {
+        "cost": float(sol.cost), "nfev": int(sol.nfev), "success": bool(sol.success),
+        "sigma_v_rough": float(sigma_v_rough), "sigma_p_rough": float(sigma_p_rough),
+        "param_order": list(order), "x_final": sol.x.tolist(),
+    }
+    return fitted, info
+
+
+def aggregate_bags(npz_paths, autonomous_only: bool = True, speed_floor: float = 0.5):
+    """Concatenate aligned bags into a single training array. Returns
+    (v, cmd_throttle, cmd_steer, dv_dt, psi_dot, dt)."""
+    from sim.load_bags import AlignedBag
+    vs, cts, css, dvs, pss = [], [], [], [], []
+    dt_est = None
+    for path in npz_paths:
+        bag = AlignedBag.from_npz(path)
+        dt_bag = float(np.median(np.diff(bag.t)))
+        if dt_est is None:
+            dt_est = dt_bag
+        elif abs(dt_bag - dt_est) > 1e-6:
+            raise ValueError(f"dt mismatch between bags: {dt_est} vs {dt_bag}")
+        mask = np.ones(bag.t.size, dtype=bool)
+        if autonomous_only:
+            mask &= bag.autonomous
+        mask &= bag.v > speed_floor
+        vs.append(bag.v[mask])
+        cts.append(bag.cmd_throttle[mask])
+        css.append(bag.cmd_steer[mask])
+        dvs.append(bag.dv_dt[mask])
+        pss.append(bag.psi_dot[mask])
+    return (
+        np.concatenate(vs), np.concatenate(cts), np.concatenate(css),
+        np.concatenate(dvs), np.concatenate(pss), dt_est,
+    )
+
+
+def plot_diagnostics(npz_paths, params_path: str, out_dir: str):
+    import json
+    import os
+    import matplotlib
+    matplotlib.use("Agg")
+    import matplotlib.pyplot as plt
+    from sim.load_bags import AlignedBag
+
+    with open(params_path) as f:
+        payload = json.load(f)
+    p = BicycleParams(**payload["params"])
+    os.makedirs(out_dir, exist_ok=True)
+
+    for npz_path in npz_paths:
+        bag = AlignedBag.from_npz(npz_path)
+        mask = bag.autonomous & (bag.v > 0.5)
+        if mask.sum() < 100:
+            continue
+        dt = float(np.median(np.diff(bag.t)))
+        dv_m, psi_m = predict_derivatives(p, bag.v, bag.cmd_throttle, bag.cmd_steer, dt)
+
+        fig, axes = plt.subplots(2, 1, figsize=(12, 6), sharex=True)
+        t_plot = bag.t[mask]
+        axes[0].plot(t_plot, bag.dv_dt[mask], color="#333", lw=0.7, label="obs dv/dt")
+        axes[0].plot(t_plot, dv_m[mask], color="#c33", lw=0.7, label="model dv/dt")
+        axes[0].set_ylabel("dv/dt (m/s^2)")
+        axes[0].legend(loc="upper right", frameon=False)
+        axes[1].plot(t_plot, bag.psi_dot[mask], color="#333", lw=0.7, label="obs psi_dot")
+        axes[1].plot(t_plot, psi_m[mask], color="#37c", lw=0.7, label="model psi_dot")
+        axes[1].set_ylabel("psi_dot (rad/s)")
+        axes[1].set_xlabel("t (s)")
+        axes[1].legend(loc="upper right", frameon=False)
+        tag = os.path.splitext(os.path.basename(npz_path))[0]
+        fig.suptitle(f"ID fit overlay — {tag}")
+        fig.tight_layout()
+        out_png = os.path.join(out_dir, f"id_overlay_{tag}.png")
+        fig.savefig(out_png, dpi=120)
+        plt.close(fig)
+        print(f"wrote {out_png}")
+
+
+def main():
+    import argparse
+    import json
+    import os
+    ap = argparse.ArgumentParser()
+    sub = ap.add_subparsers(dest="cmd", required=True)
+
+    p_fit = sub.add_parser("fit")
+    p_fit.add_argument("--train", nargs="+", required=True)
+    p_fit.add_argument("--out", required=True)
+    p_fit.add_argument("--v-max", type=float, default=12.0)
+    p_fit.add_argument("--wheelbase", type=float, default=1.05)
+
+    p_plot = sub.add_parser("plot")
+    p_plot.add_argument("--bags", nargs="+", required=True)
+    p_plot.add_argument("--params", required=True)
+    p_plot.add_argument("--out-dir", required=True)
+
+    args = ap.parse_args()
+    if args.cmd == "fit":
+        v, ct, cs, dv, ps, dt = aggregate_bags(args.train)
+        print(f"training on {v.size} ticks at dt={dt:.4f}s")
+        prior = BicycleParams(v_max_mps=args.v_max, wheelbase_m=args.wheelbase)
+        fit, info = fit_params(v, ct, cs, dv, ps, dt, prior=prior)
+        out_payload = {"params": asdict(fit), "fit_info": info,
+                       "training_bags": list(args.train)}
+        os.makedirs(os.path.dirname(args.out) or ".", exist_ok=True)
+        with open(args.out, "w") as f:
+            json.dump(out_payload, f, indent=2)
+        print(f"wrote {args.out}")
+        for k, v_ in asdict(fit).items():
+            print(f"  {k}: {v_:.4f}")
+    elif args.cmd == "plot":
+        plot_diagnostics(args.bags, args.params, args.out_dir)
+
+
+if __name__ == "__main__":
+    import os
+    import sys
+    REPO = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    if REPO not in sys.path:
+        sys.path.insert(0, REPO)
+    main()
diff --git a/sim/kart_params.py b/sim/kart_params.py
new file mode 100644
index 00000000..97208c10
--- /dev/null
+++ b/sim/kart_params.py
@@ -0,0 +1,78 @@
+"""Physical kart parameters used by the simulator.
+
+This is the SOURCE OF TRUTH for the kart's physics: it mirrors values from
+``src/autonomous_kart/autonomous_kart/params/pathfinder.yaml``.  The sim uses
+these to model the kart; bag data is treated as ground truth to compare
+against (NOT as input to fit the bicycle).
+
+If you change the kart hardware (wheelbase, max steer, etc.) update this file
+and the yaml together — they MUST agree.
+
+A few values aren't documented in the yaml because they're effective model
+parameters (motor lag, mechanical slop) rather than direct mechanical limits.
+Those defaults are engineering estimates; document any change you make.
+"""
+from __future__ import annotations
+
+from dataclasses import asdict, dataclass
+
+
+@dataclass
+class KartPhysics:
+    # --- direct from pathfinder.yaml ----------------------------------------
+    v_max_mps: float = 12.0                # max kart speed
+    wheelbase_m: float = 1.05              # rear-axle to front-axle
+    steer_max_deg: float = 60.0            # physical front-wheel limit
+    steer_rate_max_degps: float = 180.0    # max steering slew rate
+    a_max_mps2: float = 2.0                # max forward accel (MPC planning bound)
+    a_min_mps2: float = -3.0               # max braking decel (MPC planning bound)
+    a_lat_max_mps2: float = 4.0            # tire-grip lateral cap (MPC planning bound)
+    # --- engineering estimates (NOT in yaml) --------------------------------
+    accel_tau: float = 0.6                 # first-order throttle lag (s)
+    brake_tau: float = 0.4                 # first-order brake lag (s)
+    steer_slop_deg: float = 0.0            # mechanical play in steering linkage
+    # --- conversion factors -------------------------------------------------
+    # steer_gain: ratio of "actual wheel angle" / "planner's commanded angle".
+    # 1.0 means cmd in degrees == wheel angle in degrees.  Set <1 if your
+    # e_comms layer scales the command (e.g. treats it as a percent).
+    steer_gain: float = 1.0
+    throttle_delay_s: float = 0.0          # actuator pipeline delay
+    steer_delay_s: float = 0.0
+    slip_angle_at_v: float = 0.0           # coarse tire-slip correction
+    # --- tire grip (calibrated from bag 231452) ----------------------------
+    # Real karts can't sustain unlimited a_lat — above this the tires slip
+    # (kart understeers) and speed bleeds toward sqrt(grip * L / |tan(delta)|).
+    # The bag shows the kart steady-states at v ≈ sqrt(grip/kappa) in tight
+    # turns: at kappa=0.14, v_steady ≈ 4.38 m/s → grip ≈ v² * kappa ≈ 2.7.
+    # NOTE: this is DIFFERENT from a_lat_max_mps2 above, which is the planner's
+    # cost-shaping bound (typically more optimistic than reality).
+    tire_a_lat_max_mps2: float = 2.7
+    tire_slip_tau_s: float = 0.2           # how fast slip bleeds excess speed
+
+    def to_bicycle_params(self):
+        """Convert to the BicycleParams dataclass the DataSim runtime uses."""
+        from sim.identify_bicycle import BicycleParams
+        # Map only the fields BicycleParams accepts.
+        fields = {
+            "accel_tau", "brake_tau", "throttle_delay_s", "steer_delay_s",
+            "steer_gain", "steer_slop_deg", "wheelbase_m",
+            "steer_rate_max_degps", "slip_angle_at_v", "v_max_mps",
+            "tire_a_lat_max_mps2", "tire_slip_tau_s",
+        }
+        return BicycleParams(**{k: v for k, v in asdict(self).items() if k in fields})
+
+    def to_kart_constants(self):
+        """Convert to the KartConstants the MPC planner reads."""
+        from autonomous_kart.nodes.pathfinder.planners.base import KartConstants
+        return KartConstants(
+            v_max_mps=self.v_max_mps,
+            wheelbase_m=self.wheelbase_m,
+            steer_max_deg=self.steer_max_deg,
+            steer_rate_max_degps=self.steer_rate_max_degps,
+            a_max_mps2=self.a_max_mps2,
+            a_min_mps2=self.a_min_mps2,
+            a_lat_max_mps2=self.a_lat_max_mps2,
+        )
+
+    def as_dict(self) -> dict:
+        return asdict(self)
diff --git a/sim/load_bags.py b/sim/load_bags.py
new file mode 100644
index 00000000..0fedbb00
--- /dev/null
+++ b/sim/load_bags.py
@@ -0,0 +1,272 @@
+"""Bag to aligned 60 Hz npz loader for the data-driven sim.
+
+See docs/superpowers/specs/2026-05-18-data-driven-sim-design.md section 4.
+"""
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Dict, List
+
+import numpy as np
+
+# Per-tick columns produced by the loader. Keep this list authoritative —
+# downstream code reads `AlignedBag.field_names()`.
+NUM_THROTTLE_HIST = 3
+NUM_STEER_HIST = 4
+
+
+@dataclass
+class AlignedBag:
+    t: np.ndarray              # (N,) seconds since bag start
+    cmd_throttle: np.ndarray   # (N,) % (0..100)
+    cmd_steer: np.ndarray      # (N,) deg
+    cmd_throttle_hist: np.ndarray  # (N, NUM_THROTTLE_HIST)
+    cmd_steer_hist: np.ndarray     # (N, NUM_STEER_HIST)
+    v: np.ndarray              # (N,) m/s, low-pass filtered wheel speed
+    psi_dot: np.ndarray        # (N,) rad/s, bias-corrected gyro_z in base_link FLU
+    accel_x: np.ndarray        # (N,) m/s^2, diagnostic only
+    dv_dt: np.ndarray          # (N,) m/s^2 (central diff of v)
+    state_mode: np.ndarray     # (N,) string-encoded state per CLAUDE.md STATES
+    odom_x: np.ndarray         # (N,) m, validation truth
+    odom_y: np.ndarray         # (N,) m
+    odom_yaw: np.ndarray       # (N,) rad
+    autonomous: np.ndarray     # (N,) bool — True iff state_mode == "AUTONOMOUS"
+
+    def field_names(self) -> List[str]:
+        return [f for f in self.__dataclass_fields__]
+
+    def to_npz_dict(self) -> Dict[str, np.ndarray]:
+        return {name: getattr(self, name) for name in self.field_names()}
+
+    @classmethod
+    def from_npz(cls, path: str) -> "AlignedBag":
+        data = np.load(path, allow_pickle=True)  # allow_pickle needed for string arrays (state_mode)
+        return cls(**{name: data[name] for name in cls.__dataclass_fields__})
+
+
+def zoh_align(
+    sample_t: np.ndarray,
+    sample_v: np.ndarray,
+    grid_t: np.ndarray,
+) -> np.ndarray:
+    """Zero-order hold from irregular (sample_t, sample_v) onto grid_t.
+
+    For each g in grid_t, returns the value of the most-recent sample with
+    sample_t <= g. Samples that pre-date the first grid point fill with the
+    first available sample (no NaN gap at the start).
+    """
+    if sample_t.size == 0:
+        raise ValueError("empty sample stream")
+    if not np.all(np.diff(sample_t) >= 0):
+        order = np.argsort(sample_t)
+        sample_t = sample_t[order]
+        sample_v = sample_v[order]
+    idx = np.searchsorted(sample_t, grid_t, side="right") - 1
+    idx = np.clip(idx, 0, sample_t.size - 1)
+    return sample_v[idx]
+
+
+def central_diff(x: np.ndarray, dt: float) -> np.ndarray:
+    """Central difference with reflected endpoints (returns same shape as x)."""
+    out = np.empty_like(x, dtype=np.float64)
+    out[1:-1] = (x[2:] - x[:-2]) / (2.0 * dt)
+    out[0] = (x[1] - x[0]) / dt
+    out[-1] = (x[-1] - x[-2]) / dt
+    return out
+
+
+def make_history(values: np.ndarray, depth: int) -> np.ndarray:
+    """Per-tick history block. Row i is [values[i-1], ..., values[i-depth]].
+    For i < k, pads with values[0] (matches the online learner startup)."""
+    n = values.size
+    out = np.zeros((n, depth), dtype=values.dtype)
+    for k in range(1, depth + 1):
+        out[:, k - 1] = np.concatenate(
+            [np.repeat(values[0], min(k, n)), values[:max(0, n - k)]]
+        )
+    return out
+
+
+# Mounting rotation matches imu_node R_MOUNT: diag(1, -1, -1).
+# +X forward, +Y left, +Z up in base_link FLU.
+_R_MOUNT = np.diag([1.0, -1.0, -1.0])
+
+
+def rotate_imu(raw_xyz: np.ndarray) -> np.ndarray:
+    """raw_xyz: (N, 3) in chip frame -> (N, 3) in base_link FLU."""
+    return raw_xyz @ _R_MOUNT.T
+
+
+def load_gyro_bias(cache_path: str) -> np.ndarray:
+    """Read the gyro bias from imu_calibration.json. Returns (3,) zeros if
+    missing — callers may choose to error instead."""
+    import json
+    import os
+    if not os.path.isfile(cache_path):
+        return np.zeros(3, dtype=np.float64)
+    with open(cache_path) as f:
+        data = json.load(f)
+    return np.asarray(data.get("gyro_bias", [0.0, 0.0, 0.0]), dtype=np.float64)
+
+
+TOPICS_NEEDED = {
+    "/cmd_drive",
+    "/imu",
+    "/odom",
+    "/e_comms/kart_speed_m_per_s",
+    "/system_state",
+}
+
+
+def read_bag_streams(mcap_path: str):
+    """Return {topic: [(t_seconds, msg), ...]} from one mcap file."""
+    from mcap_ros2.reader import read_ros2_messages
+    streams = {t: [] for t in TOPICS_NEEDED}
+    for evt in read_ros2_messages(mcap_path, topics=TOPICS_NEEDED):
+        t = evt.publish_time_ns * 1e-9
+        streams[evt.channel.topic].append((t, evt.ros_msg))
+    return streams
+
+
+def align_streams(
+    streams,
+    system_frequency: float,
+    gyro_bias_xyz: np.ndarray,
+    wheel_v_lowpass_hz: float = 10.0,
+) -> AlignedBag:
+    cmd = streams["/cmd_drive"]
+    imu = streams["/imu"]
+    odom = streams["/odom"]
+    wheel = streams["/e_comms/kart_speed_m_per_s"]
+    state = streams["/system_state"]
+
+    if not cmd or not imu or not odom or not wheel:
+        raise ValueError("bag missing one of /cmd_drive,/imu,/odom,/e_comms/kart_speed_m_per_s")
+
+    t0 = max(s[0][0] for s in (cmd, imu, odom, wheel))
+    t1 = min(s[-1][0] for s in (cmd, imu, odom, wheel))
+    dt = 1.0 / system_frequency
+    grid = np.arange(t0, t1, dt)
+    if grid.size < 60:
+        raise ValueError(f"bag too short: only {grid.size} ticks at {system_frequency} Hz")
+
+    cmd_t = np.array([t for t, _ in cmd])
+    cmd_throttle_raw = np.array([m.data[0] if len(m.data) >= 1 else 0.0 for _, m in cmd])
+    cmd_steer_raw = np.array([m.data[1] if len(m.data) >= 2 else 0.0 for _, m in cmd])
+    cmd_throttle = zoh_align(cmd_t, cmd_throttle_raw, grid)
+    cmd_steer = zoh_align(cmd_t, cmd_steer_raw, grid)
+
+    wheel_t = np.array([t for t, _ in wheel])
+    wheel_v_raw = np.array([m.data for _, m in wheel])
+    v_raw = zoh_align(wheel_t, wheel_v_raw, grid)
+    v = _lowpass_1pole(v_raw, dt, wheel_v_lowpass_hz)
+
+    imu_t = np.array([t for t, _ in imu])
+    gyro_xyz_raw = np.array([
+        [m.angular_velocity.x, m.angular_velocity.y, m.angular_velocity.z]
+        for _, m in imu
+    ])
+    accel_xyz_raw = np.array([
+        [m.linear_acceleration.x, m.linear_acceleration.y, m.linear_acceleration.z]
+        for _, m in imu
+    ])
+    gyro_xyz = rotate_imu(gyro_xyz_raw - gyro_bias_xyz[None, :])
+    accel_xyz = rotate_imu(accel_xyz_raw)
+    psi_dot = zoh_align(imu_t, gyro_xyz[:, 2], grid)
+    accel_x = zoh_align(imu_t, accel_xyz[:, 0], grid)
+
+    odom_t = np.array([t for t, _ in odom])
+    odom_x = zoh_align(odom_t, np.array([m.pose.pose.position.x for _, m in odom]), grid)
+    odom_y = zoh_align(odom_t, np.array([m.pose.pose.position.y for _, m in odom]), grid)
+    odom_yaw = zoh_align(odom_t, np.array([_quat_to_yaw(m.pose.pose.orientation) for _, m in odom]), grid)
+
+    if state:
+        state_t = np.array([t for t, _ in state])
+        state_str = np.array([m.data for _, m in state], dtype=object)
+        idx = np.clip(np.searchsorted(state_t, grid, side="right") - 1, 0, state_t.size - 1)
+        state_mode = state_str[idx]
+    else:
+        state_mode = np.full(grid.size, "UNKNOWN", dtype=object)
+    autonomous = (state_mode == "AUTONOMOUS")
+
+    dv_dt = central_diff(v, dt)
+    cmd_throttle_hist = make_history(cmd_throttle, NUM_THROTTLE_HIST)
+    cmd_steer_hist = make_history(cmd_steer, NUM_STEER_HIST)
+
+    return AlignedBag(
+        t=grid - grid[0],
+        cmd_throttle=cmd_throttle, cmd_steer=cmd_steer,
+        cmd_throttle_hist=cmd_throttle_hist, cmd_steer_hist=cmd_steer_hist,
+        v=v, psi_dot=psi_dot, accel_x=accel_x, dv_dt=dv_dt,
+        state_mode=state_mode,
+        odom_x=odom_x, odom_y=odom_y, odom_yaw=odom_yaw,
+        autonomous=autonomous,
+    )
+
+
+def _lowpass_1pole(x: np.ndarray, dt: float, fc_hz: float) -> np.ndarray:
+    rc = 1.0 / (2.0 * np.pi * fc_hz)
+    alpha = dt / (rc + dt)
+    out = np.empty_like(x, dtype=np.float64)
+    out[0] = x[0]
+    for i in range(1, x.size):
+        out[i] = out[i - 1] + alpha * (x[i] - out[i - 1])
+    return out
+
+
+def _quat_to_yaw(q) -> float:
+    siny = 2.0 * (q.w * q.z + q.x * q.y)
+    cosy = 1.0 - 2.0 * (q.y * q.y + q.z * q.z)
+    return float(np.arctan2(siny, cosy))
+
+
+def find_bag_file(bag_dir: str) -> str:
+    """Return the canonical mcap path for a bag directory, preferring
+    recovered.mcap when present (for truncated bags like 215024)."""
+    import glob
+    import os
+    cand = os.path.join(bag_dir, "recovered.mcap")
+    if os.path.isfile(cand):
+        return cand
+    matches = sorted(glob.glob(os.path.join(bag_dir, "*.mcap")))
+    if not matches:
+        raise FileNotFoundError(f"no .mcap in {bag_dir}")
+    return matches[0]
+
+
+def main():
+    import argparse
+    import os
+    ap = argparse.ArgumentParser()
+    ap.add_argument("bag", help="path to a bag directory (e.g. bags/mpc_run_...)")
+    ap.add_argument("--out", required=True, help="output .npz path")
+    ap.add_argument("--system-frequency", type=float, default=60.0)
+    ap.add_argument("--gyro-bias-cache",
+                    default=os.path.join(os.getcwd(), "imu_calibration.json"))
+    args = ap.parse_args()
+
+    mcap_path = find_bag_file(args.bag)
+    print(f"reading {mcap_path}")
+    streams = read_bag_streams(mcap_path)
+    bias = load_gyro_bias(args.gyro_bias_cache)
+    print(f"gyro_bias = {bias}")
+    bag = align_streams(streams, args.system_frequency, bias)
+
+    print(
+        f"  ticks={bag.t.size}  duration={bag.t[-1]:.1f}s  "
+        f"auto_frac={bag.autonomous.mean():.2f}  "
+        f"v[mean={bag.v.mean():.2f}, max={bag.v.max():.2f}]  "
+        f"psi_dot[std={bag.psi_dot.std():.2f}]"
+    )
+    os.makedirs(os.path.dirname(args.out) or ".", exist_ok=True)
+    np.savez_compressed(args.out, **bag.to_npz_dict())
+    print(f"wrote {args.out}")
+
+
+if __name__ == "__main__":
+    import os
+    import sys
+    REPO = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    if REPO not in sys.path:
+        sys.path.insert(0, REPO)
+    main()
diff --git a/sim/model/bicycle_params_legacy_bagfit.json b/sim/model/bicycle_params_legacy_bagfit.json
new file mode 100644
index 00000000..55ce3de5
--- /dev/null
+++ b/sim/model/bicycle_params_legacy_bagfit.json
@@ -0,0 +1,48 @@
+{
+  "params": {
+    "accel_tau": 0.6,
+    "brake_tau": 0.4,
+    "throttle_delay_s": 1e-10,
+    "steer_delay_s": 1e-10,
+    "steer_gain": 1.0,
+    "steer_slop_deg": 0.7913420097618941,
+    "wheelbase_m": 1.1003142014724272,
+    "steer_rate_max_degps": 180.0,
+    "slip_angle_at_v": -0.1884535407901888,
+    "v_max_mps": 12.0
+  },
+  "fit_info": {
+    "cost": 85958.63796924372,
+    "nfev": 15,
+    "success": true,
+    "sigma_v_rough": 1.1269939613139455,
+    "sigma_p_rough": 0.3576502527752695,
+    "param_order": [
+      "accel_tau",
+      "brake_tau",
+      "throttle_delay_s",
+      "steer_delay_s",
+      "steer_gain",
+      "steer_slop_deg",
+      "wheelbase_m",
+      "steer_rate_max_degps",
+      "slip_angle_at_v"
+    ],
+    "x_final": [
+      4.935379602520705,
+      4.999999998747633,
+      1e-10,
+      1e-10,
+      0.24489874243929322,
+      0.7913420097618941,
+      1.1003142014724272,
+      30.00000017051059,
+      -0.1884535407901888
+    ],
+    "manual_overrides_note": "Several params overridden post-fit because the training bag was 74% stops (v < 0.5 m/s), which dragged the regression away from the kart's actual high-speed driving behavior. Reverted to bicycle defaults / pathfinder.yaml values:  steer_gain=1.0 (was 0.245), steer_rate_max_degps=180 (was 30), accel_tau=0.6 (was 4.9), brake_tau=0.4 (was 5.0). Re-identify these with stops filtered out for a cleaner fit; for now this gives a sim that tracks the line at 6 m/s to match the real kart's normal operation."
+  },
+  "training_bags": [
+    "docs/superpowers/data_sim/cache/215024.npz",
+    "docs/superpowers/data_sim/cache/232625.npz"
+  ]
+}
\ No newline at end of file
diff --git a/sim/model/clamp.json b/sim/model/clamp.json
new file mode 100644
index 00000000..934b0827
--- /dev/null
+++ b/sim/model/clamp.json
@@ -0,0 +1,162 @@
+{
+  "mean": [
+    -3.622347503551282e-05,
+    -4.7494907562395383e-07,
+    -3.481137173366733e-05,
+    -2.3936604520713445e-06,
+    -7.244829612318426e-05,
+    -2.7927037081099115e-05,
+    -4.8886726290220395e-05,
+    -1.928525250605162e-07,
+    -3.1945442060532514e-06,
+    2.365051841479726e-06,
+    1.8522185882829945e-06
+  ],
+  "cov_inv": [
+    [
+      1.1286884987285855,
+      0.02761257135429068,
+      -0.06630396557357165,
+      -0.08441366793705705,
+      -0.0025375994635208616,
+      -0.0011232587393130652,
+      -0.27955741981537396,
+      -0.0008012255435661687,
+      -0.002739000332770218,
+      -0.0050978212646418655,
+      -0.033043542866325444
+    ],
+    [
+      0.027612571354290665,
+      1.1535368585009154,
+      0.045300550052850816,
+      0.1363346834302017,
+      -0.06702122138926007,
+      0.12120882535053482,
+      -0.035109116562230334,
+      0.028897958081670755,
+      -0.046797126202675364,
+      0.16475189455324968,
+      -0.7036439158340387
+    ],
+    [
+      -0.06630396557356964,
+      0.04530055005285013,
+      48.523877724667265,
+      1.8159159840373245,
+      -47.6163263093276,
+      -0.2980154405171637,
+      -0.08386786261114668,
+      -2.1315250363003386,
+      0.00928254292771211,
+      -0.1623477664023229,
+      0.41747683803403
+    ],
+    [
+      -0.08441366793705374,
+      0.13633468343020652,
+      1.8159159840374572,
+      116.09422904628357,
+      -2.026059226553666,
+      0.2100885866665303,
+      0.00012758406569400677,
+      -106.99512379282544,
+      -5.2101354862311124,
+      -2.073152537818971,
+      -1.4034874038572402
+    ],
+    [
+      -0.002537599463513679,
+      -0.06702122138925927,
+      -47.61632630932786,
+      -2.0260592265536284,
+      95.2660920536202,
+      -47.3417384818308,
+      -0.2997652323006166,
+      4.130800869794289,
+      -2.122666431209527,
+      0.18784797243661633,
+      -0.14900009878605364
+    ],
+    [
+      -0.0011232587393164592,
+      0.12120882535053393,
+      -0.2980154405167545,
+      0.2100885866666209,
+      -47.34173848183147,
+      95.30582185724042,
+      -47.6507345281836,
+      -2.196518512645724,
+      4.119096024355884,
+      -2.134586934897221,
+      -0.04421888393769209
+    ],
+    [
+      -0.2795574198153711,
+      -0.03510911656223011,
+      -0.08386786261131109,
+      0.00012758406569860455,
+      -0.2997652323001972,
+      -47.65073452818386,
+      48.62598913818152,
+      0.19165302204387763,
+      -2.0153837703773316,
+      2.128221565653749,
+      -0.3297070020982402
+    ],
+    [
+      -0.0008012255435656155,
+      0.028897958081674523,
+      -2.1315250363005624,
+      -106.99512379282689,
+      4.130800869794275,
+      -2.1965185126454156,
+      0.1916530220438031,
+      214.763588977752,
+      -102.29462983814074,
+      -3.3350861700848284,
+      -2.106996557910279
+    ],
+    [
+      -0.002739000332774491,
+      -0.0467971262026687,
+      0.009282542927778473,
+      -5.21013548622966,
+      -2.122666431209401,
+      4.119096024355476,
+      -2.015383770377112,
+      -102.29462983814214,
+      215.06139515509236,
+      -102.33581689450118,
+      -5.175450043326
+    ],
+    [
+      -0.0050978212646360065,
+      0.16475189455324746,
+      -0.16234776640226034,
+      -2.073152537817509,
+      0.18784797243642526,
+      -2.1345869348968134,
+      2.1282215656534658,
+      -3.3350861700861225,
+      -102.33581689450129,
+      214.87950756024938,
+      -107.15261789140574
+    ],
+    [
+      -0.033043542866326,
+      -0.7036439158340392,
+      0.41747683803398833,
+      -1.403487403858695,
+      -0.1490000987859358,
+      -0.04421888393790997,
+      -0.3297070020980969,
+      -2.106996557908995,
+      -5.175450043325868,
+      -107.15261789140571,
+      116.5517062741477
+    ]
+  ],
+  "d_warn": 5.6839256087382815,
+  "d_max": 12.17623312379873
+}
\ No newline at end of file
diff --git a/sim/model/mlp.pt b/sim/model/mlp.pt
new file mode 100644
index 00000000..f9883b05
Binary files /dev/null and b/sim/model/mlp.pt differ
diff --git a/sim/model/noise.json b/sim/model/noise.json
new file mode 100644
index 00000000..0d97d9d3
--- /dev/null
+++ b/sim/model/noise.json
@@ -0,0 +1,6 @@
+{
+  "sigma_v": 1.0803687007816047,
+  "sigma_psidot": 0.07001745457052427,
+  "sigma_steer_cmd": 1.3422241559633976,
+  "sigma_throttle_cmd": 0.2089971907156297
+}
\ No newline at end of file
diff --git a/sim/model/norm.json b/sim/model/norm.json
new file mode 100644
index 00000000..dd6970dc
--- /dev/null
+++ b/sim/model/norm.json
@@ -0,0 +1,28 @@
+{
+  "mean": [
+    3.8694233894348145,
+    0.04788755252957344,
+    4.569118499755859,
+    0.851321816444397,
+    4.568946361541748,
+    4.568777084350586,
+    4.568606376647949,
+    0.8503996729850769,
+    0.8492521643638611,
+    0.8481919765472412,
+    0.8471358418464661
+  ],
+  "std": [
+    1.4805134534835815,
+    0.3576469123363495,
+    1.286541223526001,
+    15.19983959197998,
+    1.2867465019226074,
+    1.2869306802749634,
+    1.287204384803772,
+    15.19759464263916,
+    15.195440292358398,
+    15.19322681427002,
+    15.19059944152832
+  ]
+}
\ No newline at end of file
diff --git a/sim/model/residual_emp.npz b/sim/model/residual_emp.npz
new file mode 100644
index 00000000..b0cb2f6b
Binary files /dev/null and b/sim/model/residual_emp.npz differ
diff --git a/sim/noise_model.py b/sim/noise_model.py
new file mode 100644
index 00000000..1773a922
--- /dev/null
+++ b/sim/noise_model.py
@@ -0,0 +1,127 @@
+"""Empirical noise / error model for the data-driven sim. Section 5 of the spec."""
+from __future__ import annotations
+
+import json
+from dataclasses import asdict, dataclass
+
+import numpy as np
+
+
+@dataclass
+class NoiseModel:
+    sigma_v: float
+    sigma_psidot: float
+    sigma_steer_cmd: float
+    sigma_throttle_cmd: float
+
+    def save(self, path: str) -> None:
+        with open(path, "w") as f:
+            json.dump(asdict(self), f, indent=2)
+
+    @classmethod
+    def load(cls, path: str) -> "NoiseModel":
+        with open(path) as f:
+            return cls(**json.load(f))
+
+
+def estimate_noise(
+    v_residual: np.ndarray,
+    psi_dot_residual: np.ndarray,
+    cmd_throttle: np.ndarray,
+    cmd_steer: np.ndarray,
+    is_straight: np.ndarray,
+    is_steady_throttle: np.ndarray,
+) -> NoiseModel:
+    sigma_v = float(np.std(v_residual))
+    sigma_psidot = float(np.std(psi_dot_residual))
+    sigma_steer_cmd = _jitter_std(cmd_steer, mask=is_straight, window=15)
+    sigma_throttle_cmd = _jitter_std(cmd_throttle, mask=is_steady_throttle, window=15)
+    return NoiseModel(sigma_v=sigma_v, sigma_psidot=sigma_psidot,
+                      sigma_steer_cmd=sigma_steer_cmd,
+                      sigma_throttle_cmd=sigma_throttle_cmd)
+
+
+def _jitter_std(x: np.ndarray, mask: np.ndarray, window: int) -> float:
+    if mask.sum() < window * 2:
+        return float(np.std(x))
+    kernel = np.ones(window) / window
+    smoothed = np.convolve(x, kernel, mode="same")
+    return float(np.std((x - smoothed)[mask]))
+
+
+def compute_pre_mlp_artifacts(
+    train_npz_paths,
+    params_path: str,
+    out_noise_json: str,
+    out_residual_emp: str,
+) -> None:
+    """Predict bicycle derivatives on the training bags, compute residuals,
+    write the noise model and the empirical residual pool used for bootstrap."""
+    import os
+    from sim.identify_bicycle import BicycleParams, predict_derivatives
+    from sim.load_bags import AlignedBag
+
+    with open(params_path) as f:
+        p = BicycleParams(**json.load(f)["params"])
+
+    v_all, dv_res_all, psi_res_all = [], [], []
+    ct_all, cs_all = [], []
+    is_straight_all, is_steady_all = [], []
+    for path in train_npz_paths:
+        bag = AlignedBag.from_npz(path)
+        mask = bag.autonomous & (bag.v > 0.5)
+        if mask.sum() < 100:
+            continue
+        dt = float(np.median(np.diff(bag.t)))
+        dv_m, psi_m = predict_derivatives(p, bag.v, bag.cmd_throttle, bag.cmd_steer, dt)
+        dv_res_all.append(bag.dv_dt[mask] - dv_m[mask])
+        psi_res_all.append(bag.psi_dot[mask] - psi_m[mask])
+        ct_all.append(bag.cmd_throttle[mask])
+        cs_all.append(bag.cmd_steer[mask])
+        v_all.append(bag.v[mask])
+        is_straight_all.append(np.abs(bag.cmd_steer[mask]) < 2.0)
+        d_throttle = np.abs(np.diff(bag.cmd_throttle[mask], prepend=bag.cmd_throttle[mask][0]))
+        is_steady_all.append(d_throttle < 2.0)
+
+    dv_res = np.concatenate(dv_res_all)
+    psi_res = np.concatenate(psi_res_all)
+    ct = np.concatenate(ct_all)
+    cs = np.concatenate(cs_all)
+    v_arr = np.concatenate(v_all)
+    is_straight = np.concatenate(is_straight_all)
+    is_steady = np.concatenate(is_steady_all)
+    nm = estimate_noise(dv_res, psi_res, ct, cs, is_straight, is_steady)
+    nm.save(out_noise_json)
+    print(f"wrote {out_noise_json}: {asdict(nm)}")
+
+    v_edges = np.linspace(0.0, 15.0, 6)
+    cs_edges = np.linspace(0.0, 30.0, 4)
+    v_bin = np.clip(np.digitize(v_arr, v_edges) - 1, 0, v_edges.size - 2)
+    cs_bin = np.clip(np.digitize(np.abs(cs), cs_edges) - 1, 0, cs_edges.size - 2)
+    np.savez_compressed(
+        out_residual_emp,
+        dv_res=dv_res, psi_res=psi_res,
+        v_bin=v_bin, cs_bin=cs_bin,
+        v_edges=v_edges, cs_edges=cs_edges,
+    )
+    print(f"wrote {out_residual_emp}: {dv_res.size} samples")
+
+
+def main():
+    import argparse
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--train", nargs="+", required=True)
+    ap.add_argument("--params", required=True)
+    ap.add_argument("--out-noise", required=True)
+    ap.add_argument("--out-residual-emp", required=True)
+    args = ap.parse_args()
+    compute_pre_mlp_artifacts(args.train, args.params, args.out_noise, args.out_residual_emp)
+
+
+if __name__ == "__main__":
+    import os
+    import sys
+    REPO = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    if REPO not in sys.path:
+        sys.path.insert(0, REPO)
+    main()
diff --git a/sim/render.py b/sim/render.py
new file mode 100644
index 00000000..41912e23
--- /dev/null
+++ b/sim/render.py
@@ -0,0 +1,239 @@
+"""GIF render of a SimResult on the racing-line track. matplotlib-based."""
+from __future__ import annotations
+
+import os
+import sys
+from typing import Optional, Sequence
+
+import numpy as np
+
+
+HERE = os.path.dirname(os.path.abspath(__file__))
+REPO = os.path.dirname(HERE)
+if REPO not in sys.path:
+    sys.path.insert(0, REPO)
+
+
+def load_line_xy(path: str) -> tuple[np.ndarray, np.ndarray]:
+    """Load (lx, ly) from a racing-line CSV (cols x at idx 1, y at idx 2)."""
+    lx, ly = [], []
+    with open(path) as f:
+        for row in f:
+            parts = row.strip().split(",")
+            if len(parts) < 3:
+                continue
+            try:
+                lx.append(float(parts[1]))
+                ly.append(float(parts[2]))
+            except ValueError:
+                continue
+    return np.asarray(lx, dtype=np.float64), np.asarray(ly, dtype=np.float64)
+
+
+def _tangent_normal(lx: np.ndarray, ly: np.ndarray):
+    dx = np.empty_like(lx)
+    dy = np.empty_like(ly)
+    dx[1:-1] = lx[2:] - lx[:-2]
+    dy[1:-1] = ly[2:] - ly[:-2]
+    dx[0] = lx[1] - lx[0]
+    dy[0] = ly[1] - ly[0]
+    dx[-1] = lx[-1] - lx[-2]
+    dy[-1] = ly[-1] - ly[-2]
+    L = np.hypot(dx, dy)
+    L = np.where(L < 1e-9, 1.0, L)
+    tx, ty = dx / L, dy / L
+    return -ty, tx  # left-hand normal
+
+
+def render_gif(
+    xs: np.ndarray,
+    ys: np.ndarray,
+    yaws: np.ndarray,
+    vs: np.ndarray,
+    ds: np.ndarray,
+    times: np.ndarray,
+    line_path: str,
+    out_path: str,
+    *,
+    mpc_params: Optional[dict] = None,
+    track_half_width: float = 2.0,
+    kart_length_m: float = 1.05,
+    kart_width_m: float = 0.5,
+    fps: int = 30,
+    max_frames: int = 5000,
+    realtime_factor: float = 1.0,
+    title: str = "Kart sim",
+) -> None:
+    """Render an animated GIF of the kart's trajectory.
+
+    realtime_factor:
+        Ratio of sim time to GIF time. 1.0 = play in real time
+        (60 Hz sim, 30 fps GIF => stride 2). 5.0 = 5x sped up. <1 = slow-mo.
+        ``max_frames`` is a hard cap to prevent huge GIFs on long runs;
+        the visual will then play faster than ``realtime_factor`` requested.
+    """
+    import matplotlib
+    matplotlib.use("Agg")
+    import matplotlib.pyplot as plt
+    import matplotlib.patches as mpatches
+    from matplotlib.animation import FuncAnimation, PillowWriter
+
+    lx, ly = load_line_xy(line_path)
+    nx, ny = _tangent_normal(lx, ly)
+    left_x = lx + nx * track_half_width
+    left_y = ly + ny * track_half_width
+    right_x = lx - nx * track_half_width
+    right_y = ly - ny * track_half_width
+
+    # Decimate frames.  Aim for `realtime_factor` sim-seconds per GIF-second:
+    #   stride_ticks = realtime_factor * sim_freq / fps
+    # so each GIF frame advances `stride_ticks` of the sim. max_frames caps
+    # the total frame count to keep file size manageable on long runs.
+    N = len(xs)
+    if N >= 2:
+        sim_dt = float(times[1] - times[0]) if times.size >= 2 else 1.0 / 60.0
+        sim_freq = 1.0 / max(sim_dt, 1e-6)
+        stride_realtime = max(1, int(round(realtime_factor * sim_freq / fps)))
+    else:
+        stride_realtime = 1
+    # If the realtime stride would still produce more than max_frames frames,
+    # bump stride up to fit (visual will be sped up beyond realtime_factor).
+    stride = max(stride_realtime, (N + max_frames - 1) // max_frames if max_frames > 0 else 1)
+    idxs = np.arange(0, N, stride)[:max_frames]
+    n_frames = len(idxs)
+    eff_factor = stride * fps * (times[1] - times[0] if times.size >= 2 else 1.0 / 60.0)
+    print(f"render: N={N} ticks, stride={stride}, frames={n_frames}, "
+          f"effective realtime_factor={eff_factor:.2f}x", flush=True)
+
+    # Fixed view limits
+    all_x = np.concatenate([xs, lx])
+    all_y = np.concatenate([ys, ly])
+    margin = 5.0
+    xlim = (float(all_x.min()) - margin, float(all_x.max()) + margin)
+    ylim = (float(all_y.min()) - margin, float(all_y.max()) + margin)
+
+    fig, ax = plt.subplots(figsize=(8, 6))
+    ax.set_aspect("equal")
+    ax.set_xlim(xlim)
+    ax.set_ylim(ylim)
+    ax.set_title(title, fontsize=9)
+    ax.axis("off")
+
+    # Static background
+    ax.plot(left_x, left_y, color="#444", lw=1.0)
+    ax.plot(right_x, right_y, color="#444", lw=1.0)
+    ax.plot(lx, ly, color="#999", lw=0.5, ls="--")
+
+    trail_line, = ax.plot([], [], color="#0a7", alpha=0.5, lw=1.5)
+
+    # Kart rectangle (initially off-screen)
+    kart_rect = mpatches.Rectangle(
+        (-kart_length_m / 2, -kart_width_m / 2),
+        kart_length_m,
+        kart_width_m,
+        color="#0a7",
+        zorder=5,
+    )
+    ax.add_patch(kart_rect)
+
+    info_text = ax.text(
+        0.01, 0.99, "",
+        transform=ax.transAxes,
+        va="top", ha="left",
+        fontsize=7,
+        family="monospace",
+        color="#111",
+    )
+
+    # Build MPC param snippet if provided
+    param_keys = ["target_speed_pct", "w_d", "w_heading", "w_progress", "w_boundary", "w_terminal_d"]
+    param_text_str = ""
+    if mpc_params is not None:
+        lines_p = []
+        for k in param_keys:
+            if k in mpc_params:
+                lines_p.append(f"{k}={mpc_params[k]:.3g}")
+        param_text_str = "\n".join(lines_p[:6])
+    param_text = ax.text(
+        0.01, 0.01, param_text_str,
+        transform=ax.transAxes,
+        va="bottom", ha="left",
+        fontsize=6,
+        family="monospace",
+        color="#555",
+    )
+
+    trail_len = 30
+
+    def _update(frame_idx):
+        k = int(idxs[frame_idx])
+        # Trail
+        t_start = max(0, frame_idx - trail_len)
+        trail_idxs = idxs[t_start : frame_idx + 1]
+        trail_line.set_data(xs[trail_idxs], ys[trail_idxs])
+
+        # Kart rectangle: translate + rotate
+        import matplotlib.transforms as mtransforms
+        cx, cy = float(xs[k]), float(ys[k])
+        yaw_deg = float(np.degrees(yaws[k]))
+        t = (
+            mtransforms.Affine2D()
+            .rotate_deg(yaw_deg)
+            .translate(cx, cy)
+            + ax.transData
+        )
+        kart_rect.set_transform(t)
+
+        # Text overlay
+        info_text.set_text(
+            f"t={float(times[k]):.1f}s  v={float(vs[k]):.2f}m/s  d={float(ds[k]):+.2f}m"
+        )
+        return trail_line, kart_rect, info_text, param_text
+
+    anim = FuncAnimation(fig, _update, frames=n_frames, interval=1000 // fps, blit=True)
+
+    os.makedirs(os.path.dirname(os.path.abspath(out_path)), exist_ok=True)
+    writer = PillowWriter(fps=fps)
+    anim.save(out_path, writer=writer)
+    plt.close(fig)
+    print(f"wrote {out_path} ({n_frames} frames @ {fps} fps)")
+
+
+if __name__ == "__main__":
+    import argparse
+    import json
+
+    ap = argparse.ArgumentParser(description="Render a trajectory.npz as a GIF.")
+    ap.add_argument("--trajectory", required=True)
+    ap.add_argument("--line", required=True)
+    ap.add_argument("--out", required=True)
+    ap.add_argument("--params", default=None)
+    ap.add_argument("--fps", type=int, default=30)
+    ap.add_argument("--max-frames", type=int, default=5000,
+                    help="hard cap on GIF frames (default 5000)")
+    ap.add_argument("--realtime-factor", type=float, default=1.0,
+                    help="sim-seconds per GIF-second (1.0 = real time, 5 = 5x sped up)")
+    ap.add_argument("--title", default="Kart sim")
+    args = ap.parse_args()
+
+    data = np.load(args.trajectory)
+    mpc_params = None
+    if args.params:
+        with open(args.params) as f:
+            mpc_params = json.load(f)
+
+    render_gif(
+        xs=data["xs"],
+        ys=data["ys"],
+        yaws=data["yaws"],
+        vs=data["vs"],
+        ds=data["ds"],
+        times=data["times"],
+        line_path=args.line,
+        out_path=args.out,
+        mpc_params=mpc_params,
+        fps=args.fps,
+        max_frames=args.max_frames,
+        realtime_factor=args.realtime_factor,
+        title=args.title,
+    )
diff --git a/sim/requirements.txt b/sim/requirements.txt
new file mode 100644
index 00000000..c597d07d
--- /dev/null
+++ b/sim/requirements.txt
@@ -0,0 +1,21 @@
+numpy>=1.24,<2
+scipy>=1.10
+
+# ML
+torch>=2.2
+
+# Plotting / GIF render
+matplotlib>=3.7
+imageio>=2.31
+pillow>=10.0
+
+# Hyperparameter sweeps
+optuna>=3.5
+joblib>=1.3
+
+# Bag loading
+mcap>=1.1
+mcap-ros2-support>=0.5
+
+# Tests
+pytest>=7
diff --git a/sim/residual_mlp.py b/sim/residual_mlp.py
new file mode 100644
index 00000000..5ec68b29
--- /dev/null
+++ b/sim/residual_mlp.py
@@ -0,0 +1,328 @@
+"""Residual MLP for the data-driven sim. Section 6 of the spec."""
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+import numpy as np
+import torch
+import torch.nn as nn
+
+FEATURE_DIM = 11  # v + psi_dot + cmd_throttle + cmd_steer + hist[3] + hist[4]
+
+
+def build_features(
+    v: np.ndarray,
+    psi_dot: np.ndarray,
+    cmd_throttle: np.ndarray,
+    cmd_steer: np.ndarray,
+    cmd_throttle_hist: np.ndarray,
+    cmd_steer_hist: np.ndarray,
+) -> np.ndarray:
+    """Stack per-tick features in the layout the MLP expects."""
+    return np.concatenate(
+        [
+            v[:, None], psi_dot[:, None],
+            cmd_throttle[:, None], cmd_steer[:, None],
+            cmd_throttle_hist, cmd_steer_hist,
+        ],
+        axis=1,
+    ).astype(np.float32)
+
+
+@dataclass
+class Normaliser:
+    mean: np.ndarray
+    std: np.ndarray
+
+    def apply(self, x: np.ndarray) -> np.ndarray:
+        return (x - self.mean) / np.maximum(self.std, 1e-6)
+
+    def to_json(self) -> dict:
+        return {"mean": self.mean.tolist(), "std": self.std.tolist()}
+
+    @classmethod
+    def from_json(cls, d: dict) -> "Normaliser":
+        return cls(mean=np.asarray(d["mean"], dtype=np.float32),
+                   std=np.asarray(d["std"], dtype=np.float32))
+
+
+class ResidualMLP(nn.Module):
+    def __init__(self, hidden: int = 64):
+        super().__init__()
+        self.net = nn.Sequential(
+            nn.Linear(FEATURE_DIM, hidden), nn.Tanh(),
+            nn.Linear(hidden, hidden), nn.Tanh(),
+            nn.Linear(hidden, 2),
+        )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return self.net(x)
+
+
+def _precompute_steer_actual(params, cmd_steer: np.ndarray, dt: float) -> np.ndarray:
+    """Run steer dynamics forward and return the accumulated delta (rad) at each tick.
+
+    This matches the internal `actual` variable in predict_derivatives, allowing
+    rollout to start with the correct steer state at any anchor index without
+    re-running the full prefix each time.
+    """
+    p = params
+    slop = np.deg2rad(max(0.0, p.steer_slop_deg))
+    rate_lim = np.deg2rad(p.steer_rate_max_degps) * dt
+    n = cmd_steer.size
+    actual_arr = np.empty(n, dtype=np.float64)
+    actual = 0.0
+    for i in range(n):
+        c = np.deg2rad(p.steer_gain * float(cmd_steer[i]))
+        if c > actual + slop:
+            target = c - slop
+        elif c < actual - slop:
+            target = c + slop
+        else:
+            target = actual
+        actual += float(np.clip(target - actual, -rate_lim, rate_lim))
+        actual_arr[i] = actual
+    return actual_arr
+
+
+def _bicycle_step_stateful(
+    params, v: float, cmd_throttle: float, cmd_steer: float,
+    dt: float, steer_actual: float
+) -> tuple:
+    """One-step bicycle prediction given pre-accumulated steer_actual (rad).
+    Returns (dv_dt, psi_dot, new_steer_actual)."""
+    p = params
+    slop = np.deg2rad(max(0.0, p.steer_slop_deg))
+    rate_lim = np.deg2rad(p.steer_rate_max_degps) * dt
+
+    target_v = np.clip(cmd_throttle / 100.0, 0.0, 1.0) * p.v_max_mps
+    tau = p.accel_tau if target_v >= v else p.brake_tau
+    dv_dt = (target_v - v) / max(tau, 1e-3)
+
+    c = np.deg2rad(p.steer_gain * cmd_steer)
+    if c > steer_actual + slop:
+        t = c - slop
+    elif c < steer_actual - slop:
+        t = c + slop
+    else:
+        t = steer_actual
+    new_actual = steer_actual + float(np.clip(t - steer_actual, -rate_lim, rate_lim))
+
+    psi_dot_kin = v * np.tan(new_actual) / max(p.wheelbase_m, 1e-3)
+    psi_dot = psi_dot_kin - p.slip_angle_at_v * v * np.sign(new_actual) * new_actual
+    return dv_dt, psi_dot, new_actual
+
+
+def _rollout_loss(
+    net: ResidualMLP, norm: Normaliser,
+    bag_dict: dict, anchors: np.ndarray, horizon: int, dt: float,
+    sigma_v: float, sigma_psidot: float,
+    bicycle_predict,
+    device: str = "cpu",
+) -> torch.Tensor:
+    """For each anchor index, roll the sim forward `horizon` ticks feeding
+    the bag's actual command sequence and penalise (v, psi_dot) drift from
+    the bag at every intermediate step. Gradients flow only through the MLP
+    output at each step (state integration is detached).
+
+    `bicycle_predict` is a (params, steer_actual_arr) tuple where steer_actual_arr
+    is the precomputed per-tick steer delta (rad) from _precompute_steer_actual.
+    """
+    params, steer_actual_arr = bicycle_predict
+    mean_t = torch.from_numpy(norm.mean).to(device)
+    std_t = torch.from_numpy(np.maximum(norm.std, 1e-6)).to(device)
+
+    losses = []
+    for a in anchors:
+        if a + horizon >= bag_dict["v"].size:
+            continue
+        v = float(bag_dict["v"][a])
+        psi_dot = float(bag_dict["psi_dot"][a])
+        # Steer state at anchor: use precomputed value from tick before anchor
+        steer_actual = float(steer_actual_arr[a - 1]) if a > 0 else 0.0
+        rt_loss = torch.zeros((), device=device)
+        for h in range(horizon):
+            i = a + h
+            dv_struct, psi_struct, steer_actual = _bicycle_step_stateful(
+                params, v,
+                float(bag_dict["cmd_throttle"][i]),
+                float(bag_dict["cmd_steer"][i]),
+                dt, steer_actual,
+            )
+            feat = build_features(
+                np.array([v]), np.array([psi_dot]),
+                bag_dict["cmd_throttle"][i:i + 1],
+                bag_dict["cmd_steer"][i:i + 1],
+                bag_dict["cmd_throttle_hist"][i:i + 1],
+                bag_dict["cmd_steer_hist"][i:i + 1],
+            )
+            x = (torch.from_numpy(feat).to(device) - mean_t) / std_t
+            res = net(x).squeeze(0)
+            # dv_pred and psi_pred are tensors carrying gradients through res
+            dv_pred = dv_struct + res[0]
+            psi_pred = psi_struct + res[1]
+            target_v = float(bag_dict["v"][i + 1])
+            target_psi = float(bag_dict["psi_dot"][i + 1])
+            # Loss terms: v_next = v + dv_pred*dt; psi_next = psi_pred
+            # Gradients flow through dv_pred and psi_pred to the MLP
+            v_next = v + dv_pred * dt
+            rt_loss = rt_loss + ((v_next - target_v) ** 2) / max(sigma_v, 1e-3) ** 2
+            rt_loss = rt_loss + ((psi_pred - target_psi) ** 2) / max(sigma_psidot, 1e-3) ** 2
+            # State update: detach so next step's features are based on predicted scalars
+            v = float(v_next.detach())
+            psi_dot = float(psi_pred.detach())
+        losses.append(rt_loss / horizon)
+    if not losses:
+        return torch.zeros((), device=device)
+    return torch.stack(losses).mean()
+
+
+def _bicycle_predict_fn(params):
+    from sim.identify_bicycle import predict_derivatives
+    def fn(v, ct, cs, dt):
+        return predict_derivatives(params, v, ct, cs, dt)
+    return fn
+
+
+def _stack_training_data(train_npz_paths, params, autonomous_only=True, speed_floor=0.5):
+    from sim.load_bags import AlignedBag
+    feats_all, tgt_all = [], []
+    bag_dicts = []
+    dt_last = None
+    for path in train_npz_paths:
+        bag = AlignedBag.from_npz(path)
+        mask = (bag.autonomous if autonomous_only else np.ones_like(bag.autonomous)) & (bag.v > speed_floor)
+        if mask.sum() < 100:
+            continue
+        dt = float(np.median(np.diff(bag.t)))
+        dt_last = dt
+        dv_m, psi_m = _bicycle_predict_fn(params)(bag.v, bag.cmd_throttle, bag.cmd_steer, dt)
+        feats = build_features(bag.v, bag.psi_dot, bag.cmd_throttle, bag.cmd_steer,
+                               bag.cmd_throttle_hist, bag.cmd_steer_hist)
+        target = np.stack([bag.dv_dt - dv_m, bag.psi_dot - psi_m], axis=1).astype(np.float32)
+        feats_all.append(feats[mask])
+        tgt_all.append(target[mask])
+        bag_dicts.append({k: getattr(bag, k) for k in bag.field_names()})
+    return np.concatenate(feats_all), np.concatenate(tgt_all), bag_dicts, dt_last
+
+
+def _valid_anchor_indices(bag_dict: dict, horizon: int, speed_floor: float = 0.5) -> np.ndarray:
+    """Return indices safe to use as rollout anchors: v > speed_floor with horizon room."""
+    v = bag_dict["v"]
+    n = v.size
+    idx = np.where((v > speed_floor) & (np.arange(n) + horizon < n))[0]
+    return idx
+
+
+def main():
+    import argparse
+    import json
+    import os
+    from sim.identify_bicycle import BicycleParams
+    from sim.noise_model import NoiseModel
+
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--train", nargs="+", required=True)
+    ap.add_argument("--val", nargs="+", required=True)
+    ap.add_argument("--params", required=True)
+    ap.add_argument("--noise", required=True)
+    ap.add_argument("--out-dir", required=True)
+    ap.add_argument("--epochs", type=int, default=30)
+    ap.add_argument("--rollout-weight", type=float, default=0.3)
+    ap.add_argument("--rollout-horizon", type=int, default=30)
+    ap.add_argument("--rollout-anchors-per-epoch", type=int, default=200)
+    args = ap.parse_args()
+
+    with open(args.params) as f:
+        params = BicycleParams(**json.load(f)["params"])
+    noise = NoiseModel.load(args.noise)
+
+    feats_tr, tgt_tr, bag_dicts_tr, dt = _stack_training_data(args.train, params)
+    feats_va, tgt_va, bag_dicts_va, _ = _stack_training_data(args.val, params)
+    print(f"train ticks={feats_tr.shape[0]}  val ticks={feats_va.shape[0]}")
+
+    norm = Normaliser(mean=feats_tr.mean(0).astype(np.float32),
+                      std=feats_tr.std(0).astype(np.float32))
+    x_tr = torch.from_numpy(norm.apply(feats_tr))
+    y_tr = torch.from_numpy(tgt_tr)
+    x_va = torch.from_numpy(norm.apply(feats_va))
+    y_va = torch.from_numpy(tgt_va)
+
+    net = ResidualMLP()
+    opt = torch.optim.Adam(net.parameters(), lr=3e-3)
+    sched = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=args.epochs)
+    w = torch.tensor([1.0 / max(noise.sigma_v, 1e-3) ** 2,
+                      1.0 / max(noise.sigma_psidot, 1e-3) ** 2], dtype=torch.float32)
+
+    rng = np.random.default_rng(0)
+    best_val = float("inf")
+    history = {"train_single": [], "val_single": [], "val_rollout": []}
+
+    # Precompute steer state at every tick for O(1) anchor warm-up
+    steer_actual_tr = _precompute_steer_actual(params, bag_dicts_tr[0]["cmd_steer"], dt)
+    steer_actual_va = _precompute_steer_actual(params, bag_dicts_va[0]["cmd_steer"], dt)
+    bicycle_tr = (params, steer_actual_tr)
+    bicycle_va = (params, steer_actual_va)
+
+    valid_tr = _valid_anchor_indices(bag_dicts_tr[0], args.rollout_horizon)
+    valid_va = _valid_anchor_indices(bag_dicts_va[0], args.rollout_horizon)
+    print(f"valid rollout anchors: train={valid_tr.size}  val={valid_va.size}")
+
+    # Fixed validation anchors — same set every epoch so val_rollout is a
+    # consistent metric for early-stopping. Use 4× the per-epoch training
+    # anchors for a tighter estimate.
+    val_rng = np.random.default_rng(12345)
+    val_anchor_count = max(args.rollout_anchors_per_epoch * 4, 400)
+    fixed_val_anchors = val_rng.choice(valid_va, size=val_anchor_count, replace=True)
+
+    for epoch in range(args.epochs):
+        net.train()
+        idx = torch.randperm(x_tr.shape[0])
+        epoch_single = 0.0
+        for s in range(0, x_tr.shape[0], 4096):
+            b = idx[s:s + 4096]
+            single = ((net(x_tr[b]) - y_tr[b]) ** 2 * w).mean()
+            anchors = rng.choice(valid_tr, size=args.rollout_anchors_per_epoch, replace=True)
+            rollout = _rollout_loss(net, norm, bag_dicts_tr[0], anchors,
+                                    args.rollout_horizon, dt,
+                                    noise.sigma_v, noise.sigma_psidot,
+                                    bicycle_tr)
+            loss = single + args.rollout_weight * rollout
+            opt.zero_grad()
+            loss.backward()
+            opt.step()
+            epoch_single += single.item() * b.numel()
+        epoch_single /= x_tr.shape[0]
+        sched.step()
+
+        net.eval()
+        with torch.no_grad():
+            val_single = ((net(x_va) - y_va) ** 2 * w).mean().item()
+            val_rollout = _rollout_loss(net, norm, bag_dicts_va[0], fixed_val_anchors,
+                                        args.rollout_horizon, dt,
+                                        noise.sigma_v, noise.sigma_psidot,
+                                        bicycle_va).item()
+        history["train_single"].append(epoch_single)
+        history["val_single"].append(val_single)
+        history["val_rollout"].append(val_rollout)
+        print(f"epoch {epoch:3d}  train_single={epoch_single:.4f}  "
+              f"val_single={val_single:.4f}  val_rollout={val_rollout:.4f}")
+
+        if val_single < best_val:
+            best_val = val_single
+            os.makedirs(args.out_dir, exist_ok=True)
+            torch.save(net.state_dict(), os.path.join(args.out_dir, "mlp.pt"))
+            with open(os.path.join(args.out_dir, "norm.json"), "w") as f:
+                json.dump(norm.to_json(), f, indent=2)
+    with open(os.path.join(args.out_dir, "training_history.json"), "w") as f:
+        json.dump(history, f, indent=2)
+    print(f"best val_single = {best_val:.4f}")
+
+
+if __name__ == "__main__":
+    import os
+    import sys
+    REPO = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    if REPO not in sys.path:
+        sys.path.insert(0, REPO)
+    main()
diff --git a/sim/runner.py b/sim/runner.py
new file mode 100644
index 00000000..8adddc1b
--- /dev/null
+++ b/sim/runner.py
@@ -0,0 +1,1117 @@
+"""Cluster-friendly runner for the offline kart simulator.
+
+Subcommands:
+  one               — run a single MPC param set; optionally render a GIF
+  sweep             — Optuna sweep over MPC params, parallel across trials
+  render            — render a saved trajectory.npz as a GIF
+  compare-residuals — compare multiple residual-learner modes side-by-side
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import math
+import os
+import sys
+import time
+from typing import Optional
+
+HERE = os.path.dirname(os.path.abspath(__file__))
+REPO = os.path.dirname(HERE)
+if REPO not in sys.path:
+    sys.path.insert(0, REPO)
+
+import numpy as np
+
+
+def load_line(path: str):
+    line = []
+    with open(path) as f:
+        for row in f:
+            parts = row.strip().split(",")
+            if len(parts) < 6:
+                continue
+            try:
+                line.append(tuple(float(x) for x in parts))
+            except ValueError:
+                continue
+    return line
+
+
+# ---------------------------------------------------------------------------
+# Sweep objective helpers (top-level so they can be serialized by joblib/loky)
+# ---------------------------------------------------------------------------
+def _sample_params(trial, defaults: dict) -> dict:
+    """Optuna trial -> mpc params dict. Mirrors optimize_mpc_sim.sample_params."""
+    p = dict(defaults)
+    p["horizon_steps"] = trial.suggest_int("horizon_steps", 10, 40)
+    p["num_samples"] = trial.suggest_int("num_samples", 32, 128, step=8)
+    p["steer_sigma_deg"] = trial.suggest_float("steer_sigma_deg", 2.0, 12.0)
+    p["accel_sigma_mps2"] = trial.suggest_float("accel_sigma_mps2", 0.3, 2.5)
+    p["proj_window_fwd"] = trial.suggest_int("proj_window_fwd", 30, 120)
+    p["target_speed_pct"] = trial.suggest_float("target_speed_pct", 0.4, 1.0)
+    p["w_d"] = trial.suggest_float("w_d", 5.0, 500.0, log=True)
+    p["w_heading"] = trial.suggest_float("w_heading", 1.0, 200.0, log=True)
+    p["w_speed"] = trial.suggest_float("w_speed", 0.5, 50.0, log=True)
+    p["w_delta_rate"] = trial.suggest_float("w_delta_rate", 5.0, 300.0, log=True)
+    p["w_boundary"] = trial.suggest_float("w_boundary", 100.0, 10000.0, log=True)
+    p["w_progress"] = trial.suggest_float("w_progress", 0.1, 50.0, log=True)
+    p["w_terminal_d"] = trial.suggest_float("w_terminal_d", 20.0, 600.0, log=True)
+    p["w_terminal_heading"] = trial.suggest_float("w_terminal_heading", 10.0, 400.0, log=True)
+    p["w_a_lat"] = trial.suggest_float("w_a_lat", 5.0, 300.0, log=True)
+    p["w_edge"] = trial.suggest_float("w_edge", 10.0, 2000.0, log=True)
+    p["edge_inner_frac"] = trial.suggest_float("edge_inner_frac", 0.2, 0.95)
+    return p
+
+
+def _sample_params_from_spec(spec: dict):
+    """Return a _sample_params-compatible function built from a JSON spec dict.
+
+    The spec schema::
+
+        {
+          "fixed":  {str: any},           # optional — applied to DEFAULT_MPC first
+          "search": [                      # required
+            {"name": str, "dist": str, "lo": float, "hi": float, ...}
+          ]
+        }
+
+    Supported dist values: uniform, log_uniform, int, int_log, categorical.
+    Categorical entries carry a "choices" list instead of lo/hi.
+    Int entries may carry an optional "step" field.
+    """
+    fixed = spec.get("fixed", {})
+    search = spec["search"]
+
+    def _sampler(trial, defaults: dict) -> dict:
+        p = dict(defaults)
+        p.update(fixed)
+        for entry in search:
+            name = entry["name"]
+            dist = entry["dist"]
+            if dist == "uniform":
+                p[name] = trial.suggest_float(name, entry["lo"], entry["hi"])
+            elif dist == "log_uniform":
+                p[name] = trial.suggest_float(name, entry["lo"], entry["hi"], log=True)
+            elif dist == "int":
+                step = entry.get("step", 1)
+                p[name] = trial.suggest_int(name, int(entry["lo"]), int(entry["hi"]), step=step)
+            elif dist == "int_log":
+                p[name] = trial.suggest_int(name, int(entry["lo"]), int(entry["hi"]), log=True)
+            elif dist == "categorical":
+                p[name] = trial.suggest_categorical(name, entry["choices"])
+            else:
+                raise ValueError(f"unknown dist {dist!r} for param {name!r}")
+        return p
+
+    return _sampler
+
+
+def _sweep_objective(
+    trial,
+    line,
+    defaults: dict,
+    sim_backend: str,
+    datasim_model_dir: str,
+    datasim_use_mlp: bool,
+    noise_mode: str,
+    n_laps: int,
+    seeds_per_trial: int,
+    safety_margin: float,
+    sample_fn=None,
+):
+    """Top-level Optuna objective — defined at module level for n_jobs > 1."""
+    from sim.closed_loop import simulate
+
+    _fn = sample_fn if sample_fn is not None else _sample_params
+    params = _fn(trial, defaults)
+    results = []
+    for seed in range(seeds_per_trial):
+        r = simulate(
+            line, params,
+            sim_backend=sim_backend,
+            datasim_model_dir=datasim_model_dir,
+            datasim_use_mlp=datasim_use_mlp,
+            noise_mode=noise_mode,
+            rng_seed=seed,
+            n_laps=n_laps,
+            safety_margin=safety_margin,
+        )
+        results.append(r)
+
+    all_complete = all(r.completed_laps >= n_laps and not r.aborted for r in results)
+    worst_max_d = max(r.max_abs_d for r in results)
+    worst_laps = min(r.completed_laps for r in results)
+    lap_times = [r.avg_lap_time_s for r in results if r.completed_laps >= n_laps]
+    mean_lap_time = float(np.mean(lap_times)) if lap_times else float("inf")
+    worst_lap_time = float(np.max(lap_times)) if lap_times else float("inf")
+    mean_speed = float(np.mean([r.avg_speed for r in results]))
+
+    track_half = params.get("track_half_width_m", 2.0)
+    kart_half = params.get("kart_half_width_m", 0.5025)
+    limit = track_half - kart_half - safety_margin
+    safe_all = all_complete and worst_max_d <= limit
+
+    trial.set_user_attr("laps_min", worst_laps)
+    trial.set_user_attr("mean_lap_time_s", mean_lap_time)
+    trial.set_user_attr("worst_lap_time_s", worst_lap_time)
+    trial.set_user_attr("worst_max_abs_d", worst_max_d)
+    trial.set_user_attr("mean_speed", mean_speed)
+    trial.set_user_attr("safe", safe_all)
+    trial.set_user_attr("all_complete", all_complete)
+
+    if not all_complete:
+        shortfall = sum(max(0, n_laps - r.completed_laps) for r in results)
+        return 1.0e6 + 1.0e4 * shortfall
+    if not safe_all:
+        over = max(0.0, worst_max_d - limit)
+        return 1.0e5 + 1.0e3 * over + mean_lap_time
+    return mean_lap_time
+
+
+# ---------------------------------------------------------------------------
+# Subcommand: one
+# ---------------------------------------------------------------------------
+def _add_one_parser(sub):
+    p = sub.add_parser("one", help="run a single MPC param set")
+    p.add_argument("--line", default="data/racing_line/line6.csv")
+    p.add_argument("--sim", choices=["bicycle", "datasim"], default="datasim")
+    p.add_argument("--datasim-model-dir", default="sim/model")
+    p.add_argument("--no-mlp", action="store_true", help="disable MLP residual in DataSim (use ID-only bicycle path). Recommended for closed-loop sims — the MLP has a small positive dv bias that compounds.")
+
+    p.add_argument("--noise-mode", choices=["none", "gaussian", "bootstrap"], default="none")
+    p.add_argument("--params-json", default=None, help="JSON overlay onto DEFAULT_MPC")
+    p.add_argument("--laps", type=int, default=2)
+    p.add_argument("--seed", type=int, default=0)
+    p.add_argument("--torch-threads", type=int, default=0)
+    p.add_argument("--out-dir", default=None)
+    p.add_argument("--render", action="store_true")
+    p.add_argument("--fps", type=int, default=30)
+    p.add_argument("--max-frames", type=int, default=5000,
+                   help="hard cap on GIF frames (default 5000)")
+    p.add_argument("--realtime-factor", type=float, default=1.0,
+                   help="sim-seconds per GIF-second; 1.0=real time, 10=10x sped up")
+    p.add_argument("--bag-npz", default=None, metavar="PATH",
+                   help="aligned bag .npz (sim/load_bags.py output) — overlay real "
+                        "kart trajectory + lateral-offset stats against the sim")
+
+
+def cmd_one(args):
+    from sim.closed_loop import DEFAULT_MPC, simulate
+
+    if args.torch_threads > 0:
+        import torch
+        torch.set_num_threads(args.torch_threads)
+
+    out_dir = args.out_dir or os.path.join("runs", f"one-{int(time.time())}")
+    os.makedirs(out_dir, exist_ok=True)
+
+    line_path = args.line if os.path.isabs(args.line) else os.path.join(REPO, args.line)
+    line = load_line(line_path)
+    if len(line) < 10:
+        raise SystemExit(f"racing line at {line_path} too short / not found")
+
+    mpc_params = dict(DEFAULT_MPC)
+    if args.params_json:
+        with open(args.params_json) as f:
+            overlay = json.load(f)
+        mpc_params.update(overlay)
+
+    result = simulate(
+        line, mpc_params,
+        sim_backend=args.sim,
+        datasim_model_dir=args.datasim_model_dir,
+        datasim_use_mlp=not args.no_mlp,
+        noise_mode=args.noise_mode,
+        rng_seed=args.seed,
+        n_laps=args.laps,
+    )
+
+    # Save params
+    params_out = dict(mpc_params)
+    params_out["_meta"] = {
+        "sim": args.sim,
+        "noise_mode": args.noise_mode,
+        "seed": args.seed,
+        "laps": args.laps,
+        "line": args.line,
+    }
+    with open(os.path.join(out_dir, "params.json"), "w") as f:
+        json.dump(params_out, f, indent=2)
+
+    # Save metrics
+    metrics = {
+        "completed_laps": result.completed_laps,
+        "total_time_s": result.total_time_s,
+        "avg_lap_time_s": result.avg_lap_time_s,
+        "max_abs_d": result.max_abs_d,
+        "avg_speed": result.avg_speed,
+        "safe": result.safe,
+        "aborted": result.aborted,
+    }
+    with open(os.path.join(out_dir, "metrics.json"), "w") as f:
+        json.dump(metrics, f, indent=2)
+
+    # Save trajectory
+    np.savez(
+        os.path.join(out_dir, "trajectory.npz"),
+        xs=result.xs, ys=result.ys, yaws=result.yaws,
+        vs=result.vs, ds=result.ds, times=result.times,
+    )
+
+    if args.render:
+        from sim.render import render_gif
+        render_gif(
+            result.xs, result.ys, result.yaws, result.vs, result.ds, result.times,
+            line_path=line_path,
+            out_path=os.path.join(out_dir, "out.gif"),
+            mpc_params=mpc_params,
+            fps=args.fps,
+            max_frames=args.max_frames,
+            realtime_factor=args.realtime_factor,
+            title=f"{args.sim} sim — {result.completed_laps} laps, {result.avg_lap_time_s:.1f}s/lap",
+        )
+
+    print(
+        f"one: laps={result.completed_laps} avg_lap={result.avg_lap_time_s:.2f}s "
+        f"max|d|={result.max_abs_d:.3f}m avg_v={result.avg_speed:.2f}m/s "
+        f"safe={result.safe} aborted={result.aborted} -> {out_dir}"
+    )
+
+    if args.bag_npz:
+        _compare_sim_to_bag(
+            sim_xs=result.xs, sim_ys=result.ys, sim_vs=result.vs, sim_ds=result.ds,
+            bag_npz=args.bag_npz, line_path=line_path, out_dir=out_dir,
+            sim_label=f"{args.sim} sim on {os.path.basename(args.line)}",
+        )
+
+
+def _compare_sim_to_bag(*, sim_xs, sim_ys, sim_vs, sim_ds,
+                        bag_npz: str, line_path: str, out_dir: str,
+                        sim_label: str) -> None:
+    """Overlay sim trajectory against a real bag's /odom on the same line.
+    Writes comparison.png + comparison.json into out_dir."""
+    import csv as _csv
+    import matplotlib
+    matplotlib.use("Agg")
+    import matplotlib.pyplot as plt
+    from sim.load_bags import AlignedBag
+
+    # Load line geometry
+    with open(line_path) as f:
+        rows = list(_csv.reader(f))[1:]
+    lx = np.array([float(r[1]) for r in rows])
+    ly = np.array([float(r[2]) for r in rows])
+    dx = np.empty_like(lx); dy = np.empty_like(ly)
+    dx[1:-1] = lx[2:] - lx[:-2]; dy[1:-1] = ly[2:] - ly[:-2]
+    dx[0] = lx[1] - lx[0]; dy[0] = ly[1] - ly[0]
+    dx[-1] = lx[-1] - lx[-2]; dy[-1] = ly[-1] - ly[-2]
+    L = np.hypot(dx, dy); L = np.where(L < 1e-9, 1.0, L)
+    nx, ny = -dy / L, dx / L  # left-hand normal
+
+    # Load bag
+    bag = AlignedBag.from_npz(bag_npz if os.path.isabs(bag_npz) else os.path.join(REPO, bag_npz))
+    auto = bag.autonomous
+    bag_x, bag_y, bag_v = bag.odom_x[auto], bag.odom_y[auto], bag.v[auto]
+
+    # Global nearest-point projection (figure-8 safe)
+    def _project(xs, ys):
+        out = np.empty(len(xs))
+        for i in range(len(xs)):
+            d = (lx - xs[i]) ** 2 + (ly - ys[i]) ** 2
+            j = int(np.argmin(d))
+            out[i] = (xs[i] - lx[j]) * nx[j] + (ys[i] - ly[j]) * ny[j]
+        return out
+
+    bag_d = _project(bag_x, bag_y)
+    drive_mask = bag_v > 3.0  # filter stops
+
+    # Track edges
+    HW = 2.0
+    left_x = lx + nx * HW; left_y = ly + ny * HW
+    right_x = lx - nx * HW; right_y = ly - ny * HW
+
+    fig, axes = plt.subplots(1, 2, figsize=(16, 8))
+    sim_max_d = float(np.abs(sim_ds).max())
+    bag_max_d = float(np.abs(bag_d).max())
+    sim_mean_d = float(np.abs(sim_ds).mean())
+    bag_mean_d = float(np.abs(bag_d).mean())
+    bag_mean_d_drive = float(np.abs(bag_d[drive_mask]).mean()) if drive_mask.any() else float("nan")
+
+    for ax, label, x, y in [
+        (axes[0],
+         f"{sim_label}\n|d| mean={sim_mean_d:.3f}m  max={sim_max_d:.2f}m  v_mean={sim_vs.mean():.2f} m/s",
+         sim_xs, sim_ys),
+        (axes[1],
+         f"REAL bag {os.path.basename(bag_npz)}\n|d| mean={bag_mean_d:.3f}m  (driving v>3 m/s: {bag_mean_d_drive:.3f}m)  max={bag_max_d:.2f}m",
+         bag_x, bag_y),
+    ]:
+        ax.plot(left_x, left_y, "#444", lw=1.0)
+        ax.plot(right_x, right_y, "#444", lw=1.0)
+        ax.plot(lx, ly, "#999", lw=0.5, ls="--", label="racing line")
+        ax.plot(x, y, "#0a7", lw=0.7, alpha=0.7, label="kart path")
+        ax.set_aspect("equal")
+        ax.set_title(label, fontsize=11)
+        ax.grid(alpha=0.3)
+        ax.legend(loc="lower right", fontsize=8, frameon=False)
+    fig.tight_layout()
+    out_png = os.path.join(out_dir, "comparison.png")
+    fig.savefig(out_png, dpi=120)
+    plt.close(fig)
+
+    stats = {
+        "sim": {
+            "ticks": int(sim_xs.size),
+            "v_mean": float(sim_vs.mean()),
+            "v_max": float(sim_vs.max()),
+            "abs_d_mean": sim_mean_d,
+            "abs_d_max": sim_max_d,
+        },
+        "bag": {
+            "ticks_autonomous": int(auto.sum()),
+            "ticks_driving_v_gt_3": int(drive_mask.sum()),
+            "v_mean_all": float(bag_v.mean()),
+            "v_mean_driving": float(bag_v[drive_mask].mean()) if drive_mask.any() else None,
+            "v_max": float(bag_v.max()),
+            "abs_d_mean_all": bag_mean_d,
+            "abs_d_mean_driving": bag_mean_d_drive,
+            "abs_d_max": bag_max_d,
+        },
+        "line": os.path.basename(line_path),
+        "bag_file": os.path.basename(bag_npz),
+    }
+    with open(os.path.join(out_dir, "comparison.json"), "w") as f:
+        json.dump(stats, f, indent=2)
+
+    print(
+        f"compare-to-bag: sim |d|_mean={sim_mean_d:.3f}m max={sim_max_d:.2f}m"
+        f"  |  bag(driving) |d|_mean={bag_mean_d_drive:.3f}m max={bag_max_d:.2f}m"
+        f"  -> {out_png}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Subcommand: sweep
+# ---------------------------------------------------------------------------
+def _add_sweep_parser(sub):
+    p = sub.add_parser("sweep", help="Optuna sweep over MPC params")
+    p.add_argument("--line", default="data/racing_line/line6.csv")
+    p.add_argument("--no-mlp", action="store_true", help="disable MLP residual in DataSim (use ID-only bicycle path). Recommended for closed-loop sims — the MLP has a small positive dv bias that compounds.")
+
+    p.add_argument("--sim", choices=["bicycle", "datasim"], default="datasim")
+    p.add_argument("--datasim-model-dir", default="sim/model")
+    p.add_argument("--noise-mode", choices=["none", "gaussian", "bootstrap"], default="none")
+    p.add_argument("--laps", type=int, default=2)
+    p.add_argument("--torch-threads", type=int, default=0)
+    p.add_argument("--trials", type=int, default=120)
+    p.add_argument("--n-jobs", type=int, default=max(1, (os.cpu_count() or 2) // 2))
+    p.add_argument("--startup", type=int, default=20)
+    p.add_argument("--seeds-per-trial", type=int, default=3)
+    p.add_argument("--safety-margin", type=float, default=0.2)
+    p.add_argument("--log", choices=["summary", "full"], default="summary")
+    p.add_argument("--render-best", action="store_true")
+    p.add_argument("--out-dir", default=None)
+    p.add_argument("--resume", action="store_true")
+    p.add_argument("--seed", type=int, default=42)
+    p.add_argument("--sweep-spec", default=None, metavar="PATH",
+                   help="JSON search-space spec; when set replaces the hardcoded _sample_params")
+
+
+def cmd_sweep(args):
+    import optuna
+    import functools
+    from sim.closed_loop import DEFAULT_MPC
+
+    optuna.logging.set_verbosity(optuna.logging.WARNING)
+
+    if args.torch_threads > 0:
+        import torch
+        torch.set_num_threads(args.torch_threads)
+
+    out_dir = args.out_dir or os.path.join("sweeps", f"sweep-{int(time.time())}")
+    os.makedirs(out_dir, exist_ok=True)
+
+    line_path = args.line if os.path.isabs(args.line) else os.path.join(REPO, args.line)
+    line = load_line(line_path)
+    if len(line) < 10:
+        raise SystemExit(f"racing line at {line_path} too short / not found")
+
+    sampler = optuna.samplers.TPESampler(seed=args.seed, n_startup_trials=args.startup)
+    study = optuna.create_study(direction="minimize", sampler=sampler)
+
+    # Resume: load prior trials from trials.json
+    trials_path = os.path.join(out_dir, "trials.json")
+    if args.resume and os.path.exists(trials_path):
+        with open(trials_path) as f:
+            prior = json.load(f)
+        for t in prior:
+            study.add_trial(optuna.trial.create_trial(
+                params=t.get("params", {}),
+                distributions={},
+                value=t.get("value", float("inf")),
+            ))
+
+    # Pick sample function: spec-driven or hardcoded default
+    if args.sweep_spec:
+        with open(args.sweep_spec) as _sf:
+            _spec = json.load(_sf)
+        sample_fn = _sample_params_from_spec(_spec)
+    else:
+        sample_fn = None  # _sweep_objective falls back to _sample_params
+
+    # Bind with functools.partial over a module-level function (required for n_jobs > 1)
+    objective = functools.partial(
+        _sweep_objective,
+        line=line,
+        defaults=dict(DEFAULT_MPC),
+        sim_backend=args.sim,
+        datasim_model_dir=args.datasim_model_dir,
+        datasim_use_mlp=not args.no_mlp,
+        noise_mode=args.noise_mode,
+        n_laps=args.laps,
+        seeds_per_trial=args.seeds_per_trial,
+        safety_margin=args.safety_margin,
+        sample_fn=sample_fn,
+    )
+
+    jsonl_path = os.path.join(out_dir, "trials.jsonl")
+    log_mode = args.log
+
+    def _per_trial_cb(study, trial):
+        attrs = trial.user_attrs
+        safe = attrs.get("safe", False)
+        laps_min = attrs.get("laps_min", 0)
+        lap_t = attrs.get("mean_lap_time_s", float("inf"))
+        max_d = attrs.get("worst_max_abs_d", float("inf"))
+        all_complete = attrs.get("all_complete", False)
+        tag = "OK    " if safe else ("UNSAFE" if all_complete else "FAIL  ")
+
+        # Write JSONL line
+        if log_mode == "summary":
+            rec = {
+                "number": trial.number,
+                "value": trial.value,
+                "mean_lap_time_s": lap_t,
+                "worst_max_abs_d": max_d,
+                "safe": safe,
+                "all_complete": all_complete,
+            }
+        else:
+            rec = {
+                "number": trial.number,
+                "value": trial.value,
+                "params": trial.params,
+                **attrs,
+            }
+        with open(jsonl_path, "a") as jf:
+            jf.write(json.dumps(rec) + "\n")
+
+        # Progress line (<=100 chars)
+        msg = (
+            f"  trial {trial.number:3d} [{tag}] laps_min={laps_min} "
+            f"lap_t={lap_t:.2f}s  max|d|={max_d:.3f}m  obj={trial.value:.3g}"
+        )
+        print(msg[:100])
+
+    print(
+        f"Optuna sweep: {args.trials} trials, {args.laps}-lap eval, "
+        f"{args.seeds_per_trial} seed(s), {args.n_jobs} job(s), "
+        f"safety={args.safety_margin}m -> {out_dir}"
+    )
+    t0 = time.time()
+    study.optimize(objective, n_trials=args.trials, n_jobs=args.n_jobs,
+                   callbacks=[_per_trial_cb])
+    print(f"Sweep done in {time.time() - t0:.1f}s")
+
+    # Save all trials
+    all_trials = sorted([
+        {
+            "number": t.number,
+            "value": t.value,
+            "params": t.params,
+            **t.user_attrs,
+        }
+        for t in study.trials
+    ], key=lambda x: x["number"])
+    with open(trials_path, "w") as f:
+        json.dump(all_trials, f, indent=2)
+
+    # Find best safe trial
+    safe_trials = [t for t in study.trials if t.user_attrs.get("safe")]
+    best_json_path = os.path.join(out_dir, "best.json")
+    if safe_trials:
+        best_t = min(safe_trials, key=lambda t: t.user_attrs.get("mean_lap_time_s", float("inf")))
+        with open(best_json_path, "w") as f:
+            json.dump({
+                "trial_number": best_t.number,
+                "params": best_t.params,
+                "metrics": dict(best_t.user_attrs),
+            }, f, indent=2)
+
+    # Write summary.txt
+    summary_path = os.path.join(out_dir, "summary.txt")
+    with open(summary_path, "w") as f:
+        f.write(f"Sweep: {len(all_trials)} trials total, {len(safe_trials)} safe\n\n")
+        if safe_trials:
+            top10 = sorted(safe_trials, key=lambda t: t.user_attrs.get("mean_lap_time_s", float("inf")))[:10]
+            f.write("Top-10 safe trials by mean lap time:\n")
+            for t in top10:
+                attrs = t.user_attrs
+                f.write(
+                    f"  #{t.number:3d}  lap_t={attrs.get('mean_lap_time_s', float('inf')):.3f}s"
+                    f"  max|d|={attrs.get('worst_max_abs_d', float('inf')):.3f}m"
+                    f"  speed={attrs.get('mean_speed', 0):.2f}m/s\n"
+                )
+            best_lap = top10[0].user_attrs.get("mean_lap_time_s", float("inf"))
+            f.write(f"\n{len(safe_trials)} safe / {len(all_trials)} trials, best lap = {best_lap:.3f}s\n")
+        else:
+            f.write("No safe trials found.\n")
+
+    if args.render_best and safe_trials:
+        from sim.closed_loop import DEFAULT_MPC, simulate
+        from sim.render import render_gif
+
+        best_params = dict(DEFAULT_MPC)
+        best_params.update(best_t.params)
+        result = simulate(
+            line, best_params,
+            sim_backend=args.sim,
+            datasim_model_dir=args.datasim_model_dir,
+            datasim_use_mlp=not args.no_mlp,
+            noise_mode=args.noise_mode,
+            rng_seed=0,
+            n_laps=args.laps,
+            max_steps=200000,
+        )
+        np.savez(
+            os.path.join(out_dir, "best_trajectory.npz"),
+            xs=result.xs, ys=result.ys, yaws=result.yaws,
+            vs=result.vs, ds=result.ds, times=result.times,
+        )
+        render_gif(
+            result.xs, result.ys, result.yaws, result.vs, result.ds, result.times,
+            line_path=line_path,
+            out_path=os.path.join(out_dir, "best.gif"),
+            mpc_params=best_params,
+            title=f"Best trial #{best_t.number} — {result.completed_laps} laps",
+        )
+
+    n_safe = len(safe_trials)
+    best_lap_str = (
+        f"{min(t.user_attrs.get('mean_lap_time_s', float('inf')) for t in safe_trials):.2f}s"
+        if safe_trials else "N/A"
+    )
+    print(
+        f"sweep: {len(all_trials)} trials, {n_safe} safe, "
+        f"best_lap={best_lap_str} -> {out_dir}"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Subcommand: render
+# ---------------------------------------------------------------------------
+def _add_render_parser(sub):
+    p = sub.add_parser("render", help="render a saved trajectory.npz as a GIF")
+    p.add_argument("--trajectory", required=True)
+    p.add_argument("--line", required=True)
+    p.add_argument("--params", default=None)
+    p.add_argument("--out", required=True)
+    p.add_argument("--fps", type=int, default=30)
+    p.add_argument("--max-frames", type=int, default=5000,
+                   help="hard cap on GIF frames (default 5000)")
+    p.add_argument("--realtime-factor", type=float, default=1.0,
+                   help="sim-seconds per GIF-second; 1.0=real time, 10=10x sped up")
+    p.add_argument("--title", default="Kart sim")
+
+
+def cmd_render(args):
+    from sim.render import render_gif
+
+    data = np.load(args.trajectory)
+    mpc_params = None
+    if args.params:
+        with open(args.params) as f:
+            mpc_params = json.load(f)
+
+    render_gif(
+        xs=data["xs"],
+        ys=data["ys"],
+        yaws=data["yaws"],
+        vs=data["vs"],
+        ds=data["ds"],
+        times=data["times"],
+        line_path=args.line if os.path.isabs(args.line) else os.path.join(REPO, args.line),
+        out_path=args.out,
+        mpc_params=mpc_params,
+        fps=args.fps,
+        max_frames=args.max_frames,
+        realtime_factor=args.realtime_factor,
+        title=args.title,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Subcommand: compare-residuals
+# ---------------------------------------------------------------------------
+
+# Mode -> param overrides applied on top of the user's --params-json overlay.
+# IMPORTANT: keys go straight to MPCPlanner, which only keeps those starting
+# with `residual.` (mpc.py:153 strips that exact prefix). Do NOT use the
+# `mpc.residual.*` form used in the yaml — those reach the planner only via
+# the ROS node's prefix-rewriting, which doesn't run in this standalone path.
+#
+# `apply_min_samples_this_run` lowered from its 1200 default so apply mode
+# kicks in within the first few seconds of the comparison run; otherwise the
+# learner stays gated in shadow mode for ~20s and the comparison is mute.
+_APPLY_GATE_OVERRIDES = {
+    "residual.apply_min_samples_this_run": 200,
+    # Allow training samples even when the kart drifts somewhat off the line
+    # — the comparison runs see |d| up to ~8 m and the default 3 m rejects too
+    # much.
+    "residual.max_train_d_m": 10.0,
+}
+_RESIDUAL_MODE_OVERRIDES = {
+    "off":    {"residual.mode": "off"},
+    "rls":    {"residual.mode": "apply", "residual.gbm_enabled": False, **_APPLY_GATE_OVERRIDES},
+    "gbm":    {"residual.mode": "apply", "residual.gbm_enabled": True,  **_APPLY_GATE_OVERRIDES},
+    "both":   {"residual.mode": "apply", "residual.gbm_enabled": True,  **_APPLY_GATE_OVERRIDES},
+    "shadow": {"residual.mode": "shadow", **_APPLY_GATE_OVERRIDES},
+}
+
+_ALL_MODES = list(_RESIDUAL_MODE_OVERRIDES.keys())
+
+
+def _run_one_compare(mode: str, seed: int, args_dict: dict) -> dict:
+    """Top-level picklable job for joblib.Parallel."""
+    import traceback as _tb
+    from sim.closed_loop import DEFAULT_MPC, simulate
+
+    out_dir = args_dict["out_dir"]
+    run_dir = os.path.join(out_dir, f"{mode}_seed{seed}")
+    os.makedirs(run_dir, exist_ok=True)
+
+    mpc_params = dict(DEFAULT_MPC)
+    if args_dict.get("params_json"):
+        with open(args_dict["params_json"]) as f:
+            mpc_params.update(json.load(f))
+    mpc_params.update(_RESIDUAL_MODE_OVERRIDES.get(mode, {}))
+
+    line_path = args_dict["line_path"]
+    line = load_line(line_path)
+
+    try:
+        result = simulate(
+            line, mpc_params,
+            sim_backend=args_dict["sim"],
+            datasim_model_dir=args_dict["datasim_model_dir"],
+            datasim_use_mlp=args_dict.get("datasim_use_mlp", True),
+            noise_mode=args_dict["noise_mode"],
+            rng_seed=seed,
+            n_laps=args_dict["laps"],
+        )
+        aborted = result.aborted
+        abort_step = None
+        abort_xy = None
+        abort_t = None
+        max_d_step = int(np.argmax(np.abs(result.ds))) if result.ds.size else None
+        max_d_value = float(result.max_abs_d)
+        completed_laps = result.completed_laps
+        lap_time = result.avg_lap_time_s
+        avg_speed = result.avg_speed
+        total_time = result.total_time_s
+    except Exception:
+        aborted = True
+        abort_step = -1
+        abort_xy = None
+        abort_t = None
+        max_d_step = None
+        max_d_value = float("inf")
+        completed_laps = 0
+        lap_time = float("inf")
+        avg_speed = 0.0
+        total_time = 0.0
+        result = None
+        print(f"  [{mode} seed{seed}] EXCEPTION:\n{_tb.format_exc()}", flush=True)
+
+    # Hash the trajectory so we can detect identical runs across modes (the
+    # smoking gun if param overrides aren't reaching the planner).
+    import hashlib as _hashlib
+    traj_hash = ""
+    residual_stats: dict = {}
+    if result is not None:
+        np.savez(
+            os.path.join(run_dir, "trajectory.npz"),
+            xs=result.xs, ys=result.ys, yaws=result.yaws,
+            vs=result.vs, ds=result.ds, times=result.times,
+        )
+        h = _hashlib.sha1()
+        for arr in (result.xs, result.ys, result.yaws, result.vs):
+            h.update(np.ascontiguousarray(arr, dtype=np.float64).tobytes())
+        traj_hash = h.hexdigest()[:16]
+        residual_stats = dict(result.residual_stats or {})
+        metrics = {
+            "completed_laps": result.completed_laps,
+            "total_time_s": result.total_time_s,
+            "avg_lap_time_s": result.avg_lap_time_s,
+            "max_abs_d": result.max_abs_d,
+            "avg_speed": result.avg_speed,
+            "safe": result.safe,
+            "aborted": result.aborted,
+            "trajectory_hash": traj_hash,
+            "residual_stats": residual_stats,
+        }
+    else:
+        metrics = {
+            "completed_laps": 0,
+            "total_time_s": 0.0,
+            "avg_lap_time_s": float("inf"),
+            "max_abs_d": float("inf"),
+            "avg_speed": 0.0,
+            "safe": False,
+            "aborted": True,
+            "trajectory_hash": traj_hash,
+            "residual_stats": residual_stats,
+        }
+    with open(os.path.join(run_dir, "metrics.json"), "w") as f:
+        json.dump(metrics, f, indent=2)
+
+    saved_params = dict(mpc_params)
+    saved_params["_meta"] = {"mode": mode, "seed": seed}
+    with open(os.path.join(run_dir, "params.json"), "w") as f:
+        json.dump(saved_params, f, indent=2)
+
+    # One-line per-run diagnostic so the SLURM .out log tells the whole story.
+    rs = residual_stats
+    print(
+        f"  [{mode:<6} seed{seed}] "
+        f"residual.mode={mpc_params.get('residual.mode', '?'):<6} "
+        f"gbm_enabled={mpc_params.get('residual.gbm_enabled', '?')} "
+        f"-> effective={rs.get('effective_mode', '?'):<6} "
+        f"trained={rs.get('samples_trained', '?')} "
+        f"accepted={rs.get('samples_accepted_this_run', '?')} "
+        f"use_gbm={rs.get('use_gbm', '?')} "
+        f"|theta_s|={rs.get('theta_s_norm', 0.0):.3f} "
+        f"last_pred=({rs.get('last_pred_s', 0.0):+.3f},{rs.get('last_pred_d', 0.0):+.3f}) "
+        f"max|d|={metrics['max_abs_d']:.2f} "
+        f"laps={metrics['completed_laps']} "
+        f"hash={traj_hash}",
+        flush=True,
+    )
+
+    if result is not None and aborted and result.ds.size:
+        abort_step = len(result.ds) - 1
+        abort_xy = [float(result.xs[-1]), float(result.ys[-1])]
+        abort_t = float(result.times[-1])
+
+    return {
+        "mode": mode,
+        "seed": seed,
+        "run_dir": run_dir,
+        "aborted": aborted,
+        "abort_step": abort_step,
+        "abort_xy": abort_xy,
+        "abort_t": abort_t,
+        "max_d_step": max_d_step,
+        "max_d_value": max_d_value,
+        "completed_laps": completed_laps,
+        "lap_time": lap_time,
+        "avg_speed": avg_speed,
+        "total_time": total_time,
+        "trajectory_hash": traj_hash,
+        "residual_stats": residual_stats,
+        "result": result,  # kept in-process for render; not serialised by joblib
+    }
+
+
+def _add_compare_residuals_parser(sub):
+    p = sub.add_parser("compare-residuals",
+                       help="compare multiple residual-learner modes side-by-side")
+    p.add_argument("--line", default="data/racing_line/line6.csv")
+    p.add_argument("--no-mlp", action="store_true", help="disable MLP residual in DataSim (use ID-only bicycle path). Recommended for closed-loop sims — the MLP has a small positive dv bias that compounds.")
+    p.add_argument("--sim", choices=["bicycle", "datasim"], default="datasim")
+    p.add_argument("--datasim-model-dir", default="sim/model")
+    p.add_argument("--noise-mode", choices=["none", "gaussian", "bootstrap"], default="none")
+    p.add_argument("--params-json", default=None, metavar="PATH",
+                   help="JSON overlay onto DEFAULT_MPC applied to every mode")
+    p.add_argument("--modes", nargs="+", default=["off", "rls", "gbm", "both", "shadow"],
+                   metavar="MODE")
+    p.add_argument("--laps", type=int, default=5)
+    p.add_argument("--seeds", type=int, default=2, metavar="N")
+    p.add_argument("--n-jobs", type=int, default=max(1, (os.cpu_count() or 2) // 2))
+    p.add_argument("--torch-threads", type=int, default=0)
+    p.add_argument("--out-dir", default=None)
+    p.add_argument("--render", action="store_true",
+                   help="write comparison.png with 4 trajectories overlaid")
+
+
+def cmd_compare_residuals(args):
+    from joblib import Parallel, delayed
+
+    if args.torch_threads > 0:
+        import torch
+        torch.set_num_threads(args.torch_threads)
+
+    ts = time.strftime("%Y%m%d-%H%M%S")
+    out_dir = args.out_dir or os.path.join("runs", f"residual-compare-{ts}")
+    os.makedirs(out_dir, exist_ok=True)
+
+    line_path = args.line if os.path.isabs(args.line) else os.path.join(REPO, args.line)
+    line = load_line(line_path)
+    if len(line) < 10:
+        raise SystemExit(f"racing line at {line_path} too short / not found")
+
+    # Validate modes
+    for m in args.modes:
+        if m not in _RESIDUAL_MODE_OVERRIDES:
+            raise SystemExit(f"unknown mode {m!r}; valid: {_ALL_MODES}")
+
+    args_dict = {
+        "out_dir": out_dir,
+        "line_path": line_path,
+        "sim": args.sim,
+        "datasim_model_dir": args.datasim_model_dir,
+        "datasim_use_mlp": not args.no_mlp,
+        "noise_mode": args.noise_mode,
+        "params_json": args.params_json,
+        "laps": args.laps,
+    }
+
+    jobs = [(mode, seed) for mode in args.modes for seed in range(args.seeds)]
+    print(
+        f"compare-residuals: {len(jobs)} runs "
+        f"({len(args.modes)} modes × {args.seeds} seed(s)), "
+        f"n_jobs={args.n_jobs} -> {out_dir}"
+    )
+
+    raw_results = Parallel(n_jobs=args.n_jobs)(
+        delayed(_run_one_compare)(mode, seed, args_dict)
+        for mode, seed in jobs
+    )
+
+    # ------------------------------------------------------------------
+    # Aggregate per-mode stats
+    # ------------------------------------------------------------------
+    from collections import defaultdict
+    mode_results = defaultdict(list)
+    for r in raw_results:
+        mode_results[r["mode"]].append(r)
+
+    track_half = 2.0
+    kart_half = 0.5025
+    safety_margin = 0.2
+    limit = track_half - kart_half - safety_margin
+
+    summary = {}
+    failure_points = []
+
+    for mode in args.modes:
+        rs = mode_results[mode]
+        laps_vals = [r["completed_laps"] for r in rs]
+        lap_times = [r["lap_time"] for r in rs if r["lap_time"] != float("inf")]
+        max_ds = [r["max_d_value"] for r in rs if r["max_d_value"] != float("inf")]
+        aborted_count = sum(1 for r in rs if r["aborted"])
+        summary[mode] = {
+            "laps_mean": float(np.mean(laps_vals)) if laps_vals else 0.0,
+            "laps_min": int(min(laps_vals)) if laps_vals else 0,
+            "lap_time_mean": float(np.mean(lap_times)) if lap_times else float("inf"),
+            "lap_time_std": float(np.std(lap_times)) if len(lap_times) > 1 else 0.0,
+            "max_abs_d_max": float(max(max_ds)) if max_ds else float("inf"),
+            "aborted_count": aborted_count,
+            "n_runs": len(rs),
+        }
+        for r in rs:
+            is_failure = r["aborted"] or r["max_d_value"] > limit
+            if is_failure:
+                failure_points.append({
+                    "mode": r["mode"],
+                    "seed": r["seed"],
+                    "abort_step": r["abort_step"],
+                    "abort_xy": r["abort_xy"],
+                    "abort_t": r["abort_t"],
+                    "max_d_step": r["max_d_step"],
+                    "max_d_value": r["max_d_value"],
+                })
+
+    with open(os.path.join(out_dir, "summary.json"), "w") as f:
+        json.dump(summary, f, indent=2)
+    with open(os.path.join(out_dir, "failure_points.json"), "w") as f:
+        json.dump(failure_points, f, indent=2)
+
+    # Cross-mode trajectory-hash check + per-mode learner-state aggregation.
+    # Even when the planner doesn't yet fold residual predictions back into
+    # its command output (current state of mpc.py — res_ps/res_pd reach
+    # telemetry only), the *learners* still differ across modes: off trains
+    # nothing, rls trains RLS, gbm trains both and may flip via the selector.
+    # Surface those differences so the comparison is informative even when
+    # trajectories collide.
+    hashes_per_mode: dict[str, set[str]] = {}
+    for mode in args.modes:
+        s = {r.get("trajectory_hash", "") for r in mode_results[mode] if r.get("trajectory_hash")}
+        if s:
+            hashes_per_mode[mode] = s
+    duplicate_pairs = []
+    mode_list = list(hashes_per_mode.keys())
+    for i, m1 in enumerate(mode_list):
+        for m2 in mode_list[i + 1:]:
+            shared = hashes_per_mode[m1] & hashes_per_mode[m2]
+            if shared:
+                duplicate_pairs.append((m1, m2))
+    if duplicate_pairs:
+        print(
+            "\nNOTE: some modes produced byte-identical trajectories. Predictions",
+            flush=True,
+        )
+        print(
+            "      reach telemetry but mpc.py does not yet fold res_ps/res_pd into",
+            flush=True,
+        )
+        print(
+            "      throttle/steering — the planner's control output is the same",
+            flush=True,
+        )
+        print(
+            "      across modes. Compare learner stats below for the real signal.",
+            flush=True,
+        )
+        for m1, m2 in duplicate_pairs:
+            print(f"      {m1} <-> {m2} share trajectories", flush=True)
+    else:
+        print("\nAll modes produced distinct trajectories.", flush=True)
+
+    # Aggregate residual-learner state per mode and write to summary.
+    learner_summary = {}
+    for mode in args.modes:
+        rs_list = [r.get("residual_stats") or {} for r in mode_results[mode]]
+        if not rs_list:
+            continue
+
+        def _avg(key, cast=float, default=0.0):
+            vals = [cast(rs[key]) for rs in rs_list if key in rs]
+            return float(np.mean(vals)) if vals else default
+
+        learner_summary[mode] = {
+            "effective_mode": rs_list[0].get("effective_mode", "?"),
+            "samples_trained_mean": _avg("samples_trained"),
+            "samples_accepted_mean": _avg("samples_accepted_this_run"),
+            "theta_s_norm_mean": _avg("theta_s_norm"),
+            "theta_d_norm_mean": _avg("theta_d_norm"),
+            "use_gbm_any": any(bool(rs.get("use_gbm")) for rs in rs_list),
+            "last_pred_s_mean": _avg("last_pred_s"),
+            "last_pred_d_mean": _avg("last_pred_d"),
+            "outliers_dropped_mean": _avg("outliers_dropped"),
+            "off_line_skipped_mean": _avg("off_line_skipped"),
+        }
+    with open(os.path.join(out_dir, "learner_summary.json"), "w") as f:
+        json.dump(learner_summary, f, indent=2)
+
+    print("\nResidual-learner state per mode (mean across seeds):", flush=True)
+    print(f"  {'mode':<8}{'eff':<8}{'trained':>9}{'accepted':>10}{'|theta_s|':>10}{'|theta_d|':>10}{'pred_s':>8}{'pred_d':>8}{'use_gbm':>8}", flush=True)
+    for mode, ls in learner_summary.items():
+        print(
+            f"  {mode:<8}{ls['effective_mode']:<8}"
+            f"{ls['samples_trained_mean']:>9.0f}"
+            f"{ls['samples_accepted_mean']:>10.0f}"
+            f"{ls['theta_s_norm_mean']:>10.3f}"
+            f"{ls['theta_d_norm_mean']:>10.3f}"
+            f"{ls['last_pred_s_mean']:>+8.3f}"
+            f"{ls['last_pred_d_mean']:>+8.3f}"
+            f"{str(ls['use_gbm_any']):>8}",
+            flush=True,
+        )
+
+    # Summary table
+    col_w = 10
+    header = f"{'mode':<12} {'laps_mean':>{col_w}} {'laps_min':>{col_w}} {'lap_t_mean':>{col_w}} {'lap_t_std':>{col_w}} {'max|d|_max':>{col_w}} {'aborted':>{col_w}}"
+    rows = [header, "-" * len(header)]
+    for mode in args.modes:
+        s = summary[mode]
+        lm = f"{s['laps_mean']:.2f}" if s['laps_mean'] != float("inf") else "inf"
+        li = str(s['laps_min'])
+        lt = f"{s['lap_time_mean']:.3f}" if s['lap_time_mean'] != float("inf") else "inf"
+        ls = f"{s['lap_time_std']:.3f}"
+        md = f"{s['max_abs_d_max']:.3f}" if s['max_abs_d_max'] != float("inf") else "inf"
+        ab = str(s['aborted_count'])
+        rows.append(f"{mode:<12} {lm:>{col_w}} {li:>{col_w}} {lt:>{col_w}} {ls:>{col_w}} {md:>{col_w}} {ab:>{col_w}}")
+
+    summary_txt = "\n".join(rows) + "\n"
+    with open(os.path.join(out_dir, "summary.txt"), "w") as f:
+        f.write(summary_txt)
+
+    # ------------------------------------------------------------------
+    # Optional comparison plot
+    # ------------------------------------------------------------------
+    if args.render:
+        _render_comparison(raw_results, args.modes, line_path, out_dir)
+
+    print(summary_txt)
+    print(f"wrote {out_dir}")
+
+
+def _render_comparison(raw_results, modes, line_path, out_dir):
+    """Write comparison.png: up to 4 trajectories overlaid on track."""
+    import matplotlib
+    matplotlib.use("Agg")
+    import matplotlib.pyplot as plt
+    from sim.render import load_line_xy, _tangent_normal
+
+    lx, ly = load_line_xy(line_path)
+    nx, ny = _tangent_normal(lx, ly)
+    track_half = 2.0
+    left_x = lx + nx * track_half
+    left_y = ly + ny * track_half
+    right_x = lx - nx * track_half
+    right_y = ly - ny * track_half
+
+    colors = ["#e63", "#36e", "#3a3", "#c3c", "#a80"]
+    fig, ax = plt.subplots(figsize=(8, 7))
+    ax.set_aspect("equal")
+    ax.axis("off")
+    ax.plot(left_x, left_y, color="#555", lw=0.8)
+    ax.plot(right_x, right_y, color="#555", lw=0.8)
+    ax.plot(lx, ly, color="#bbb", lw=0.5, ls="--")
+
+    # One trajectory per mode (first seed that has data)
+    plotted = []
+    for i, mode in enumerate(modes[:4]):
+        col = colors[i % len(colors)]
+        for r in raw_results:
+            if r["mode"] == mode and r.get("result") is not None and r["result"].xs.size:
+                xs = r["result"].xs
+                ys = r["result"].ys
+                ax.plot(xs, ys, color=col, lw=1.0, alpha=0.8, label=mode)
+                plotted.append(mode)
+                break
+
+    if plotted:
+        ax.legend(fontsize=7, loc="upper right")
+    ax.set_title("Residual mode comparison (first seed each)", fontsize=8)
+    out_path = os.path.join(out_dir, "comparison.png")
+    fig.savefig(out_path, dpi=120, bbox_inches="tight")
+    plt.close(fig)
+    print(f"wrote {out_path}")
+
+
+# ---------------------------------------------------------------------------
+# Main dispatch
+# ---------------------------------------------------------------------------
+def main():
+    ap = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    sub = ap.add_subparsers(dest="cmd", required=True)
+    _add_one_parser(sub)
+    _add_sweep_parser(sub)
+    _add_render_parser(sub)
+    _add_compare_residuals_parser(sub)
+    args = ap.parse_args()
+    {
+        "one": cmd_one,
+        "sweep": cmd_sweep,
+        "render": cmd_render,
+        "compare-residuals": cmd_compare_residuals,
+    }[args.cmd](args)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/sim/slurm/residuals.sbatch b/sim/slurm/residuals.sbatch
new file mode 100644
index 00000000..d486d632
--- /dev/null
+++ b/sim/slurm/residuals.sbatch
@@ -0,0 +1,52 @@
+#!/bin/bash -l
+#SBATCH --job-name=sim-residual-compare
+#SBATCH --nodes=1
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=64
+#SBATCH --time=12:00:00
+#SBATCH -A lilly-rob1
+#SBATCH -p smallgpu
+#SBATCH --gres=gpu:1
+#SBATCH -o $SCRATCH/logs/sim-residual-compare_%j.out
+#SBATCH -e $SCRATCH/logs/sim-residual-compare_%j.err
+
+# Compare residual-learner modes (off, rls, gbm, both) with 4 seeds each.
+set -euo pipefail
+source $SCRATCH/venvs/autonomous-kart/bin/activate
+
+# Repo root resolution. SLURM spools the script to /var/spool so "$0" doesn't
+# point at the original path — use $SLURM_SUBMIT_DIR (set to the dir where you
+# ran `sbatch`). Submit from the repo root, or set REPO_ROOT explicitly.
+REPO_ROOT="${REPO_ROOT:-${SLURM_SUBMIT_DIR:-$PWD}}"
+if [[ ! -f "${REPO_ROOT}/sim/runner.py" ]]; then
+    echo "ERROR: sim/runner.py not found under REPO_ROOT=${REPO_ROOT}" >&2
+    echo "  Run 'sbatch sim/slurm/residuals.sbatch' from the repo root, or set REPO_ROOT explicitly." >&2
+    exit 1
+fi
+
+# Set SIM_PY_ENV_ACTIVATE=/path/to/venv/bin/activate in your .bashrc or before
+# calling sbatch, e.g.:   SIM_PY_ENV_ACTIVATE=~/venvs/kart/bin/activate sbatch ...
+: "${SIM_PY_ENV_ACTIVATE:=}"
+if [[ -n "$SIM_PY_ENV_ACTIVATE" ]]; then
+    # shellcheck source=/dev/null
+    source "$SIM_PY_ENV_ACTIVATE"
+fi
+
+OUT_DIR="${SCRATCH}/sim_out/residual-compare-${SLURM_JOB_ID:-local}-$(date +%Y%m%d-%H%M%S)"
+mkdir -p "$OUT_DIR"
+mkdir -p "${SCRATCH}/logs"
+
+echo "Job ${SLURM_JOB_ID:-local}: residual-compare -> $OUT_DIR"
+
+python3 "${REPO_ROOT}/sim/runner.py" compare-residuals \
+    --sim datasim \
+    --no-mlp \
+    --laps 5 \
+    --seeds 4 \
+    --modes off rls gbm both \
+    --n-jobs "${SLURM_CPUS_PER_TASK:-64}" \
+    --torch-threads 1 \
+    --out-dir "$OUT_DIR" \
+    --render
+
+echo "Done. Results in $OUT_DIR"
diff --git a/sim/slurm/run1.sbatch b/sim/slurm/run1.sbatch
new file mode 100644
index 00000000..9058fd41
--- /dev/null
+++ b/sim/slurm/run1.sbatch
@@ -0,0 +1,65 @@
+#!/bin/bash -l
+#SBATCH --job-name=sim-one-20laps
+#SBATCH --nodes=1
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=64
+#SBATCH --time=12:00:00
+#SBATCH -A lilly-rob1
+#SBATCH -p smallgpu
+#SBATCH --gres=gpu:1
+#SBATCH -o $SCRATCH/logs/sim-one-20laps_%j.out
+#SBATCH -e $SCRATCH/logs/sim-one-20laps_%j.err
+
+# Run a single 20-lap closed-loop MPC simulation with residuals applied and render a GIF.
+set -euo pipefail
+
+# Repo root resolution. SLURM spools the script to /var/spool so "$0" doesn't
+# point at the original path — use $SLURM_SUBMIT_DIR (set to the dir where you
+# ran `sbatch`). Submit from the repo root, or set REPO_ROOT explicitly.
+REPO_ROOT="${REPO_ROOT:-${SLURM_SUBMIT_DIR:-$PWD}}"
+if [[ ! -f "${REPO_ROOT}/sim/runner.py" ]]; then
+    echo "ERROR: sim/runner.py not found under REPO_ROOT=${REPO_ROOT}" >&2
+    echo "  Run 'sbatch sim/slurm/run1.sbatch' from the repo root, or set REPO_ROOT explicitly." >&2
+    exit 1
+fi
+source $SCRATCH/venvs/autonomous-kart/bin/activate
+# Python environment activation.
+# Set SIM_PY_ENV_ACTIVATE=/path/to/venv/bin/activate in your .bashrc or before
+# calling sbatch, e.g.:   SIM_PY_ENV_ACTIVATE=~/venvs/kart/bin/activate sbatch ...
+: "${SIM_PY_ENV_ACTIVATE:=}"
+if [[ -n "$SIM_PY_ENV_ACTIVATE" ]]; then
+    # shellcheck source=/dev/null
+    source "$SIM_PY_ENV_ACTIVATE"
+fi
+
+OUT_DIR="${SCRATCH}/sim_out/one-${SLURM_JOB_ID:-local}-$(date +%Y%m%d-%H%M%S)"
+mkdir -p "$OUT_DIR"
+mkdir -p "${SCRATCH}/logs"
+
+# Write MPC params overlay to a temp file — edit these values as needed.
+# target_speed_mps caps the kart's planned speed; line6.csv's per-tick vx_mps
+# is 6, so this cap only matters if you want to slow the kart below that.
+PARAMS_JSON="$(mktemp --suffix=.json)"
+cat > "$PARAMS_JSON" <<'PARAMS_EOF'
+{
+  "residual.mode": "apply"
+}
+PARAMS_EOF
+
+echo "Job ${SLURM_JOB_ID:-local}: one/20-lap run -> $OUT_DIR"
+
+python3 "${REPO_ROOT}/sim/runner.py" one \
+    --sim datasim \
+    --no-mlp \
+    --laps 20 \
+    --seed 0 \
+    --torch-threads "${SLURM_CPUS_PER_TASK:-64}" \
+    --params-json "$PARAMS_JSON" \
+    --out-dir "$OUT_DIR" \
+    --render \
+    --fps 30 \
+    --realtime-factor 5 \
+    --max-frames 8000
+
+rm -f "$PARAMS_JSON"
+echo "Done. Results in $OUT_DIR"
diff --git a/sim/slurm/sweep_params.sbatch b/sim/slurm/sweep_params.sbatch
new file mode 100644
index 00000000..fa7af659
--- /dev/null
+++ b/sim/slurm/sweep_params.sbatch
@@ -0,0 +1,79 @@
+#!/bin/bash -l
+#SBATCH --job-name=sim-sweep
+#SBATCH --nodes=1
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=64
+#SBATCH --time=12:00:00
+#SBATCH -A lilly-rob1
+#SBATCH -p smallgpu
+#SBATCH --gres=gpu:1
+#SBATCH -o $SCRATCH/logs/sim-sweep_%j.out
+#SBATCH -e $SCRATCH/logs/sim-sweep_%j.err
+
+# Run a 400-trial Bayesian (Optuna TPE) parameter sweep over MPC weights.
+set -euo pipefail
+source $SCRATCH/venvs/autonomous-kart/bin/activate
+
+# Repo root resolution. SLURM spools the script to /var/spool so "$0" doesn't
+# point at the original path — use $SLURM_SUBMIT_DIR (set to the dir where you
+# ran `sbatch`). Submit from the repo root, or set REPO_ROOT explicitly.
+REPO_ROOT="${REPO_ROOT:-${SLURM_SUBMIT_DIR:-$PWD}}"
+if [[ ! -f "${REPO_ROOT}/sim/runner.py" ]]; then
+    echo "ERROR: sim/runner.py not found under REPO_ROOT=${REPO_ROOT}" >&2
+    echo "  Run 'sbatch sim/slurm/sweep_params.sbatch' from the repo root, or set REPO_ROOT explicitly." >&2
+    exit 1
+fi
+
+# Set SIM_PY_ENV_ACTIVATE=/path/to/venv/bin/activate in your .bashrc or before
+# calling sbatch, e.g.:   SIM_PY_ENV_ACTIVATE=~/venvs/kart/bin/activate sbatch ...
+: "${SIM_PY_ENV_ACTIVATE:=}"
+if [[ -n "$SIM_PY_ENV_ACTIVATE" ]]; then
+    # shellcheck source=/dev/null
+    source "$SIM_PY_ENV_ACTIVATE"
+fi
+
+OUT_DIR="${SCRATCH}/sim_out/sweep-${SLURM_JOB_ID:-local}-$(date +%Y%m%d-%H%M%S)"
+mkdir -p "$OUT_DIR"
+mkdir -p "${SCRATCH}/logs"
+
+# === SWEEP PARAMS — edit me ===
+# Write the search-space spec to a temp JSON file.
+# Add/remove entries from "search", or change the "fixed" block below.
+SPEC_JSON="$(mktemp --suffix=.json)"
+cat > "$SPEC_JSON" <<'SPEC_EOF'
+{
+  "fixed": {
+    "residual.mode": "shadow"
+  },
+  "search": [
+    {"name": "w_d",              "dist": "log_uniform", "lo": 5.0,  "hi": 500.0},
+    {"name": "w_heading",        "dist": "log_uniform", "lo": 1.0,  "hi": 200.0},
+    {"name": "w_progress",       "dist": "log_uniform", "lo": 0.1,  "hi": 50.0},
+    {"name": "w_terminal_d",     "dist": "log_uniform", "lo": 20.0, "hi": 600.0},
+    {"name": "w_a_lat",          "dist": "log_uniform", "lo": 5.0,  "hi": 300.0},
+    {"name": "target_speed_pct", "dist": "uniform",     "lo": 0.4,  "hi": 1.0},
+    {"name": "horizon_steps",    "dist": "int",         "lo": 10,   "hi": 40},
+    {"name": "edge_inner_frac",  "dist": "uniform",     "lo": 0.2,  "hi": 0.95}
+  ]
+}
+SPEC_EOF
+# === end sweep params ===
+
+echo "Job ${SLURM_JOB_ID:-local}: sweep -> $OUT_DIR"
+
+python3 "${REPO_ROOT}/sim/runner.py" sweep \
+    --sim datasim \
+    --no-mlp \
+    --trials 400 \
+    --n-jobs "${SLURM_CPUS_PER_TASK:-64}" \
+    --startup 30 \
+    --laps 2 \
+    --seeds-per-trial 3 \
+    --torch-threads 1 \
+    --sweep-spec "$SPEC_JSON" \
+    --out-dir "$OUT_DIR" \
+    --render-best \
+    --log summary
+
+rm -f "$SPEC_JSON"
+echo "Done. Results in $OUT_DIR"
diff --git a/sim/slurm/validate.sbatch b/sim/slurm/validate.sbatch
new file mode 100644
index 00000000..f000ea80
--- /dev/null
+++ b/sim/slurm/validate.sbatch
@@ -0,0 +1,135 @@
+#!/bin/bash -l
+#SBATCH --job-name=sim-validate
+#SBATCH --nodes=1
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=64
+#SBATCH --time=12:00:00
+#SBATCH -A lilly-rob1
+#SBATCH -p smallgpu
+#SBATCH --gres=gpu:1
+#SBATCH -o $SCRATCH/logs/sim-validate_%j.out
+#SBATCH -e $SCRATCH/logs/sim-validate_%j.err
+
+# Recreate the open-loop multi-step rollout validation that produced the 5x /
+# 4.4x improvement of the data sim over sim_bicycle.py.  Holdout bag is the
+# smallest of the three (231452), never seen during MLP training.
+set -euo pipefail
+source $SCRATCH/venvs/autonomous-kart/bin/activate
+
+# Repo root resolution. SLURM spools the script to /var/spool so "$0" doesn't
+# point at the original path — use $SLURM_SUBMIT_DIR (set to the dir where you
+# ran `sbatch`). Submit from the repo root, or set REPO_ROOT explicitly.
+REPO_ROOT="${REPO_ROOT:-${SLURM_SUBMIT_DIR:-$PWD}}"
+if [[ ! -f "${REPO_ROOT}/sim/runner.py" ]]; then
+    echo "ERROR: sim/runner.py not found under REPO_ROOT=${REPO_ROOT}" >&2
+    echo "  Run 'sbatch sim/slurm/validate.sbatch' from the repo root, or set REPO_ROOT explicitly." >&2
+    exit 1
+fi
+
+# Optional python env activation.
+: "${SIM_PY_ENV_ACTIVATE:=}"
+if [[ -n "${SIM_PY_ENV_ACTIVATE}" ]]; then
+    # shellcheck source=/dev/null
+    source "${SIM_PY_ENV_ACTIVATE}"
+fi
+
+cd "${REPO_ROOT}"
+
+# =============================================================================
+# Paths.  Bags live under bags/; aligned npz cache goes under docs/superpowers
+# (gitignored).  The validation outputs go under $SCRATCH so they survive past
+# the job.
+# =============================================================================
+CACHE_DIR="${REPO_ROOT}/docs/superpowers/data_sim/cache"
+MODEL_DIR="${REPO_ROOT}/sim/model"
+OUT_DIR="${OUT_DIR:-${SCRATCH}/sim_out/validate-${SLURM_JOB_ID:-local}-$(date +%Y%m%d-%H%M%S)}"
+export OUT_DIR
+mkdir -p "${OUT_DIR}" "${CACHE_DIR}"
+
+echo "Job ${SLURM_JOB_ID:-local}: validate -> ${OUT_DIR}"
+
+# =============================================================================
+# Step 1: ensure the three aligned bag npz files exist.  Each bag is loaded
+# once and cached; subsequent runs of this script are no-ops on this step.
+# =============================================================================
+ensure_bag () {
+    local tag="$1"      # e.g. 231452
+    local bag_dir="${REPO_ROOT}/bags/mpc_run_20260517_${tag}"
+    local out_npz="${CACHE_DIR}/${tag}.npz"
+    if [[ -f "${out_npz}" ]]; then
+        echo "  cache hit: ${out_npz}"
+        return
+    fi
+    if [[ ! -d "${bag_dir}" ]]; then
+        echo "ERROR: bag dir not found: ${bag_dir}" >&2
+        echo "  Upload the bag to \$REPO_ROOT/bags/ or set CACHE_DIR to a pre-loaded location." >&2
+        exit 1
+    fi
+    echo "  loading ${bag_dir} -> ${out_npz}"
+    python3 "${REPO_ROOT}/sim/load_bags.py" "${bag_dir}" --out "${out_npz}"
+}
+
+echo "Step 1: ensure aligned bag cache"
+ensure_bag 215024     # training bag
+ensure_bag 232625     # training bag
+ensure_bag 231452     # holdout
+
+# =============================================================================
+# Step 2: run the validation harness.  Compares three sims on the holdout:
+#   - sim_bicycle (the existing toy model in src/autonomous_kart/.../sim_bicycle.py)
+#   - id_only     (identified bicycle, no MLP residual)
+#   - full        (identified bicycle + MLP residual + Mahalanobis clamp)
+# Output: validation.json (raw RMSE per horizon), validation.png (overview
+# curves), validation_report.pdf (4-page report with verdict).
+# Expected on the canonical model + holdout (231452):
+#   dy@1s:  sim_bicycle = 1.558 m
+#           id_only     = 0.321 m   (4.85x better)
+#           full        = 0.358 m   (4.36x better)
+# =============================================================================
+echo "Step 2: run validation harness (open-loop multi-step rollout RMSE)"
+python3 "${REPO_ROOT}/sim/validate.py" \
+    --holdout "${CACHE_DIR}/231452.npz" \
+    --params  "${MODEL_DIR}/bicycle_params.json" \
+    --noise   "${MODEL_DIR}/noise.json" \
+    --mlp     "${MODEL_DIR}/mlp.pt" \
+    --norm    "${MODEL_DIR}/norm.json" \
+    --clamp   "${MODEL_DIR}/clamp.json" \
+    --out-json "${OUT_DIR}/validation.json" \
+    --out-png  "${OUT_DIR}/validation.png" \
+    --out-pdf  "${OUT_DIR}/validation_report.pdf"
+
+# =============================================================================
+# Step 3: distil the headline numbers from validation.json so the SLURM .out
+# log contains the answer without needing to inspect the PDF.
+# =============================================================================
+echo ""
+echo "Step 3: headline summary"
+python3 - <<'PY'
+import json, os
+path = os.path.join(os.environ["OUT_DIR"], "validation.json")
+data = json.load(open(path))
+horizons = data["horizons_ticks"]
+dt = data["dt"]
+results = data["results"]
+print(f"  holdout: {data.get('holdout_tag', '?')}")
+print(f"  dt={dt:.4f}s  horizons (s) = {[round(h*dt, 2) for h in horizons]}")
+print()
+print(f"  {'sim':<14}{'dy@0.1s':>10}{'dy@0.5s':>10}{'dy@1.0s':>10}{'dy@2.0s':>10}")
+for name in ("sim_bicycle", "id_only", "full"):
+    dy = results[name]["dy"]
+    row = [dy[str(h)] for h in horizons]
+    print(f"  {name:<14}" + "".join(f"{v:10.3f}" for v in row))
+print()
+baseline_1s = results["sim_bicycle"]["dy"][str(horizons[2])]
+id_1s = results["id_only"]["dy"][str(horizons[2])]
+full_1s = results["full"]["dy"][str(horizons[2])]
+print(f"  speed-up at 1s horizon:")
+print(f"    id_only / sim_bicycle = {baseline_1s / id_1s:.2f}x")
+print(f"    full    / sim_bicycle = {baseline_1s / full_1s:.2f}x")
+PY
+
+echo ""
+echo "Done. Outputs in ${OUT_DIR}"
+echo "  - validation.json          (raw RMSE)"
+echo "  - validation.png           (overview curves)"
+echo "  - validation_report.pdf    (4-page report)"
diff --git a/sim/tests/__init__.py b/sim/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/sim/tests/conftest.py b/sim/tests/conftest.py
new file mode 100644
index 00000000..156c4c91
--- /dev/null
+++ b/sim/tests/conftest.py
@@ -0,0 +1,9 @@
+"""Shared pytest fixtures for sim tests."""
+import os
+import sys
+
+# Make `sim` importable as a top-level package by inserting the repo root.
+HERE = os.path.dirname(os.path.abspath(__file__))
+REPO = os.path.dirname(os.path.dirname(HERE))
+if REPO not in sys.path:
+    sys.path.insert(0, REPO)
diff --git a/sim/tests/test_clamp.py b/sim/tests/test_clamp.py
new file mode 100644
index 00000000..b21f51f6
--- /dev/null
+++ b/sim/tests/test_clamp.py
@@ -0,0 +1,29 @@
+import numpy as np
+
+from sim.clamp import Clamp, fit_clamp
+
+
+def test_alpha_one_at_training_mean():
+    rng = np.random.default_rng(0)
+    feats = rng.normal(0, 1, (5000, 11))
+    clamp = fit_clamp(feats, d_warn_pct=95, d_max_pct=99.5)
+    assert clamp.alpha(np.zeros(11)) == 1.0
+
+
+def test_alpha_zero_far_off():
+    rng = np.random.default_rng(0)
+    feats = rng.normal(0, 1, (5000, 11))
+    clamp = fit_clamp(feats, d_warn_pct=95, d_max_pct=99.5)
+    far = np.full(11, 100.0)
+    assert clamp.alpha(far) == 0.0
+
+
+def test_alpha_between_at_midpoint():
+    rng = np.random.default_rng(0)
+    feats = rng.normal(0, 1, (5000, 11))
+    clamp = fit_clamp(feats, d_warn_pct=95, d_max_pct=99.5)
+    halfway = (clamp.d_warn + clamp.d_max) / 2
+    assert clamp._alpha_from_d(clamp.d_warn) == 1.0
+    assert clamp._alpha_from_d(clamp.d_max) == 0.0
+    mid_alpha = clamp._alpha_from_d(halfway)
+    assert 0.45 < mid_alpha < 0.55
diff --git a/sim/tests/test_data_sim.py b/sim/tests/test_data_sim.py
new file mode 100644
index 00000000..e387e771
--- /dev/null
+++ b/sim/tests/test_data_sim.py
@@ -0,0 +1,126 @@
+import json
+
+import numpy as np
+import pytest
+
+from sim.data_sim import DataSim
+from sim.identify_bicycle import BicycleParams
+from sim.noise_model import NoiseModel
+
+
+@pytest.fixture
+def temp_artifacts(tmp_path):
+    params = BicycleParams()
+    params_path = tmp_path / "bicycle_params.json"
+    with open(params_path, "w") as f:
+        json.dump({"params": params.__dict__, "fit_info": {}}, f)
+    noise = NoiseModel(sigma_v=0.5, sigma_psidot=0.05,
+                       sigma_steer_cmd=0.2, sigma_throttle_cmd=0.5)
+    noise_path = tmp_path / "noise.json"
+    noise.save(str(noise_path))
+    return {"params": str(params_path), "noise": str(noise_path)}
+
+
+def test_step_returns_4_tuple_and_no_nan(temp_artifacts):
+    sim = DataSim(params_path=temp_artifacts["params"],
+                  noise_path=temp_artifacts["noise"],
+                  noise_mode="none")
+    sim.reset(0.0, 0.0, 0.0)
+    out = sim.step(target_mps=8.0, steer_deg=10.0, dt=1 / 60)
+    assert len(out) == 4
+    assert all(np.isfinite(out))
+
+
+def test_zero_command_no_motion(temp_artifacts):
+    sim = DataSim(params_path=temp_artifacts["params"],
+                  noise_path=temp_artifacts["noise"],
+                  noise_mode="none")
+    sim.reset(1.0, 2.0, 0.3)
+    for _ in range(60):
+        x, y, yaw, v = sim.step(0.0, 0.0, 1 / 60)
+    assert abs(v) < 0.01
+    assert abs(x - 1.0) < 0.01
+    assert abs(y - 2.0) < 0.01
+
+
+def test_deterministic_when_noise_none(temp_artifacts):
+    sim_a = DataSim(params_path=temp_artifacts["params"],
+                    noise_path=temp_artifacts["noise"],
+                    noise_mode="none")
+    sim_b = DataSim(params_path=temp_artifacts["params"],
+                    noise_path=temp_artifacts["noise"],
+                    noise_mode="none")
+    sim_a.reset(0, 0, 0)
+    sim_b.reset(0, 0, 0)
+    cmds = [(50.0, 5.0)] * 100 + [(80.0, -3.0)] * 100
+    for tm, sd in cmds:
+        a = sim_a.step(tm, sd, 1 / 60)
+        b = sim_b.step(tm, sd, 1 / 60)
+        assert a == b
+
+
+def test_gaussian_noise_differs_from_none(temp_artifacts):
+    sim_n = DataSim(params_path=temp_artifacts["params"],
+                    noise_path=temp_artifacts["noise"], noise_mode="none")
+    sim_g = DataSim(params_path=temp_artifacts["params"],
+                    noise_path=temp_artifacts["noise"], noise_mode="gaussian",
+                    rng_seed=42)
+    sim_n.reset(0, 0, 0)
+    sim_g.reset(0, 0, 0)
+    diffs = 0
+    for _ in range(120):
+        a = sim_n.step(8.0, 5.0, 1 / 60)
+        b = sim_g.step(8.0, 5.0, 1 / 60)
+        if a != b:
+            diffs += 1
+    assert diffs > 60, f"gaussian noise did not move the sim ({diffs}/120)"
+
+
+def test_bootstrap_falls_back_when_no_emp(temp_artifacts):
+    sim = DataSim(params_path=temp_artifacts["params"],
+                  noise_path=temp_artifacts["noise"], noise_mode="bootstrap")
+    sim.reset(0, 0, 0)
+    out = sim.step(8.0, 5.0, 1 / 60)
+    assert all(np.isfinite(out))
+
+
+def test_mlp_reduces_to_bicycle_off_distribution(tmp_path, temp_artifacts):
+    import json
+    import torch
+    from sim.clamp import Clamp
+    from sim.residual_mlp import FEATURE_DIM, Normaliser, ResidualMLP
+
+    # Build a deliberately constant-output residual MLP.
+    net = ResidualMLP()
+    with torch.no_grad():
+        for p in net.parameters():
+            p.zero_()
+        net.net[-1].bias[:] = torch.tensor([0.5, 0.05])
+    mlp_path = tmp_path / "mlp.pt"
+    torch.save(net.state_dict(), str(mlp_path))
+    norm = Normaliser(mean=np.zeros(FEATURE_DIM, dtype=np.float32),
+                      std=np.ones(FEATURE_DIM, dtype=np.float32))
+    norm_path = tmp_path / "norm.json"
+    with open(norm_path, "w") as f:
+        json.dump(norm.to_json(), f)
+    # Tight clamp around the origin — far-off inputs should give alpha = 0.
+    clamp = Clamp(
+        mean=np.zeros(FEATURE_DIM),
+        cov_inv=np.eye(FEATURE_DIM),
+        d_warn=1.0, d_max=2.0,
+    )
+    clamp_path = tmp_path / "clamp.json"
+    clamp.save(str(clamp_path))
+
+    sim_off = DataSim(
+        params_path=temp_artifacts["params"],
+        noise_path=temp_artifacts["noise"],
+        mlp_path=str(mlp_path), norm_path=str(norm_path), clamp_path=str(clamp_path),
+        noise_mode="none",
+    )
+    sim_off.reset(0, 0, 0)
+    sim_off.v = 12.0
+    out = sim_off.step(12.0, 60.0, 1 / 60)
+    # With alpha = 0 the residual contributes nothing; sim should behave as
+    # ID-only and finish with finite outputs.
+    assert all(np.isfinite(out))
diff --git a/sim/tests/test_identify_bicycle.py b/sim/tests/test_identify_bicycle.py
new file mode 100644
index 00000000..61f0f2e4
--- /dev/null
+++ b/sim/tests/test_identify_bicycle.py
@@ -0,0 +1,41 @@
+import numpy as np
+
+from sim.identify_bicycle import (
+    BicycleParams,
+    fit_params,
+    predict_derivatives,
+)
+
+
+def _synth_run(p: BicycleParams, dt: float, n: int, seed: int = 0):
+    rng = np.random.default_rng(seed)
+    cmd_throttle = 30.0 + 30.0 * np.sin(np.linspace(0, 12 * np.pi, n)) + rng.normal(0, 2, n)
+    cmd_steer = 10.0 * np.sin(np.linspace(0, 25 * np.pi, n))
+    cmd_throttle = np.clip(cmd_throttle, 0.0, 100.0)
+    # Use the full-array predict_derivatives (same path as fit_params residuals) so
+    # the synthetic data is exactly self-consistent with the model.
+    v = np.zeros(n)
+    dv_dt_obs, _ = predict_derivatives(p, v, cmd_throttle, cmd_steer, dt)
+    for i in range(n - 1):
+        v[i + 1] = v[i] + dv_dt_obs[i] * dt
+    dv_dt_obs, psi_dot_obs = predict_derivatives(p, v, cmd_throttle, cmd_steer, dt)
+    return v, cmd_throttle, cmd_steer, dv_dt_obs, psi_dot_obs
+
+
+def test_recovers_known_params_no_noise():
+    truth = BicycleParams(
+        accel_tau=0.6, brake_tau=0.3,
+        throttle_delay_s=0.05, steer_delay_s=0.05,
+        steer_gain=0.4, steer_slop_deg=1.0,
+        wheelbase_m=1.05, steer_rate_max_degps=180.0,
+        slip_angle_at_v=0.0, v_max_mps=12.0,
+    )
+    dt = 1.0 / 60.0
+    v, ct, cs, dv, ps = _synth_run(truth, dt, n=4000, seed=0)
+    fit, info = fit_params(v, ct, cs, dv, ps, dt, prior=BicycleParams(v_max_mps=12.0))
+    assert info["success"]
+    # Tolerances are loose because the slop+rate-limit makes the surface non-smooth.
+    assert abs(fit.accel_tau - truth.accel_tau) < 0.15
+    assert abs(fit.brake_tau - truth.brake_tau) < 0.15
+    assert abs(fit.steer_gain - truth.steer_gain) < 0.1
+    assert abs(fit.wheelbase_m - truth.wheelbase_m) < 0.1
diff --git a/sim/tests/test_load_bags.py b/sim/tests/test_load_bags.py
new file mode 100644
index 00000000..1b85a28e
--- /dev/null
+++ b/sim/tests/test_load_bags.py
@@ -0,0 +1,67 @@
+import os
+
+import numpy as np
+import pytest
+
+from sim.load_bags import (
+    AlignedBag,
+    central_diff,
+    make_history,
+    zoh_align,
+)
+
+
+def test_zoh_align_basic():
+    sample_t = np.array([0.0, 0.1, 0.3])
+    sample_v = np.array([10.0, 20.0, 30.0])
+    grid_t = np.array([0.0, 0.05, 0.1, 0.2, 0.3, 0.5])
+    out = zoh_align(sample_t, sample_v, grid_t)
+    np.testing.assert_array_equal(out, [10.0, 10.0, 20.0, 20.0, 30.0, 30.0])
+
+
+def test_zoh_align_unsorted_input():
+    sample_t = np.array([0.3, 0.0, 0.1])
+    sample_v = np.array([30.0, 10.0, 20.0])
+    grid_t = np.array([0.05, 0.15])
+    out = zoh_align(sample_t, sample_v, grid_t)
+    np.testing.assert_array_equal(out, [10.0, 20.0])
+
+
+def test_zoh_align_empty_raises():
+    with pytest.raises(ValueError):
+        zoh_align(np.array([]), np.array([]), np.array([0.0]))
+
+
+def test_central_diff_constant_slope():
+    dt = 0.01
+    x = np.arange(10) * 2.0
+    out = central_diff(x, dt)
+    np.testing.assert_allclose(out, 200.0)
+
+
+def test_make_history_pads_with_first():
+    v = np.array([1.0, 2.0, 3.0, 4.0])
+    h = make_history(v, depth=3)
+    assert h.shape == (4, 3)
+    np.testing.assert_array_equal(h[3], [3.0, 2.0, 1.0])
+    np.testing.assert_array_equal(h[2], [2.0, 1.0, 1.0])
+    np.testing.assert_array_equal(h[0], [1.0, 1.0, 1.0])
+
+
+REPO = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+HOLDOUT_NPZ = os.path.join(REPO, "docs", "superpowers", "data_sim", "cache", "231452.npz")
+
+
+@pytest.mark.skipif(not os.path.isfile(HOLDOUT_NPZ),
+                    reason="run sim/load_bags.py first")
+def test_holdout_roundtrip():
+    bag = AlignedBag.from_npz(HOLDOUT_NPZ)
+    n = bag.t.size
+    assert n > 10000
+    assert bag.cmd_throttle.shape == (n,)
+    assert bag.cmd_steer_hist.shape[1] == 4
+    assert bag.cmd_throttle_hist.shape[1] == 3
+    assert bag.autonomous.dtype == bool
+    assert bag.autonomous.mean() > 0.5  # holdout is mostly autonomous
+    assert np.isfinite(bag.v).all()
+    assert np.isfinite(bag.psi_dot).all()
diff --git a/sim/tests/test_noise_model.py b/sim/tests/test_noise_model.py
new file mode 100644
index 00000000..a3b0b046
--- /dev/null
+++ b/sim/tests/test_noise_model.py
@@ -0,0 +1,29 @@
+import numpy as np
+
+from sim.noise_model import NoiseModel, estimate_noise
+
+
+def test_zero_residuals_give_small_sigma():
+    n = 1000
+    rng = np.random.default_rng(0)
+    v_resid = rng.normal(0.0, 0.5, n)
+    psi_resid = rng.normal(0.0, 0.05, n)
+    cmd_throttle = rng.normal(50.0, 1.0, n)
+    cmd_steer = rng.normal(0.0, 0.5, n)
+    is_straight = np.abs(cmd_steer) < 1.0
+    is_steady = np.abs(np.diff(cmd_throttle, prepend=cmd_throttle[0])) < 0.5
+    nm = estimate_noise(v_resid, psi_resid, cmd_throttle, cmd_steer,
+                        is_straight, is_steady)
+    assert 0.3 < nm.sigma_v < 0.7
+    assert 0.03 < nm.sigma_psidot < 0.07
+    assert nm.sigma_steer_cmd > 0
+    assert nm.sigma_throttle_cmd > 0
+
+
+def test_serialise_roundtrip(tmp_path):
+    nm = NoiseModel(sigma_v=0.4, sigma_psidot=0.05,
+                    sigma_steer_cmd=0.2, sigma_throttle_cmd=0.7)
+    p = tmp_path / "noise.json"
+    nm.save(str(p))
+    nm2 = NoiseModel.load(str(p))
+    assert nm == nm2
diff --git a/sim/tests/test_residual_mlp.py b/sim/tests/test_residual_mlp.py
new file mode 100644
index 00000000..7342a80e
--- /dev/null
+++ b/sim/tests/test_residual_mlp.py
@@ -0,0 +1,26 @@
+import numpy as np
+import torch
+
+from sim.residual_mlp import (
+    FEATURE_DIM,
+    ResidualMLP,
+    build_features,
+)
+
+
+def test_feature_dim_matches():
+    n = 5
+    feats = build_features(
+        v=np.ones(n), psi_dot=np.zeros(n),
+        cmd_throttle=np.full(n, 50.0), cmd_steer=np.zeros(n),
+        cmd_throttle_hist=np.zeros((n, 3)),
+        cmd_steer_hist=np.zeros((n, 4)),
+    )
+    assert feats.shape == (n, FEATURE_DIM)
+
+
+def test_mlp_forward_shape():
+    net = ResidualMLP()
+    x = torch.zeros(8, FEATURE_DIM)
+    y = net(x)
+    assert y.shape == (8, 2)
diff --git a/sim/tests/test_smoke.py b/sim/tests/test_smoke.py
new file mode 100644
index 00000000..f7a15fc5
--- /dev/null
+++ b/sim/tests/test_smoke.py
@@ -0,0 +1,7 @@
+def test_package_importable():
+    import sim  # noqa: F401
+
+
+def test_torch_available():
+    import torch
+    assert torch.__version__
diff --git a/sim/tests/test_validate.py b/sim/tests/test_validate.py
new file mode 100644
index 00000000..be9186fa
--- /dev/null
+++ b/sim/tests/test_validate.py
@@ -0,0 +1,27 @@
+import numpy as np
+
+from sim.validate import rollout_rmse
+
+
+def test_rollout_rmse_zero_on_perfect_model():
+    n = 1000
+    bag = {
+        "t": np.arange(n) / 60.0,
+        "x": np.arange(n) * 0.1, "y": np.zeros(n), "yaw": np.zeros(n),
+        "v": np.full(n, 6.0),
+        "cmd_throttle": np.full(n, 60.0), "cmd_steer": np.zeros(n),
+    }
+
+    class PerfectSim:
+        def reset(self, x, y, yaw):
+            self.x, self.y, self.yaw, self.v = x, y, yaw, 6.0
+        def step(self, target_mps, steer_deg, dt):
+            self.x += self.v * dt
+            return self.x, self.y, self.yaw, self.v
+
+    sim = PerfectSim()
+    rmse = rollout_rmse(sim, bag, anchor_indices=[0, 100, 500],
+                        horizons_ticks=[6, 30, 60], dt=1 / 60)
+    for metric, by_horizon in rmse.items():
+        for h, v in by_horizon.items():
+            assert v < 1e-6, f"{metric}@h{h} = {v}"
diff --git a/sim/validate.py b/sim/validate.py
new file mode 100644
index 00000000..617f0da6
--- /dev/null
+++ b/sim/validate.py
@@ -0,0 +1,282 @@
+"""Open-loop multi-step rollout RMSE on a holdout bag, comparing three sims."""
+from __future__ import annotations
+
+from typing import Dict, List
+
+import numpy as np
+
+
+def rollout_rmse(
+    sim,
+    bag: dict,
+    anchor_indices: List[int],
+    horizons_ticks: List[int],
+    dt: float,
+) -> Dict[str, Dict[int, float]]:
+    """For each anchor, snap sim to bag state, feed bag commands, compare
+    against bag /odom at each horizon. Returns
+    {"dx": {h: rmse}, "dy": ..., "dyaw": ..., "dv": ...}."""
+    sq = {m: {h: [] for h in horizons_ticks} for m in ("dx", "dy", "dyaw", "dv")}
+    H = max(horizons_ticks)
+    for a in anchor_indices:
+        if a + H >= bag["x"].size:
+            continue
+        sim.reset(float(bag["x"][a]), float(bag["y"][a]), float(bag["yaw"][a]))
+        sim.v = float(bag["v"][a])
+        for h in range(1, H + 1):
+            i = a + h - 1
+            x, y, yaw, v = sim.step(float(bag["cmd_throttle"][i]),
+                                    float(bag["cmd_steer"][i]), dt)
+            if h in sq["dx"]:
+                sq["dx"][h].append((x - bag["x"][a + h]) ** 2)
+                sq["dy"][h].append((y - bag["y"][a + h]) ** 2)
+                sq["dyaw"][h].append(_angle_diff(yaw, bag["yaw"][a + h]) ** 2)
+                sq["dv"][h].append((v - bag["v"][a + h]) ** 2)
+    return {m: {h: float(np.sqrt(np.mean(vs))) for h, vs in by_h.items() if vs}
+            for m, by_h in sq.items()}
+
+
+def _angle_diff(a: float, b: float) -> float:
+    d = a - b
+    return float(np.arctan2(np.sin(d), np.cos(d)))
+
+
+def _bag_to_dict(npz_path: str) -> dict:
+    from sim.load_bags import AlignedBag
+    bag = AlignedBag.from_npz(npz_path)
+    return {
+        "t": bag.t,
+        "x": bag.odom_x, "y": bag.odom_y, "yaw": bag.odom_yaw, "v": bag.v,
+        "cmd_throttle": bag.cmd_throttle, "cmd_steer": bag.cmd_steer,
+        "autonomous": bag.autonomous,
+    }
+
+
+def _build_bicycle_sim():
+    """sim_bicycle.BicycleModel wrapped as a DataSim-shaped step()."""
+    import os
+    import sys
+    REPO = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    sys.path.insert(0, os.path.join(REPO, "src", "autonomous_kart"))
+    from autonomous_kart.nodes.localization.sim_bicycle import BicycleModel
+    return BicycleModel(wheelbase_m=1.05, v_max_mps=12.0, steer_max_deg=60.0)
+
+
+def _build_id_sim(params_path, noise_path):
+    from sim.data_sim import DataSim
+    return DataSim(params_path=params_path, noise_path=noise_path, noise_mode="none")
+
+
+def _build_full_sim(params_path, noise_path, mlp_path, norm_path, clamp_path):
+    from sim.data_sim import DataSim
+    return DataSim(params_path=params_path, noise_path=noise_path,
+                   mlp_path=mlp_path, norm_path=norm_path, clamp_path=clamp_path,
+                   noise_mode="none")
+
+
+def plot_validation(json_path: str, out_png: str) -> None:
+    import json
+    import matplotlib
+    matplotlib.use("Agg")
+    import matplotlib.pyplot as plt
+
+    with open(json_path) as f:
+        payload = json.load(f)
+    horizons = payload["horizons_ticks"]
+    dt = payload["dt"]
+    results = payload["results"]
+    horizons_s = [h * dt for h in horizons]
+    metrics = [("dx", "x error (m)"), ("dy", "y error (m)"),
+               ("dyaw", "yaw error (rad)"), ("dv", "v error (m/s)")]
+    colors = {"sim_bicycle": "#888", "id_only": "#37c", "full": "#c33"}
+
+    fig, axes = plt.subplots(1, len(metrics), figsize=(15, 4))
+    for ax, (key, label) in zip(axes, metrics):
+        for sim_name in ("sim_bicycle", "id_only", "full"):
+            ys = [results[sim_name][key][str(h)] for h in horizons]
+            ax.plot(horizons_s, ys, marker="o", color=colors[sim_name], label=sim_name)
+        ax.set_xlabel("horizon (s)")
+        ax.set_ylabel(label)
+        ax.grid(alpha=0.3)
+    axes[0].legend(loc="upper left", frameon=False)
+    fig.tight_layout()
+    fig.savefig(out_png, dpi=140)
+    plt.close(fig)
+    print(f"wrote {out_png}")
+
+
+def build_pdf_report(
+    json_path: str,
+    out_pdf: str,
+    params_path: str,
+    bag: dict,
+    sims: dict,
+    anchors: list,
+    horizons_ticks: list,
+    dt: float,
+) -> None:
+    import json
+    import os
+    import matplotlib
+    matplotlib.use("Agg")
+    import matplotlib.pyplot as plt
+    from matplotlib.backends.backend_pdf import PdfPages
+
+    with open(json_path) as f:
+        payload = json.load(f)
+    results = payload["results"]
+    horizons_s = [h * dt for h in horizons_ticks]
+    metrics = [("dx", "x err (m)"), ("dy", "y err (m)"),
+               ("dyaw", "yaw err (rad)"), ("dv", "v err (m/s)")]
+    colors = {"sim_bicycle": "#888", "id_only": "#37c", "full": "#c33"}
+
+    with open(params_path) as f:
+        params_payload = json.load(f)
+
+    with PdfPages(out_pdf) as pdf:
+        # Page 1: title + verdict + identified params
+        fig = plt.figure(figsize=(11, 8.5))
+        fig.text(0.5, 0.92, "Data-Driven Kart Sim — Validation Report",
+                 ha="center", fontsize=16, weight="bold")
+        fig.text(0.5, 0.88, os.path.basename(json_path), ha="center", fontsize=9, color="#666")
+
+        all_strict = all(
+            results["full"][m][str(h)] < min(results["sim_bicycle"][m][str(h)],
+                                              results["id_only"][m][str(h)])
+            for m, _ in metrics for h in horizons_ticks
+        )
+        verdict = "PASS — full sim strictly beats both baselines on every horizon and metric."
+        if not all_strict:
+            verdict = "PARTIAL — full sim wins on most but not all horizons/metrics. See tables."
+        fig.text(0.5, 0.80, verdict, ha="center", fontsize=11)
+
+        param_lines = [f"{k}: {v:.4f}" for k, v in params_payload["params"].items()]
+        fig.text(0.1, 0.55, "Identified bicycle parameters:", fontsize=11, weight="bold")
+        for i, line in enumerate(param_lines):
+            fig.text(0.1, 0.50 - 0.025 * i, line, family="monospace", fontsize=9)
+
+        fig.text(0.55, 0.55, "Setup", fontsize=11, weight="bold")
+        fig.text(0.55, 0.50, f"Holdout bag: {payload.get('holdout_tag', '231452')}", fontsize=9)
+        fig.text(0.55, 0.475, f"dt: {dt:.4f}s  ({1.0/dt:.1f} Hz grid)", fontsize=9)
+        fig.text(0.55, 0.45, f"Anchors used: {len(anchors)}", fontsize=9)
+        fig.text(0.55, 0.425, f"Horizons (s): {horizons_s}", fontsize=9)
+        pdf.savefig(fig)
+        plt.close(fig)
+
+        # Page 2: per-metric curves
+        fig, axes = plt.subplots(2, 2, figsize=(11, 8.5))
+        for ax, (key, label) in zip(axes.ravel(), metrics):
+            for sim_name in ("sim_bicycle", "id_only", "full"):
+                ys = [results[sim_name][key][str(h)] for h in horizons_ticks]
+                ax.plot(horizons_s, ys, marker="o", color=colors[sim_name], label=sim_name)
+            ax.set_xlabel("horizon (s)")
+            ax.set_ylabel(label)
+            ax.grid(alpha=0.3)
+        axes[0, 0].legend(loc="upper left", frameon=False)
+        fig.suptitle("RMSE vs rollout horizon — three-way comparison")
+        fig.tight_layout(rect=(0, 0, 1, 0.96))
+        pdf.savefig(fig)
+        plt.close(fig)
+
+        # Page 3: RMSE table
+        fig = plt.figure(figsize=(11, 8.5))
+        fig.text(0.5, 0.95, "RMSE table by horizon", ha="center", fontsize=14, weight="bold")
+        col_labels = [f"{h * dt:.2f}s" for h in horizons_ticks]
+        row_labels = []
+        cells = []
+        for sim_name in ("sim_bicycle", "id_only", "full"):
+            for key, label in metrics:
+                row_labels.append(f"{sim_name} — {key}")
+                cells.append([f"{results[sim_name][key][str(h)]:.3g}" for h in horizons_ticks])
+        ax = fig.add_axes((0.1, 0.1, 0.8, 0.8))
+        ax.axis("off")
+        table = ax.table(cellText=cells, rowLabels=row_labels, colLabels=col_labels,
+                         loc="center", cellLoc="right")
+        table.auto_set_font_size(False)
+        table.set_fontsize(8)
+        pdf.savefig(fig)
+        plt.close(fig)
+
+        # Page 4: trajectory overlays
+        sample_anchors = anchors[:: max(1, len(anchors) // 5)][:5]
+        H_show = horizons_ticks[-1]
+        fig, axes = plt.subplots(2, 3, figsize=(13, 8.5))
+        for ax, a in zip(axes.ravel(), sample_anchors):
+            ax.plot(bag["x"][a:a + H_show + 1], bag["y"][a:a + H_show + 1],
+                    color="#333", lw=1.5, label="bag /odom")
+            for sim_name, sim in sims.items():
+                sim.reset(float(bag["x"][a]), float(bag["y"][a]), float(bag["yaw"][a]))
+                sim.v = float(bag["v"][a])
+                xs, ys = [sim.x], [sim.y]
+                for h in range(H_show):
+                    x, y, _, _ = sim.step(float(bag["cmd_throttle"][a + h]),
+                                          float(bag["cmd_steer"][a + h]), dt)
+                    xs.append(x); ys.append(y)
+                ax.plot(xs, ys, color=colors[sim_name], lw=1.0, label=sim_name)
+            ax.set_aspect("equal", adjustable="datalim")
+            ax.set_title(f"anchor t={bag['t'][a]:.1f}s")
+            ax.grid(alpha=0.3)
+        axes.ravel()[0].legend(loc="upper left", frameon=False, fontsize=8)
+        if len(sample_anchors) < 6:
+            axes.ravel()[-1].axis("off")
+        fig.suptitle("Trajectory overlays over the longest horizon")
+        fig.tight_layout(rect=(0, 0, 1, 0.96))
+        pdf.savefig(fig)
+        plt.close(fig)
+
+    print(f"wrote {out_pdf}")
+
+
+def main():
+    import argparse
+    import json
+    import os
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--holdout", required=True)
+    ap.add_argument("--params", required=True)
+    ap.add_argument("--noise", required=True)
+    ap.add_argument("--mlp", required=True)
+    ap.add_argument("--norm", required=True)
+    ap.add_argument("--clamp", required=True)
+    ap.add_argument("--out-json", required=True)
+    ap.add_argument("--out-png", default="")
+    ap.add_argument("--out-pdf", default="")
+    ap.add_argument("--anchor-stride", type=int, default=30)
+    args = ap.parse_args()
+
+    bag = _bag_to_dict(args.holdout)
+    dt = float(np.median(np.diff(bag["t"])))
+    horizons_ticks = [int(round(h / dt)) for h in (0.1, 0.5, 1.0, 2.0)]
+    anchors = [i for i in range(0, bag["x"].size - max(horizons_ticks) - 1, args.anchor_stride)
+               if bag["autonomous"][i]]
+    print(f"{len(anchors)} anchors, horizons (ticks) = {horizons_ticks}")
+
+    sims = {
+        "sim_bicycle": _build_bicycle_sim(),
+        "id_only": _build_id_sim(args.params, args.noise),
+        "full": _build_full_sim(args.params, args.noise, args.mlp, args.norm, args.clamp),
+    }
+    results = {name: rollout_rmse(sim, bag, anchors, horizons_ticks, dt)
+               for name, sim in sims.items()}
+    os.makedirs(os.path.dirname(args.out_json) or ".", exist_ok=True)
+    with open(args.out_json, "w") as f:
+        json.dump({"horizons_ticks": horizons_ticks, "dt": dt,
+                   "holdout_tag": os.path.basename(args.holdout),
+                   "results": results}, f, indent=2)
+    print(f"wrote {args.out_json}")
+    if args.out_png:
+        plot_validation(args.out_json, args.out_png)
+    if args.out_pdf:
+        build_pdf_report(args.out_json, args.out_pdf,
+                         args.params, bag, sims, anchors, horizons_ticks, dt)
+    for name, res in results.items():
+        print(f"  {name}: dy@1s = {res['dy'].get(horizons_ticks[2], float('nan')):.3f} m")
+
+
+if __name__ == "__main__":
+    import os
+    import sys
+    REPO = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    if REPO not in sys.path:
+        sys.path.insert(0, REPO)
+    main()
diff --git a/src/autonomous_kart/autonomous_kart/launch/bringup_pi.launch.py b/src/autonomous_kart/autonomous_kart/launch/bringup_all.launch.py
similarity index 100%
rename from src/autonomous_kart/autonomous_kart/launch/bringup_pi.launch.py
rename to src/autonomous_kart/autonomous_kart/launch/bringup_all.launch.py
diff --git a/src/autonomous_kart/autonomous_kart/launch/bringup_jetson.launch.py b/src/autonomous_kart/autonomous_kart/launch/bringup_jetson.launch.py
new file mode 100644
index 00000000..4bdd4489
--- /dev/null
+++ b/src/autonomous_kart/autonomous_kart/launch/bringup_jetson.launch.py
@@ -0,0 +1,68 @@
+from launch import LaunchDescription
+from launch.actions import DeclareLaunchArgument, GroupAction
+from launch.substitutions import LaunchConfiguration
+from launch_ros.actions import Node, SetParameter, SetParametersFromFile
+from ament_index_python.packages import get_package_share_directory
+import os
+
+
+def generate_launch_description():
+    pkg_share = get_package_share_directory("autonomous_kart")
+
+    gps_yaml = os.path.join(pkg_share, "params", "gps.yaml")
+    safety_yaml = os.path.join(pkg_share, "params", "safety.yaml")
+    system_yaml = os.path.join(pkg_share, "params", "system.yaml")
+    pathfinder_yaml = os.path.join(pkg_share, "params", "pathfinder.yaml")
+    localization_yaml = os.path.join(pkg_share, "params", "localization.yaml")
+    imu_yaml = os.path.join(pkg_share, "params", "imu.yaml")
+    e_comms_yaml = os.path.join(pkg_share, "params", "e_comms.yaml")
+
+    sim_mode = LaunchConfiguration("simulation_mode")
+
+    return LaunchDescription(
+        [
+            DeclareLaunchArgument("simulation_mode", default_value="false"),
+            GroupAction(
+                [
+                    SetParametersFromFile(system_yaml),
+                    SetParameter(name="simulation_mode", value=sim_mode),
+                    Node(
+                        package="autonomous_kart",
+                        executable="pathfinder_node",
+                        name="pathfinder_node",
+                        parameters=[pathfinder_yaml, safety_yaml],
+                    ),
+                    Node(
+                        package="autonomous_kart",
+                        executable="opencv_pathfinder_node",
+                        name="opencv_pathfinder_node",
+                        parameters=[gps_yaml],
+                    ),
+                    Node(
+                        package="autonomous_kart",
+                        executable="localization_node",
+                        name="localization_node",
+                        parameters=[pathfinder_yaml, localization_yaml],
+                    ),
+                    Node(
+                        package="autonomous_kart",
+                        executable="e_comms_node",
+                        name="e_comms_node",
+                        parameters=[e_comms_yaml],
+                    ),
+                    Node(
+                        package="autonomous_kart",
+                        executable="gps_node",
+                        name="gps_node",
+                        parameters=[gps_yaml],
+                    ),
+                    Node(
+                        package="autonomous_kart",
+                        executable="imu_node",
+                        name="imu_node",
+                        parameters=[imu_yaml],
+                    ),
+                ]
+            ),
+        ]
+    )
diff --git a/src/autonomous_kart/autonomous_kart/launch/bringup_rubik.launch.py b/src/autonomous_kart/autonomous_kart/launch/bringup_rubik.launch.py
new file mode 100644
index 00000000..07414f51
--- /dev/null
+++ b/src/autonomous_kart/autonomous_kart/launch/bringup_rubik.launch.py
@@ -0,0 +1,40 @@
+from launch import LaunchDescription
+from launch.actions import DeclareLaunchArgument, GroupAction, ExecuteProcess
+from launch.substitutions import LaunchConfiguration
+from launch_ros.actions import Node, SetParameter, SetParametersFromFile
+from ament_index_python.packages import get_package_share_directory
+from datetime import datetime
+import os
+
+
+def generate_launch_description():
+    pkg_share = get_package_share_directory("autonomous_kart")
+
+    camera_yaml = os.path.join(pkg_share, "params", "camera.yaml")
+    system_yaml = os.path.join(pkg_share, "params", "system.yaml")
+
+    sim_mode = LaunchConfiguration("simulation_mode")
+
+    return LaunchDescription(
+        [
+            DeclareLaunchArgument("simulation_mode", default_value="false"),
+            GroupAction(
+                [
+                    SetParametersFromFile(system_yaml),
+                    SetParameter(name="simulation_mode", value=sim_mode),
+
+                    Node(
+                        package="autonomous_kart",
+                        executable="camera_node",
+                        name="camera_node",
+                        parameters=[camera_yaml],
+                    ),
+                    Node(
+                        package="autonomous_kart",
+                        executable="master_api",
+                        name="master_api",
+                    ),
+                ]
+            ),
+        ]
+    )
diff --git a/src/autonomous_kart/autonomous_kart/nodes/camera/camera_node.py b/src/autonomous_kart/autonomous_kart/nodes/camera/camera_node.py
index f8514c34..2c223d99 100644
--- a/src/autonomous_kart/autonomous_kart/nodes/camera/camera_node.py
+++ b/src/autonomous_kart/autonomous_kart/nodes/camera/camera_node.py
@@ -54,7 +54,7 @@ def __init__(self):
                 raise RuntimeError(f"Failed to open video: {video_path}")
 
         else:
-            self.cap = cv2.VideoCapture(0) 
+            self.cap = cv2.VideoCapture('/dev/video0') 
             self.video_fps = self.fps
 
             if not self.cap.isOpened():
@@ -84,6 +84,7 @@ def timer_callback(self):
                 self.logger.warning("No frame available")
 
         if publish_frame is not None:
+            self.logger.info(f"Publishing frame {self.frame_counter}")
             self.image_pub.publish(publish_frame)
             self.frame_counter += 1
 
diff --git a/src/autonomous_kart/autonomous_kart/nodes/e_comms/e_comms_node.py b/src/autonomous_kart/autonomous_kart/nodes/e_comms/e_comms_node.py
index 93b27fe1..4873210e 100644
--- a/src/autonomous_kart/autonomous_kart/nodes/e_comms/e_comms_node.py
+++ b/src/autonomous_kart/autonomous_kart/nodes/e_comms/e_comms_node.py
@@ -116,7 +116,8 @@ def _on_can_msg(self, msg: can.Message):
             return
         
         data = bytes(msg.data)  # copy, don't hold a reference
-        
+        self.cmd_count += 1
+        # self.logger.info(f"Msg: {msg.arbitration_id:X}")
         if msg.arbitration_id == STATUS_ID:
             self.executor.create_task(lambda: self.handle_adcb_status_msg(data))
         elif msg.arbitration_id == VESC_STATUS_1_ID:
diff --git a/src/autonomous_kart/autonomous_kart/nodes/imu/imu_node.py b/src/autonomous_kart/autonomous_kart/nodes/imu/imu_node.py
index 8b6c05ff..1de72ece 100644
--- a/src/autonomous_kart/autonomous_kart/nodes/imu/imu_node.py
+++ b/src/autonomous_kart/autonomous_kart/nodes/imu/imu_node.py
@@ -11,7 +11,7 @@
 from rclpy.node import Node
 
 from sensor_msgs.msg import Imu
-from std_msgs.msg import Empty, Float32MultiArray, String
+from std_msgs.msg import Empty, Float32, Float32MultiArray, String
 
 from smbus2 import SMBus
 
@@ -93,7 +93,7 @@ def __init__(self):
         self.imu_publisher = self.create_publisher(Imu, "imu", 10)
         self.status_publisher = self.create_publisher(String, "imu/calibration_status", 1)
 
-        self.create_subscription(Float32MultiArray, "cmd_drive", self._cmd_vel_callback, 5)
+        self.create_subscription(Float32, "e_comms/kart_speed_m_per_s", self._cmd_vel_callback, 5)
         self.create_subscription(Empty, "imu/calibrate", self._calibrate_trigger, 1)
 
         self.logger.info(f"IMU Node started - Mode: {'SIM' if self.sim_mode else 'REAL'}")
@@ -132,10 +132,8 @@ def s16(hi, lo):
         )
         return accel, gyro
 
-    def _cmd_vel_callback(self, msg: Float32MultiArray):
-        if not msg.data:
-            return
-        self._last_cmd_vel = float(msg.data[0])
+    def _cmd_vel_callback(self, msg: Float32):
+        self._last_cmd_vel = float(msg.data)
         self._last_cmd_vel_t = time.time()
 
     def _calibrate_trigger(self, _msg: Empty):
@@ -166,6 +164,8 @@ def _cmd_vel_is_zero(self) -> bool:
     def _sample_indicates_motion(self, accel, gyro):
         # Compare against running mean so a constant chip bias doesn't
         # look like motion. First sample has no mean yet → trivially passes.
+        # Per-sample |accel| is too noisy on this chip; mean |accel| vs |g|
+        # is checked once at finish instead.
         with self._lock:
             count = self._calib_count
             gyro_mean = (
@@ -174,9 +174,6 @@ def _sample_indicates_motion(self, accel, gyro):
         for axis, val, mean in zip(("x", "y", "z"), gyro, gyro_mean):
             if abs(val - mean) > self.gyro_motion_thresh:
                 return f"gyro {axis} delta={val - mean:.3f} rad/s exceeds {self.gyro_motion_thresh}"
-        accel_mag = math.sqrt(sum(a * a for a in accel))
-        if abs(accel_mag - abs(self.default_g)) > self.accel_motion_thresh:
-            return f"|accel|={accel_mag:.3f} m/s^2 deviates from g by > {self.accel_motion_thresh}"
         return None
 
     def _accumulate(self, accel, gyro):
@@ -188,13 +185,38 @@ def _accumulate(self, accel, gyro):
 
     def _finish_calibration(self):
         with self._lock:
-            self.gyro_bias = [s / self._calib_count for s in self._calib_sum]
+            count = self._calib_count
+            gyro_bias = [s / count for s in self._calib_sum]
+            accel_mean_raw = np.array([s / count for s in self._calib_accel_sum])
+        # Reject the run if the averaged |accel| is far from |g| - kart wasn't
+        # actually at rest (or sensor is badly miscalibrated). Tolerant because
+        # this chip's scale factor can be off by several percent.
+        accel_mean_mag = float(np.linalg.norm(accel_mean_raw))
+        g_mag = abs(self.default_g)
+        if abs(accel_mean_mag - g_mag) > self.accel_motion_thresh:
+            reason = (
+                f"mean |accel|={accel_mean_mag:.3f} deviates from |g|={g_mag:.3f} "
+                f"by > {self.accel_motion_thresh}"
+            )
+            self.logger.warning(f"Calibration aborted at finish: {reason}")
+            self._reset_calibration(reason)
+            return
+        with self._lock:
+            # Level correction: any x/y component in the measured gravity after R_MOUNT is residual chassis tilt
+            a_post_mount = R_MOUNT @ accel_mean_raw
+            target_z = math.copysign(abs(self.default_g), a_post_mount[2])
+            target = np.array([0.0, 0.0, target_z])
+            R_level = _level_rotation(a_post_mount, target)
+            R_full = R_level @ R_MOUNT
+            a_corrected = R_full @ accel_mean_raw
+            self.gyro_bias = gyro_bias
+            self.R = R_full
             self.state = CALIBRATED
             self.last_error = ""
-            count = self._calib_count
-            bias_snapshot = list(self.gyro_bias)
+            bias_snapshot = list(gyro_bias)
         self.logger.info(
-            f"Calibration complete after {count} samples. gyro_bias={bias_snapshot}"
+            f"Calibration complete after {count} samples. "
+            f"gyro_bias={bias_snapshot}  accel_after_R={a_corrected.tolist()}"
         )
         self._save_cache()
         self._publish_status()
@@ -209,12 +231,22 @@ def _try_load_cache(self):
             bias = data["gyro_bias"]
             if not (isinstance(bias, list) and len(bias) == 3):
                 raise ValueError("gyro_bias must be a length-3 list")
+
+            R_arr = R_MOUNT.copy()
+            R_data = data.get("R")
+            if R_data is not None:
+                R_arr = np.asarray(R_data, dtype=float)
+                if R_arr.shape != (3, 3):
+                    raise ValueError("R must be 3x3")
             with self._lock:
                 self.gyro_bias = [float(b) for b in bias]
+                self.R = R_arr
                 self.state = CALIBRATED
                 bias_snapshot = list(self.gyro_bias)
+                R_snapshot = self.R.tolist()
             self.logger.info(
-                f"Loaded calibration from {self.cache_path}: gyro_bias={bias_snapshot}"
+                f"Loaded calibration from {self.cache_path}: "
+                f"gyro_bias={bias_snapshot}  R={R_snapshot}"
             )
         except (OSError, ValueError, KeyError, json.JSONDecodeError) as e:
             self.logger.warning(
@@ -225,6 +257,7 @@ def _save_cache(self):
         with self._lock:
             payload = {
                 "gyro_bias": list(self.gyro_bias),
+                "R": np.asarray(self.R).tolist(),
                 "samples": self._calib_count,
                 "timestamp": time.time(),
             }
diff --git a/src/autonomous_kart/autonomous_kart/nodes/localization/localization_node.py b/src/autonomous_kart/autonomous_kart/nodes/localization/localization_node.py
index a4b8565a..dc2c044d 100644
--- a/src/autonomous_kart/autonomous_kart/nodes/localization/localization_node.py
+++ b/src/autonomous_kart/autonomous_kart/nodes/localization/localization_node.py
@@ -20,7 +20,7 @@
 from geometry_msgs.msg import Quaternion, TransformStamped
 from tf2_ros import TransformBroadcaster
 
-from .ekf import LocalizationEKF
+from .ekf import LocalizationEKF, _wrap
 from .sim_bicycle import BicycleModel
 
 
@@ -53,16 +53,19 @@ def _init_sim(self):
         steer_max = float(self.get_parameter("steer_max_deg").value)
         hz = float(self.get_parameter("system_frequency").value)
 
-        self.model = BicycleModel(wheelbase, v_max, steer_max)
+        # Sim noise + actuator delay
+        def _np(name, default):
+            return float(self._param(name, default))
+
+        steer_slop_deg = _np("sim_steer_slop_deg", 0.0)
+        self.model = BicycleModel(
+            wheelbase, v_max, steer_max, steer_slop_deg=steer_slop_deg,
+        )
         self.dt = 1.0 / hz
 
         # Spawn at racing line start if available
         self._auto_spawn()
 
-        # Sim noise + actuator delay
-        def _np(name, default):
-            return float(self._param(name, default))
-
         self._sim_delay_s = _np("sim_actuator_delay_s", 0.0)
         self._sim_motor_std = _np("sim_cmd_motor_noise_mps", 0.0)
         self._sim_steer_std = _np("sim_cmd_steer_noise_deg", 0.0)
@@ -171,6 +174,7 @@ def _init_real(self):
         self.sigma_xy_floor = float(self._param("sigma_xy_floor", 0.02))
         self.vtg_yaw_var_max = float(self._param("vtg_yaw_var_max", 1.0))
         self.vtg_speed_var_max = float(self._param("vtg_speed_var_max", 100.0))
+        self.reverse_flip_deadband_mps = float(self._param("reverse_flip_deadband_mps", 0.3))
         self.wheel_speed_var = float(self._param("wheel_speed_var", 1.0e-4))
         self.imu_dropout_warn = float(self._param("imu_dropout_warn", 1.0))
 
@@ -178,6 +182,7 @@ def _init_real(self):
         self._last_gps_t = None
         self._imu_seen = False
         self._last_imu_stamp_s = None
+        self._last_wheel_v = None
 
         self.create_subscription(Odometry, "gps", self.gps_callback, 10)
         self.create_subscription(Imu, "imu", self._imu_cb, 5)
@@ -185,6 +190,12 @@ def _init_real(self):
             Float32, "e_comms/kart_speed_m_per_s", self._wheel_speed_cb, 5
         )
 
+        # For snapshotting the EKF before / after GPS event to get accuracy
+        self.gps_event_pub = self.create_publisher(
+            Float32MultiArray, "localization/gps_event", 10,
+        )
+        self._gps_event_seq = 0
+
         # 1 Hz watchdog
         self.create_timer(1.0, self._imu_watchdog)
 
@@ -223,9 +234,10 @@ def _odom_cov_from_P(P: np.ndarray) -> list:
         return cov
 
     def _wheel_speed_cb(self, msg: Float32):
+        self._last_wheel_v = float(msg.data)
         if not self.ekf.initialized:
             return
-        self.ekf.update_speed(float(msg.data), self.wheel_speed_var)
+        self.ekf.update_speed(self._last_wheel_v, self.wheel_speed_var)
 
     def _imu_cb(self, msg: Imu):
         stamp_s = float(msg.header.stamp.sec) + float(msg.header.stamp.nanosec) * 1e-9
@@ -250,10 +262,11 @@ def _imu_cb(self, msg: Imu):
             )
             return
 
-        omega_z = float(msg.angular_velocity.z)
         accel_x = float(msg.linear_acceleration.x)
-        omega_var = float(msg.angular_velocity_covariance[8])
         accel_var = float(msg.linear_acceleration_covariance[0])
+        # gyro_z published by imu_node has the wrong sign for our chassis
+        omega_z = -float(msg.angular_velocity.z)
+        omega_var = float(msg.angular_velocity_covariance[8])
 
         self.ekf.predict(dt, omega_z, accel_x, omega_var, accel_var)
 
@@ -302,6 +315,30 @@ def gps_callback(self, msg: Odometry):
         var_v = float(msg.twist.covariance[0])
         have_yaw = var_yaw < self.vtg_yaw_var_max
         have_speed = var_v < self.vtg_speed_var_max
+        # VTG track is course-over-ground = direction of motion. If the kart
+        # is rolling in reverse (wheel speed negative), COG points 180° from
+        # the body yaw so flip it so EKF is accurate.
+        #
+        # Guards against two failure modes seen on the 2026-05-19 bag:
+        #  (1) VESC ERPM noise at rest can briefly read negative (~−0.1 m/s).
+        #      Require |v| above `reverse_flip_deadband_mps` before trusting
+        #      a signed source for the reverse decision — otherwise sensor
+        #      noise on a parked kart silently rotates the EKF init by π.
+        #  (2) On the very first GPS fix we have no reliable signed speed
+        #      yet (the kart hasn't moved). Skip the flip during EKF init
+        #      entirely; corrections after the kart starts driving will
+        #      converge yaw if a real reverse was happening.
+        if have_yaw and self.ekf.initialized:
+            v_for_sign = None
+            db = self.reverse_flip_deadband_mps
+            # Prefer signed wheel speed when it's clearly above the deadband.
+            if self._last_wheel_v is not None and abs(self._last_wheel_v) > db:
+                v_for_sign = self._last_wheel_v
+            # Otherwise fall back to the EKF's own (already-converged) v.
+            elif abs(float(self.ekf.x[3])) > db:
+                v_for_sign = float(self.ekf.x[3])
+            if v_for_sign is not None and v_for_sign < -db:
+                yaw_meas = _wrap(yaw_meas + math.pi)
 
         if not self.ekf.initialized:
             if not self._imu_seen:
@@ -323,14 +360,35 @@ def gps_callback(self, msg: Odometry):
             self._last_gps_t = t
             return
 
-        # Already initialized: run corrections, do NOT publish
+        # Already initialized: run corrections then publish.
+        # Snapshot the EKF state (IMU+wheel predict only) just before GPS is
+        # folded in, and the posterior after, so /localization/gps_event lets
+        # offline analysis see how far IMU+wheel dead-reckoning had drifted.
+        prior = self.ekf.x.copy()
         self.ekf.update_gps_xy(x, y, R_xy)
         if have_yaw:
             self.ekf.update_heading(yaw_meas, var_yaw)
         if have_speed:
             self.ekf.update_speed(v_meas, var_v)
+        post = self.ekf.x.copy()
         self._last_gps_t = t
 
+        px, py, yaw, v = self.ekf.x
+        cov = self._odom_cov_from_P(self.ekf.P)
+        self._publish_odom(px, py, yaw, v, cov=cov, speed_var=float(self.ekf.P[3, 3]))
+
+        self._gps_event_seq += 1
+        self.gps_event_pub.publish(Float32MultiArray(data=[
+            float(self._gps_event_seq),
+            float(x), float(y), float(yaw_meas), float(v_meas),
+            1.0 if have_yaw else 0.0,
+            1.0 if have_speed else 0.0,
+            float(prior[0]), float(prior[1]),
+            float(prior[2]), float(prior[3]),
+            float(post[0]), float(post[1]),
+            float(post[2]), float(post[3]),
+        ]))
+
     def _publish_odom(self, x: float, y: float, yaw: float, speed: float,
                       cov=None, speed_var=None):
         now = self.get_clock().now().to_msg()
diff --git a/src/autonomous_kart/autonomous_kart/nodes/localization/sim_bicycle.py b/src/autonomous_kart/autonomous_kart/nodes/localization/sim_bicycle.py
index b7f216c6..21464435 100644
--- a/src/autonomous_kart/autonomous_kart/nodes/localization/sim_bicycle.py
+++ b/src/autonomous_kart/autonomous_kart/nodes/localization/sim_bicycle.py
@@ -15,12 +15,17 @@ def __init__(
         steer_max_deg: float,
         accel_tau: float = 0.5,
         brake_tau: float = 0.3,
+        steer_slop_deg: float = 0.0,
     ):
         self.wheelbase = wheelbase_m
         self.v_max = v_max_mps
         self.steer_max_rad = math.radians(steer_max_deg)
         self.accel_tau = accel_tau
         self.brake_tau = brake_tau
+        # Half-width of the steering deadzone (backlash). Actual wheel angle
+        # only updates when the command moves more than this past it.
+        self.steer_slop_rad = math.radians(max(0.0, steer_slop_deg))
+        self.delta_actual_rad = 0.0
 
         self.x = 0.0
         self.y = 0.0
@@ -32,6 +37,7 @@ def reset(self, x: float, y: float, yaw: float):
         self.y = y
         self.yaw = yaw
         self.speed = 0.0
+        self.delta_actual_rad = 0.0
 
     def step(self, target_mps: float, steer_deg: float, dt: float):
         """Advance one timestep. Returns (x, y, yaw, speed)."""
@@ -42,9 +48,18 @@ def step(self, target_mps: float, steer_deg: float, dt: float):
         alpha = dt / (tau + dt)
         self.speed += alpha * (target_v - self.speed)
 
-        # Clamp steering
-        delta = math.radians(steer_deg)
-        delta = max(-self.steer_max_rad, min(self.steer_max_rad, delta))
+        # Clamp commanded steering
+        delta_cmd = math.radians(steer_deg)
+        delta_cmd = max(-self.steer_max_rad, min(self.steer_max_rad, delta_cmd))
+
+        # Mechanical slop / backlash: the actual wheel angle only changes
+        # when the command moves past the deadzone band around it.
+        slop = self.steer_slop_rad
+        if delta_cmd > self.delta_actual_rad + slop:
+            self.delta_actual_rad = delta_cmd - slop
+        elif delta_cmd < self.delta_actual_rad - slop:
+            self.delta_actual_rad = delta_cmd + slop
+        delta = self.delta_actual_rad
 
         # Bicycle kinematics
         self.x += self.speed * math.cos(self.yaw) * dt
diff --git a/src/autonomous_kart/autonomous_kart/nodes/master/master_api.py b/src/autonomous_kart/autonomous_kart/nodes/master/master_api.py
index 1e87db5f..bbd41902 100644
--- a/src/autonomous_kart/autonomous_kart/nodes/master/master_api.py
+++ b/src/autonomous_kart/autonomous_kart/nodes/master/master_api.py
@@ -7,6 +7,7 @@
 from flask import Flask, jsonify, request, send_from_directory
 from flask_cors import CORS
 
+from std_msgs.msg import Float32MultiArray # <-- Added ROS2 message type
 from .master_node import MasterNode, STATES
 
 logging.getLogger("werkzeug").setLevel(logging.ERROR)
@@ -14,6 +15,7 @@
 app = Flask(__name__)
 CORS(app)
 master_node: MasterNode | None = None
+sim_angle_pub = None # <-- Added global publisher variable
 
 @app.after_request
 def cors(response):
@@ -27,10 +29,6 @@ def ping():
 _STATIC_LINE_CACHE = None
 
 def _load_static_line(path):
-    """
-    Load racing line once
-    returns: list of full waypoint dicts
-    """
     global _STATIC_LINE_CACHE
     if _STATIC_LINE_CACHE is not None:
         return _STATIC_LINE_CACHE
@@ -54,6 +52,7 @@ def _load_static_line(path):
                     continue
     _STATIC_LINE_CACHE = rows
     return rows
+
 @app.route("/get_logs", methods=["GET"])
 def get_logs():
     if not master_node:
@@ -66,10 +65,8 @@ def manual_control():
     data = request.get_json()
     if not isinstance(data, dict):
         return jsonify({"error": "invalid or missing JSON body"}), 400
-
     if "speed" not in data or "steering" not in data:
         return jsonify({"error": "missing 'speed' or 'steering' field"}), 400
-
     speed, steering = float(data["speed"]), float(data["steering"])
     master_node.manual_control(speed, steering)
     return jsonify({"success": "ok"})
@@ -80,13 +77,11 @@ def set_state():
     data = request.get_json()
     if not isinstance(data, dict):
         return jsonify({"error": "invalid or missing JSON body"}), 400
-
     if "state" not in data:
         return jsonify({"error": "state field not present"})
     state = data["state"]
     if state not in [s.value for s in STATES]:
         return jsonify({"error": f"state {state} is not a valid state."})
-
     master_node.update_state(state)
     return jsonify({"success": "ok"})
 
@@ -221,6 +216,130 @@ def lines_endpoint():
         "static": static_xy,
         "dynamic": master_node.get_dynamic_line(),
     })
+    
+@app.route("/jetson/hotswap", methods=["POST"])
+def jetson_hotswap():
+    if not master_node:
+        return jsonify({"error": "not initialized"}), 500
+
+    master_node.hotswap_jetson()
+
+    return jsonify({
+        "message": "jetson hotswap started"
+    })
+
+
+@app.route("/jetson/restart", methods=["POST"])
+def jetson_restart():
+    if not master_node:
+        return jsonify({"error": "not initialized"}), 500
+
+    master_node.restart_jetson()
+
+    return jsonify({
+        "message": "jetson restart started"
+    })
+
+
+@app.route("/jetson/update", methods=["POST"])
+def jetson_update():
+    if not master_node:
+        return jsonify({"error": "not initialized"}), 500
+
+    master_node.update_jetson()
+
+    return jsonify({
+        "message": "jetson update started"
+    })
+
+
+@app.route("/jetson/rebuild", methods=["POST"])
+def jetson_rebuild():
+    if not master_node:
+        return jsonify({"error": "not initialized"}), 500
+
+    master_node.rebuild_jetson()
+
+    return jsonify({
+        "message": "jetson rebuild started"
+    })
+
+
+@app.route("/residual/status", methods=["GET"])
+def residual_status():
+    if not master_node:
+        return jsonify({"error": "master node not initialized"}), 500
+    snap = master_node.get_residual_status()
+    if snap is None:
+        return jsonify({"status": "no /mpc/status received yet"}), 200
+    return jsonify(snap), 200
+
+
+@app.route("/residual/log", methods=["GET"])
+def residual_log():
+    if not master_node:
+        return jsonify({"error": "master node not initialized"}), 500
+    try:
+        limit = int(request.args.get("limit", 50))
+    except ValueError:
+        limit = 50
+    return jsonify({"events": master_node.get_residual_log(limit=limit)}), 200
+
+
+@app.route("/residual/log/stream", methods=["GET"])
+def residual_log_stream():
+    if not master_node:
+        return ("master node not initialized", 500)
+    from flask import Response
+    import json as _json
+    import time as _time
+
+    def gen():
+        last_seen = -1
+        while True:
+            events = master_node.get_residual_log(limit=200)
+            # events are newest-first; emit oldest-of-the-new-ones first
+            new = [e for e in reversed(events) if e["train_seq"] > last_seen]
+            for e in new:
+                yield f"event: train\ndata: {_json.dumps(e)}\n\n"
+                last_seen = e["train_seq"]
+            _time.sleep(1.0)
+
+    return Response(gen(), mimetype="text/event-stream")
+
+
+@app.route("/residual/revert", methods=["POST"])
+def residual_revert():
+    if not master_node:
+        return jsonify({"error": "master node not initialized"}), 500
+    master_node.trigger_residual_revert()
+    return ("", 202)
+
+# ─── UPDATED FLASK ROUTE (USES GLOBAL API NODE PUBLISHER) ───
+@app.route("/camera_angles", methods=["POST"])
+def receive_camera_angles():
+    global sim_angle_pub, master_node
+    if not master_node or not sim_angle_pub:
+        return jsonify({"error": "ROS elements not fully initialized"}), 500
+        
+    data = request.get_json(silent=True) or {}
+    left_angle = data.get("left_angle", 22.5)
+    right_angle = data.get("right_angle", 22.5)
+
+    print(f"[DEBUG] Received Frontend Angles -> L: {left_angle:.2f}°, R: {right_angle:.2f}°", flush=True)
+    
+    # Pack up the ROS2 message right here inside the API thread
+    msg = Float32MultiArray()
+    msg.data = [float(left_angle), float(right_angle)]
+    
+    # Publish directly via the global publisher attached to master_node
+    sim_angle_pub.publish(msg)
+    
+    return jsonify({
+        "status": "published", 
+        "left": left_angle, 
+        "right": right_angle
+    })
 
 
 def start(node: MasterNode) -> None:
@@ -229,22 +348,35 @@ def start(node: MasterNode) -> None:
     except (KeyboardInterrupt, ExternalShutdownException):
         pass
     finally:
-        # Take the whole process down so Flask doesn't keep serving stale ROS state.
         os._exit(0)
 
 
 def main():
-    global master_node
+    global master_node, sim_angle_pub
 
     rclpy.init()
     master_node = MasterNode()
-    ros_thread = threading.Thread(target=start, args=(master_node,), daemon=True)
+    
+    sim_angle_pub = master_node.create_publisher(
+        Float32MultiArray, 
+        "sim/camera_angles", 
+        10
+    )
+
+    ros_thread = threading.Thread(target=rclpy.spin, args=(master_node,), daemon=True)
     ros_thread.start()
 
-    app.run(host="0.0.0.0", port=8000, debug=False)
-
-    rclpy.shutdown()
-
+    try:
+        # Enforce threaded execution to handle high-frequency raycasts asynchronously
+        app.run(host="0.0.0.0", port=8000, debug=False, threaded=True)
+    except Exception as e:
+        print(f"Flask server failed to start: {e}")
+    finally:
+        # ─── SAFE SHUTDOWN SEQUENCE ───
+        # Destroy the node to shake rclpy.spin() loose before tearing down the context
+        if 'master_node' in locals():
+            master_node.destroy_node()
+        rclpy.shutdown()
 
 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file
diff --git a/src/autonomous_kart/autonomous_kart/nodes/master/master_node.py b/src/autonomous_kart/autonomous_kart/nodes/master/master_node.py
index d192933a..46230e4c 100644
--- a/src/autonomous_kart/autonomous_kart/nodes/master/master_node.py
+++ b/src/autonomous_kart/autonomous_kart/nodes/master/master_node.py
@@ -1,6 +1,7 @@
 import json
 import threading
 import time
+import subprocess
 from enum import Enum
 
 import rclpy
@@ -33,6 +34,10 @@ def __init__(self):
         assert self.state in [s.value for s in STATES]
 
         self._lock = threading.Lock()
+        # Residual training-event log queue (capped at 200)
+        self._residual_log: list[dict] = []
+        self._residual_log_max = 200
+        self._last_train_seq = -1
         self._pub_lock = threading.Lock()
         self._logs = []
 
@@ -131,6 +136,7 @@ def __init__(self):
         self.residual_mode_publisher = self.create_publisher(String, "mpc/residual_mode", 1)
         self.planner_publisher = self.create_publisher(String, "pathfinder/planner", 1)
         self.line_publisher = self.create_publisher(String, "pathfinder/line_path", 1)
+        self.residual_revert_publisher = self.create_publisher(Empty, "mpc/residual_revert", 1)
 
         # IMU calibration plumbing
         self.imu_calibrate_publisher = self.create_publisher(Empty, "imu/calibrate", 1)
@@ -164,8 +170,10 @@ def get_dynamic_line(self):
 
     def _mpc_status_callback(self, msg: Float32MultiArray):
         data = list(msg.data)
-        if len(data) < 44:
+        if len(data) < 63:
             return
+        # Phase-1 residual telemetry lives in cols 63..87 of the new 88-col payload.
+        # Legacy 63-col logs still parse; the extras stay empty in that case.
         snapshot = {
             "received": True,
             "mode": int(data[0]),
@@ -214,7 +222,79 @@ def _mpc_status_callback(self, msg: Float32MultiArray):
             "kappa_local": float(data[41]),
             "consec_failures": int(data[42]),
             "corridor_half": float(data[43]),
+            # GPS pose passthrough (indices 47..50; 44..46 are residual telemetry).
+            "gps_x": float(data[47]),
+            "gps_y": float(data[48]),
+            "gps_yaw_rad": float(data[49]),
+            "gps_v": float(data[50]),
+            # Wheel speed + last-GPS-event EKF prior/posterior snapshot.
+            # gps_event_seq increments per /localization/gps_event publish.
+            "wheel_speed_mps": float(data[51]),
+            "gps_event_seq": int(data[52]),
+            "gps_have_yaw": bool(data[53] > 0.5),
+            "gps_have_speed": bool(data[54] > 0.5),
+            "ekf_prior_x": float(data[55]),
+            "ekf_prior_y": float(data[56]),
+            "ekf_prior_yaw_rad": float(data[57]),
+            "ekf_prior_v": float(data[58]),
+            "ekf_post_x": float(data[59]),
+            "ekf_post_y": float(data[60]),
+            "ekf_post_yaw_rad": float(data[61]),
+            "ekf_post_v": float(data[62]),
         }
+        if len(data) >= 88:
+            snapshot["train_buffer_size"] = int(data[63])
+            snapshot["train_buffer_capacity"] = int(data[64])
+            snapshot["last_train_wall_ms"] = float(data[65])
+            snapshot["last_train_n_samples"] = int(data[66])
+            snapshot["train_seq"] = int(data[67])
+            snapshot["last_train_mae_s"] = float(data[68])
+            snapshot["last_train_mae_d"] = float(data[69])
+            snapshot["last_val_mae_s"] = float(data[70])
+            snapshot["last_val_mae_d"] = float(data[71])
+            snapshot["last_rls_val_mae_s"] = float(data[72])
+            snapshot["last_rls_val_mae_d"] = float(data[73])
+            snapshot["active_model"] = int(data[74])
+            snapshot["pred_clipped"] = int(data[75])
+            snapshot["clip_rate_window"] = float(data[76])
+            snapshot["outliers_dropped_total"] = int(data[77])
+            snapshot["off_line_skipped_total"] = int(data[78])
+            snapshot["divergence_resets_total"] = int(data[79])
+            snapshot["samples_accepted_total"] = int(data[80])
+            snapshot["revert_count_total"] = int(data[81])
+            snapshot["best_val_mae_s"] = float(data[82])
+            snapshot["best_val_mae_d"] = float(data[83])
+            snapshot["cache_loaded"] = int(data[84])
+            snapshot["rls_warmed_up"] = int(data[85])
+            snapshot["effective_mode"] = int(data[86])
+            snapshot["samples_accepted_this_run"] = int(data[87])
+        if "train_seq" in snapshot:
+            seq = int(snapshot["train_seq"])
+            if seq > self._last_train_seq:
+                event = {
+                    "t_wall_ns": int(self.get_clock().now().nanoseconds),
+                    "train_seq": seq,
+                    "n_samples": snapshot.get("last_train_n_samples", 0),
+                    "train_wall_ms": snapshot.get("last_train_wall_ms", float("nan")),
+                    "train_mae_s": snapshot.get("last_train_mae_s", float("nan")),
+                    "train_mae_d": snapshot.get("last_train_mae_d", float("nan")),
+                    "val_mae_s": snapshot.get("last_val_mae_s", float("nan")),
+                    "val_mae_d": snapshot.get("last_val_mae_d", float("nan")),
+                    "rls_val_mae_s": snapshot.get("last_rls_val_mae_s", float("nan")),
+                    "rls_val_mae_d": snapshot.get("last_rls_val_mae_d", float("nan")),
+                    "active_model": "gbm" if snapshot.get("active_model") == 1 else "rls",
+                    "buffer_size": snapshot.get("train_buffer_size", 0),
+                    "outliers_dropped_total": snapshot.get("outliers_dropped_total", 0),
+                    "off_line_skipped_total": snapshot.get("off_line_skipped_total", 0),
+                    "divergence_resets_total": snapshot.get("divergence_resets_total", 0),
+                    "samples_accepted_total": snapshot.get("samples_accepted_total", 0),
+                    "revert_count_total": snapshot.get("revert_count_total", 0),
+                }
+                with self._lock:
+                    self._residual_log.insert(0, event)
+                    if len(self._residual_log) > self._residual_log_max:
+                        self._residual_log = self._residual_log[: self._residual_log_max]
+                self._last_train_seq = seq
         with self._lock:
             self.mpc_status_data = snapshot
 
@@ -371,6 +451,64 @@ def get_gps_status(self):
         with self._lock:
             return dict(self.gps_status_data)
 
+    # Jetson remote management
+    def _ssh_fire(self, remote_command: str, log_msg: str):
+        """Shared helper: open a non-blocking SSH subprocess to the Jetson."""
+        ssh_command = [
+                "ssh", "-t",
+                "-o", "StrictHostKeyChecking=no",  
+                "-o", "UserKnownHostsFile=/dev/null",
+                f"{self.jetson_user}@{self.jetson_ip}",
+                remote_command,
+            ]
+
+        self.logger.info(log_msg)
+        subprocess.Popen(ssh_command)
+
+    def hotswap_jetson(self):
+        """Git-pull and rebuild inside the *running* container, then relaunch bringup_rubik.
+        The container is never stopped — fastest way to pick up new code mid-session."""
+        self._ssh_fire(
+            "bash ~/run_remote.sh --hotswap",
+            "Hotswapping: pulling latest code and relaunching inside running container...",
+        )
+
+    def restart_jetson(self):
+        """Stop the container, rebuild the ROS workspace, and relaunch bringup.
+        No git pull, no Docker image pull — fastest way to bounce the stack."""
+        self._ssh_fire(
+            "bash ~/run_remote.sh --restart",
+            "Restarting Jetson container (no code update)...",
+        )
+
+    def update_jetson(self):
+        """Git-pull the latest code, then restart.
+        Use this before a session to pick up new commits without a full image rebuild."""
+        self._ssh_fire(
+            "bash ~/run_remote.sh --update",
+            "Pulling latest code and restarting Jetson...",
+        )
+
+    def rebuild_jetson(self):
+        """Pull the latest Docker image, git-pull code, then restart.
+        Use this after a Docker image has been pushed (e.g. new deps or base OS change)."""
+        self._ssh_fire(
+            "bash ~/run_remote.sh --rebuild",
+            "Pulling latest Docker image + code and rebuilding Jetson...",
+        )
+
+    def get_residual_log(self, limit: int = 50) -> list[dict]:
+        with self._lock:
+            return list(self._residual_log[: max(0, limit)])
+
+    def get_residual_status(self) -> dict | None:
+        with self._lock:
+            if not self.mpc_status_data:
+                return None
+            return dict(self.mpc_status_data)
+
+    def trigger_residual_revert(self) -> None:
+        self.residual_revert_publisher.publish(Empty())
 
 def main(args=None):
     rclpy.init(args=args)
diff --git a/src/autonomous_kart/autonomous_kart/nodes/opencv_pathfinder/angle.py b/src/autonomous_kart/autonomous_kart/nodes/opencv_pathfinder/angle.py
index 47384593..5fb58ad8 100644
--- a/src/autonomous_kart/autonomous_kart/nodes/opencv_pathfinder/angle.py
+++ b/src/autonomous_kart/autonomous_kart/nodes/opencv_pathfinder/angle.py
@@ -19,18 +19,21 @@ def __init__(self, logger):
     # @param img: Normal image
     # @param log_folder: Directory for write debug info
     # @param debug: Draws lines on image
-    # @param percent: Percent of the road to cutoff (top down)
+    # @param bottom_percent: Percent of the road to cutoff from bottom
+    # @param top_per: Percent of the road to cutoff from top
     # @param pixel_range: Range of pixels to check around previous pixel for O(1) lookup
     # @param pic_offset: Pixel offset from edge of image
     # @ret image angles or None when cannot find angles/coords
-    def get_img_angles(self, img, frame_count=-1, log_folder="logs", debug=False, percent=0.0, pixel_range=3, pic_offset=5, capture_frequency=100):
-        image, right, left = self.get_img_mask(img, percent=percent, pixel_range=pixel_range, pic_offset=pic_offset)
+    def get_img_angles(self, img, frame_count=-1, log_folder="logs", debug=False, bottom_per=0.0, top_per=0.0, pixel_range=3, pic_offset=5, capture_frequency=100):
+        image, right, left = self.get_img_mask(img, bottom_per=bottom_per, top_per=top_per, pixel_range=pixel_range, pic_offset=pic_offset)
         
         if image is None or right is None or left is None:
             return (None, None)
 
-        w = img.shape[1]
-        width = w - 1 - pic_offset
+        h, w = img.shape[:2]
+        height, width = h - 1 - pic_offset, w - 1 - pic_offset
+        top_y_index = int(height * top_per)
+        bottom_y_index = height - int(height * bottom_per)
 
         right_deg = utils.get_angle((width, pic_offset), (width // 2, pic_offset), right)
         left_deg = utils.get_angle((pic_offset, pic_offset), (width // 2, pic_offset), left)
@@ -69,11 +72,11 @@ def get_img_angles(self, img, frame_count=-1, log_folder="logs", debug=False, pe
 
     # @param img: Normal image
     # @param debug: Draws lines on image
-    # @param percent: Percent of the road to cutoff (top down)
+    # @param top_per: Percent of the road to cutoff from top
     # @param pixel_range: Range of pixels to check around previous pixel for O(1) lookup
     # @param pic_offset: Pixel offset from edge of image
     # @ret Masked image & right/left divide between road & grass or None when image doesn't exist
-    def get_img_mask(self, img: np.ndarray, percent=0.0, pixel_range=3, pic_offset=5):
+    def get_img_mask(self, img: np.ndarray, bottom_per=0.0, top_per=0.0, pixel_range=3, pic_offset=5):
         if img is None:
             self.logger.error("No Image Provided as Input to get_img_mask")
             return (None, None, None)
@@ -81,9 +84,14 @@ def get_img_mask(self, img: np.ndarray, percent=0.0, pixel_range=3, pic_offset=5
         # Roi mask for a portion of image
         h, w = img.shape[:2]
         height, width = h - 1 - pic_offset, w - 1 - pic_offset
-        y_index = int(height * percent)
+        top_y_index = int(height * top_per)
+        bottom_y_index = height - int(height * bottom_per)
+
+        if bottom_y_index <= top_y_index:
+            self.logger.warning("ROI Image Bounds Collide")
+            return (None, None, None)
 
-        roi_image = img[y_index:height, pic_offset:width]
+        roi_image = img[top_y_index:bottom_y_index, pic_offset:width]
 
         # Get hue mask
         image_hsv = utils.convert_bgr_to_hsv(roi_image)
@@ -103,7 +111,7 @@ def get_img_mask(self, img: np.ndarray, percent=0.0, pixel_range=3, pic_offset=5
             right = self.vectorized_road_coord(result, right_side=True)
         
         if not left:
-            left = self. vectorized_road_coord(result, right_side=False)
+            left = self.vectorized_road_coord(result, right_side=False)
 
         self.prev_right = right
         self.prev_left = left
@@ -197,61 +205,56 @@ def lookup_road_coord(self, img, prev_pixel, right_side=True, pixel_range=6, pic
     # @param pixel_range: Range of pixels to check around previous pixel for O(1) lookup
     # @param pic_offset: Pixel offset from edge of image
     # @ret video or None
-    def get_video_mask(self, vid, debug=False, percent=0.0, pixel_range=3, pic_offset=5):
-        if vid is None:
-            self.logger.error("Error opening video")
-            return None
+    # def get_video_mask(self, vid, debug=False, top_per=0.0, bottom_per=0.0, pixel_range=3, pic_offset=5):
+    #     if vid is None:
+    #         self.logger.error("Error opening video")
+    #         return None
         
-        r, f = vid.read()
-        if not r:
-            self.logger.error("Can't get initial video frame")
-            return None
-        h, w = f.shape[:2]
-        height, width = h - 1 - pic_offset, w - 1 - pic_offset
+    #     r, f = vid.read()
+    #     if not r:
+    #         self.logger.error("Can't get initial video frame")
+    #         return None
+    #     h, w = f.shape[:2]
+    #     width = w - 1 - pic_offset
 
-        fps = vid.get(cv.CAP_PROP_FPS)
-        if fps == 0:
-            fps = 30
+    #     fps = vid.get(cv.CAP_PROP_FPS)
+    #     if fps == 0:
+    #         fps = 30
 
-        video = cv.VideoWriter("labeled_video.mp4", cv.VideoWriter_fourcc(*'mp4v'), fps, (w, h), isColor=False) 
-        vid.set(cv.CAP_PROP_POS_FRAMES, 0)
+    #     video = cv.VideoWriter("labeled_video.mp4", cv.VideoWriter_fourcc(*'mp4v'), fps, (w, h), isColor=False) 
+    #     vid.set(cv.CAP_PROP_POS_FRAMES, 0)
 
-        while (True):
-            ret, frame = vid.read()
+    #     while (True):
+    #         ret, frame = vid.read()
 
-            if not ret:
-                break
+    #         if not ret:
+    #             break
                 
-            result, cur_right, cur_left = self.get_img_mask(frame, debug=debug, percent=percent, pic_offset=pic_offset, pixel_range=pixel_range)
-            
-            result = np.zeros((h, w), dtype=result.dtype)
-
-            height = h - 1 - pic_offset
-            width = w - 1 - pic_offset
-            y_index = int(height * percent)
+    #         result, cur_right, cur_left = self.get_img_mask(frame, debug=debug, top_per=top_per, bottom_per=bottom_per, pic_offset=pic_offset, pixel_range=pixel_range)
+    #         result = cv.cvtColor(result, cv.COLOR_GRAY2BGR)
 
-            result[y_index:height, pic_offset:width] = result
+    #         width = w - 1 - pic_offset
 
-            # Write debug info on video
-            if debug:
-                right_deg = utils.get_angle((width, pic_offset), (width // 2, pic_offset), cur_right)
-                left_deg = utils.get_angle((pic_offset, pic_offset), (width // 2, pic_offset), cur_left)
+    #         # Write debug info on video
+    #         if debug:
+    #             right_deg = utils.get_angle((width, pic_offset), (width // 2, pic_offset), cur_right)
+    #             left_deg = utils.get_angle((pic_offset, pic_offset), (width // 2, pic_offset), cur_left)
 
-                if right_deg is None:
-                    right_deg = -1
+    #             if right_deg is None:
+    #                 right_deg = -1
                 
-                if left_deg is None:
-                    left_deg = -1
+    #             if left_deg is None:
+    #                 left_deg = -1
             
-                pos = (cur_left, cur_right)
-                degrees = (left_deg, right_deg)
-                self.write_debug_info(result, pos, degrees)
+    #             pos = (cur_left, cur_right)
+    #             degrees = (left_deg, right_deg)
+    #             self.write_debug_info(result, pos, degrees)
 
-            video.write(result)
+    #         video.write(result)
         
-        vid.release()
-        video.release()
-        cv.destroyAllWindows()
+    #     vid.release()
+    #     video.release()
+    #     cv.destroyAllWindows()
 
     def write_debug_info(self, image, pos, degrees):
         cur_left, cur_right = pos
diff --git a/src/autonomous_kart/autonomous_kart/nodes/opencv_pathfinder/opencv_pathfinder_node.py b/src/autonomous_kart/autonomous_kart/nodes/opencv_pathfinder/opencv_pathfinder_node.py
index 88ebd087..5faf065d 100644
--- a/src/autonomous_kart/autonomous_kart/nodes/opencv_pathfinder/opencv_pathfinder_node.py
+++ b/src/autonomous_kart/autonomous_kart/nodes/opencv_pathfinder/opencv_pathfinder_node.py
@@ -1,6 +1,7 @@
 import traceback
 import rclpy
 import os
+import math
 
 from cv_bridge import CvBridge
 from rclpy.duration import Duration
@@ -29,12 +30,19 @@ def __init__(self):
         self.system_frequency = self.get_parameter("system_frequency").value
         self.debug_mode = self.get_parameter("debug_mode").value
         self.pic_offset = self.get_parameter("pic_offset").value
-        self.percent_of_img = self.get_parameter("percent_of_img").value
+        self.top_per = self.get_parameter("top_per").value
+        self.bottom_per = self.get_parameter("bottom_per").value
         self.pixel_range = self.get_parameter("pixel_range").value
         self.capture_frequency = self.get_parameter("capture_frequency").value
         self.log_dir = self.get_parameter("log_dir").value
         self.log_file = self.get_parameter("log_file").value
 
+        # NEW: Parameter to toggle between live camera processing and simulator raycasts
+        # Declare it with a default of False so it doesn't break physical kart deployments
+        if not self.has_parameter("use_sim_vision"):
+            self.declare_parameter("use_sim_vision", False)
+        self.use_sim_vision = self.get_parameter("use_sim_vision").value
+
         self.log_folder = os.path.join(self.log_dir, self.log_file)
 
         qos = QoSProfile(
@@ -44,54 +52,85 @@ def __init__(self):
             lifespan=Duration(seconds=0, nanoseconds=int(1e9 / self.system_frequency)),
         )
 
-        # Subscribe to camera
-        self.image_sub = self.create_subscription(
-            Image, "camera/image_raw", self.image_callback, qos
-        )
-
-        # Publishes two angles in (left angle from center of vision to base of track line, right angle ...)
+        # Publishes two angles: [left_angle, right_angle]
         self.angle_pub = self.create_publisher(
             Float32MultiArray,
             "track_angles",
             5,
         )
 
-        self.logger.info("Pathfinder Node started - subscribed to /camera/image_raw")
+        # Conditional Routing based on execution environment
+        if self.use_sim_vision:
+            self.logger.info("Pathfinder Node running in SIM MODE - subscribing to /sim/camera_angles")
+            self.sim_sub = self.create_subscription(
+                Float32MultiArray, 
+                "sim/camera_angles", 
+                self.sim_angles_callback, 
+                10
+            )
+        else:
+            self.logger.info("Pathfinder Node running in LIVE MODE - subscribing to /camera/image_raw")
+            self.image_sub = self.create_subscription(
+                Image, "camera/image_raw", self.image_callback, qos
+            )
 
     def image_callback(self, msg):
+        """Processes real image frames from physical hardware or high-fidelity gazebos."""
         frame = self.bridge.imgmsg_to_cv2(msg, "passthrough")
         self.frame_count += 1
         self.frames_since_last_log += 1
 
-        # Calculate FPS every second
         current_time = self.get_clock().now().nanoseconds
         elapsed = (current_time - self.last_log_time) / 1e9
 
-        if elapsed >= 1.0:  # Log every second
+        if elapsed >= 1.0:
             fps = self.frames_since_last_log / elapsed
-            self.logger.info(
-                f"Receiving {fps:.1f} fps | Total frames: {self.frame_count}"
-            )
+            self.logger.info(f"Receiving {fps:.1f} fps | Total frames: {self.frame_count}")
             self.last_log_time = current_time
             self.frames_since_last_log = 0
 
-        right_angle, left_angle = self.angle_finder.get_img_angles(frame, log_folder=self.log_folder, frame_count=self.frame_count, capture_frequency=self.capture_frequency, 
-                                                                   debug=self.debug_mode, percent=self.percent_of_img, pixel_range=self.pixel_range, pic_offset=self.pic_offset)
+        right_angle, left_angle = self.angle_finder.get_img_angles(
+            frame, 
+            log_folder=self.log_folder, 
+            frame_count=self.frame_count, 
+            capture_frequency=self.capture_frequency,
+            debug=self.debug_mode, 
+            top_per=self.top_per, 
+            bottom_per=self.bottom_per,
+            pixel_range=self.pixel_range, 
+            pic_offset=self.pic_offset
+        )
 
         if right_angle is None or left_angle is None:
             self.logger.warn("Right or Left angle returned None")
 
-        msg = Float32MultiArray()
-        msg.data = [
-            float(right_angle) if right_angle is not None else float('nan'),
-            float(left_angle) if left_angle is not None else float('nan'),
+        self.publish_angles(left_angle, right_angle)
+
+    def sim_angles_callback(self, msg):
+        """Processes pre-calculated geometric intersection rays direct from viz.html."""
+        if len(msg.data) >= 2:
+            left_angle = msg.data[0]
+            right_angle = msg.data[1]
+            
+            # Simple telemetry frame logging every 20 callbacks (~1 second at 20Hz)
+            self.frame_count += 1
+            if self.frame_count % 20 == 0:
+                self.logger.info(f"[SIM VISION] Catching geometric sweeps -> L: {left_angle:.1f}°, R: {right_angle:.1f}°")
+                
+            self.publish_angles(left_angle, right_angle)
+
+    def publish_angles(self, left, right):
+        """Unified internal publisher for downstream nodes (like your MPC/PID planner)."""
+        out_msg = Float32MultiArray()
+        out_msg.data = [
+            float(left) if left is not None else float('nan'),
+            float(right) if right is not None else float('nan'),
         ]
-        self.angle_pub.publish(msg)
+        self.angle_pub.publish(out_msg)
 
 
 def main(args=None):
     rclpy.init(args=args)
-
     node = OpenCVPathfinderNode()
     executor = MultiThreadedExecutor(num_threads=2)
     executor.add_node(node)
@@ -103,12 +142,10 @@ def main(args=None):
     except Exception:
         node.get_logger().error(traceback.format_exc())
     finally:
-        node.running = False
         executor.shutdown(timeout_sec=1.0)
         node.destroy_node()
         if rclpy.ok():
             rclpy.shutdown()
 
-
 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file
diff --git a/src/autonomous_kart/autonomous_kart/nodes/pathfinder/pathfinder_node.py b/src/autonomous_kart/autonomous_kart/nodes/pathfinder/pathfinder_node.py
index 7da78d39..0b8b784f 100644
--- a/src/autonomous_kart/autonomous_kart/nodes/pathfinder/pathfinder_node.py
+++ b/src/autonomous_kart/autonomous_kart/nodes/pathfinder/pathfinder_node.py
@@ -6,7 +6,8 @@
 import rclpy
 from nav_msgs.msg import Odometry
 from rclpy.node import Node
-from std_msgs.msg import Float32MultiArray, String
+from std_msgs.msg import Float32, Float32MultiArray, String
+from std_msgs.msg import Empty as _EmptyMsg # naming conflict
 
 from autonomous_kart.nodes.master.master_node import STATES
 from autonomous_kart.nodes.pathfinder.planners.base import KartConstants, PlannerInputs
@@ -71,22 +72,51 @@ def __init__(self):
         self.current_yaw = 0.0
         self.current_speed_mps = 0.0
         self._latest_track_angles: Optional[Tuple[float, ...]] = None
-
-        # Shared residual learner — survives planner / line swaps so training
-        # state persists across mode changes.
+        # Raw GPS pose cache, passed through to MPC for offline EKF-vs-GPS diff.
+        self.gps_xy: Tuple[float, float] = (math.nan, math.nan)
+        self.gps_yaw = math.nan
+        self.gps_speed_mps = math.nan
+        # Wheel speed from VESC + per-GPS EKF prior/posterior snapshot. Same
+        # idea: ride through mpc/status so one log captures everything.
+        self.wheel_speed_mps = math.nan
+        self.gps_event_seq = 0.0
+        self.gps_have_yaw = 0.0
+        self.gps_have_speed = 0.0
+        self.ekf_prior_xy: Tuple[float, float] = (math.nan, math.nan)
+        self.ekf_prior_yaw = math.nan
+        self.ekf_prior_v = math.nan
+        self.ekf_post_xy: Tuple[float, float] = (math.nan, math.nan)
+        self.ekf_post_yaw = math.nan
+        self.ekf_post_v = math.nan
+
+        # Shared ResidualLearner — persists across planner / line swaps so the
+        # residual keeps training while pure_pursuit drives, and survives a
+        # planner switch to MPC. Constructed once here, passed by reference
+        # to every (re)built MPCPlanner instance.
         residual_params = {
             k: p.value
             for k, p in self.get_parameters_by_prefix("mpc.residual").items()
         }
-        self.shared_residual = ResidualLearner(residual_params, 1.0 / self.system_frequency)
+        # s_total = racing-line arc length, needed for the residual's seam
+        # unwrap. Falls back to 0 (= no unwrap) if the line is empty.
+        s_total = float(self.racing_line[-1][0]) if self.racing_line else 0.0
+        self.shared_residual = ResidualLearner(
+            residual_params, 1.0 / self.system_frequency, s_total=s_total,
+        )
 
-        # All planners are constructed; `active_planner_name` selects which
-        # one drives cmd_drive. MPC still ticks every frame for telemetry.
+        # Active planner (and the others — all built, only one drives commands).
         self.active_planner_name = self._param("planner", "pure_pursuit", str)
         if self.active_planner_name not in PLANNERS:
             raise ValueError(
                 f"Unknown planner '{self.active_planner_name}'. Available: {list(PLANNERS)}"
             )
+        # Per-planner steering gain (planner-deg -> hardware-deg). Applied
+        # before publishing cmd_drive so downstream (e_comms, sim bicycle)
+        # sees hardware-frame steering regardless of which planner is active.
+        self._planner_gains = {
+            name: self._param(f"{name}.steering_gain", 1.0, float) for name in PLANNERS
+        }
+        self.steering_gain = self._planner_gains[self.active_planner_name]
         self.planners: dict = {}
         if self.racing_line:
             self.planners = self._build_planners()
@@ -104,10 +134,19 @@ def __init__(self):
 
         # Subscriptions
         self.create_subscription(Odometry, "odom", self._on_odom, 10)
+        self.create_subscription(Odometry, "gps", self._on_gps, 10)
+        self.create_subscription(
+            Float32MultiArray, "localization/gps_event", self._on_gps_event, 10,
+        )
+        self.create_subscription(
+            Float32, "e_comms/kart_speed_m_per_s", self._on_wheel_speed, 10,
+        )
         self.create_subscription(Float32MultiArray, "track_angles", self._on_track_angles, 5)
         self.create_subscription(String, "system_state", self.update_state, 10)
         self.create_subscription(Float32MultiArray, "manual_commands", self.manual_loop, 5)
+        self.create_subscription(Float32MultiArray, "track_angles", self._on_track_angles, 5)
         self.create_subscription(String, "mpc/residual_mode", self._on_residual_mode, 1)
+        self.create_subscription(_EmptyMsg, "mpc/residual_revert", self._on_residual_revert, 1)
         self.create_subscription(String, "pathfinder/planner", self._on_planner_swap, 1)
         self.create_subscription(String, "pathfinder/line_path", self._on_line_swap, 1)
 
@@ -133,6 +172,31 @@ def _on_odom(self, msg: Odometry):
         self.current_speed_mps = float(msg.twist.twist.linear.x)
         self.pose_ready = True
 
+    def _on_gps(self, msg: Odometry):
+        p = msg.pose.pose.position
+        self.gps_xy = (float(p.x), float(p.y))
+        self.gps_yaw = self._yaw_from_quaternion(msg.pose.pose.orientation)
+        self.gps_speed_mps = float(msg.twist.twist.linear.x)
+
+    def _on_gps_event(self, msg: Float32MultiArray):
+        # Payload layout matches localization_node.gps_callback.
+        d = msg.data
+        if len(d) < 15:
+            return
+        self.gps_event_seq = float(d[0])
+        # d[1:5] is the GPS measurement; already captured via _on_gps.
+        self.gps_have_yaw = float(d[5])
+        self.gps_have_speed = float(d[6])
+        self.ekf_prior_xy = (float(d[7]), float(d[8]))
+        self.ekf_prior_yaw = float(d[9])
+        self.ekf_prior_v = float(d[10])
+        self.ekf_post_xy = (float(d[11]), float(d[12]))
+        self.ekf_post_yaw = float(d[13])
+        self.ekf_post_v = float(d[14])
+
+    def _on_wheel_speed(self, msg: Float32):
+        self.wheel_speed_mps = float(msg.data)
+
     def _on_track_angles(self, msg: Float32MultiArray):
         self._latest_track_angles = tuple(msg.data) if msg.data else None
 
@@ -144,6 +208,14 @@ def _on_residual_mode(self, msg: String):
         self.shared_residual.mode = mode
         self.logger.info(f"residual mode -> {mode}")
 
+    def _on_residual_revert(self, _msg):
+        """Operator-explicit revert. Bypasses regime check."""
+        if hasattr(self.shared_residual, "manual_revert"):
+            self.shared_residual.manual_revert()
+            self.logger.warning("mpc/residual_revert received -> manual_revert() invoked")
+        else:
+            self.logger.warning("mpc/residual_revert received but learner has no manual_revert() yet")
+
     def _on_planner_swap(self, msg: String):
         name = (msg.data or "").strip().lower()
         if name not in PLANNERS:
@@ -153,6 +225,7 @@ def _on_planner_swap(self, msg: String):
             self.logger.warning(f"pathfinder/planner: '{name}' not constructed")
             return
         self.active_planner_name = name
+        self.steering_gain = self._planner_gains.get(name, 1.0)
         self.logger.info(f"active planner -> {name}")
 
     def _on_line_swap(self, msg: String):
@@ -163,6 +236,8 @@ def _on_line_swap(self, msg: String):
             return
         self.line_path = path
         self.racing_line = new_line
+        # Keep the residual's seam-unwrap calibrated to the new line length
+        self.shared_residual.s_total = float(new_line[-1][0])
         # Rebuild planners on the new line; shared residual persists.
         self.planners = self._build_planners()
         self.logger.info(f"line -> {path} ({len(new_line)} pts)")
@@ -194,9 +269,24 @@ def _autonomous_tick(self):
             speed_mps=self.current_speed_mps,
             track_angles=self._latest_track_angles,
             now_ns=self.get_clock().now().nanoseconds,
+            gps_xy=self.gps_xy,
+            gps_yaw_rad=self.gps_yaw,
+            gps_speed_mps=self.gps_speed_mps,
+            wheel_speed_mps=self.wheel_speed_mps,
+            gps_event_seq=self.gps_event_seq,
+            gps_have_yaw=self.gps_have_yaw,
+            gps_have_speed=self.gps_have_speed,
+            ekf_prior_xy=self.ekf_prior_xy,
+            ekf_prior_yaw_rad=self.ekf_prior_yaw,
+            ekf_prior_v=self.ekf_prior_v,
+            ekf_post_xy=self.ekf_post_xy,
+            ekf_post_yaw_rad=self.ekf_post_yaw,
+            ekf_post_v=self.ekf_post_v,
         )
-
-        # Always run MPC.plan() so mpc/status keeps flowing for telemetry, regardless of state or active planner
+        # Always run MPC.plan() every tick so mpc/status keeps flowing for
+        # telemetry, regardless of system state or which planner is active.
+        # The return value is only used to drive cmd_drive when MPC is active
+        # AND state == AUTONOMOUS; otherwise it's discarded.
         mpc = self.planners.get(MPCPlanner.name)
         mpc_proposed = None
         if mpc is not None:
@@ -222,19 +312,21 @@ def _autonomous_tick(self):
         motor_mps, steering_deg = safe
         self.cmd_count += 1
         self.drive_publisher.publish(
-            Float32MultiArray(data=[float(motor_mps), float(steering_deg)])
+            Float32MultiArray(data=[float(motor_mps), float(steering_deg * self.steering_gain)])
         )
-        # Keep training the shared residual under non-MPC planners
-        if self.active_planner_name != MPCPlanner.name and mpc is not None:
-            try:
-                mpc.train_step(
-                    motor_mps, steering_deg,
-                    self.current_xy[0], self.current_xy[1],
-                    self.current_yaw, self.current_speed_mps,
-                    inputs.now_ns,
-                )
-            except Exception:
-                self.logger.error(f"Residual train_step error:\n{traceback.format_exc()}")
+        # Residual training runs every tick. If MPC is the active planner, its
+        # own plan() already pushed/stepped — skip to avoid double-counting.
+        if self.active_planner_name != MPCPlanner.name:
+            if mpc is not None:
+                try:
+                    mpc.train_step(
+                        motor_mps, steering_deg,
+                        self.current_xy[0], self.current_xy[1],
+                        self.current_yaw, self.current_speed_mps,
+                        inputs.now_ns,
+                    )
+                except Exception:
+                    self.logger.error(f"Residual train_step error:\n{traceback.format_exc()}")
 
     # Manual mode
     #
@@ -317,8 +409,9 @@ def _load_line_csv(self, path: str) -> List[Tuple[float, ...]]:
         return rows
 
     def _build_planners(self) -> dict:
-        """Construct every registered planner. MPC gets the shared residual
-        so training survives planner / line swaps."""
+        """Construct every registered planner against the current racing line.
+        The MPC instance receives the shared ResidualLearner so training state
+        survives planner / line swaps."""
         out: dict = {}
         for name, cls in PLANNERS.items():
             planner_params = {
@@ -370,6 +463,16 @@ def _yaw_from_quaternion(q) -> float:
         cosy_cosp = 1.0 - 2.0 * (y * y + z * z)
         return math.atan2(siny_cosp, cosy_cosp)
 
+    def destroy_node(self):
+        try:
+            self.shared_residual.shutdown()
+        except Exception as exc:
+            try:
+                self.get_logger().warning(f"residual.shutdown() raised: {exc!r}")
+            except Exception:
+                pass
+        super().destroy_node()
+
 
 def main(args=None):
     rclpy.init(args=args)
diff --git a/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/base.py b/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/base.py
index 5546dd9b..9a16454a 100644
--- a/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/base.py
+++ b/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/base.py
@@ -1,5 +1,6 @@
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
+from math import nan
 from typing import ClassVar, Optional, Tuple
 
 
@@ -21,6 +22,23 @@ class PlannerInputs:
     speed_mps: float
     track_angles: Optional[Tuple[float, ...]]
     now_ns: int
+    # Raw GPS pose passthrough for EKF-vs-GPS comparison in mpc/status.
+    # NaN when no GPS fix has been seen yet.
+    gps_xy: Tuple[float, float] = (nan, nan)
+    gps_yaw_rad: float = nan
+    gps_speed_mps: float = nan
+    # Latest VESC wheel-speed (m/s). NaN until first /e_comms/kart_speed msg.
+    wheel_speed_mps: float = nan
+    # Latest /localization/gps_event snapshot
+    gps_event_seq: float = 0.0
+    gps_have_yaw: float = 0.0
+    gps_have_speed: float = 0.0
+    ekf_prior_xy: Tuple[float, float] = (nan, nan)
+    ekf_prior_yaw_rad: float = nan
+    ekf_prior_v: float = nan
+    ekf_post_xy: Tuple[float, float] = (nan, nan)
+    ekf_post_yaw_rad: float = nan
+    ekf_post_v: float = nan
 
 
 class Planner(ABC):
diff --git a/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/mpc.py b/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/mpc.py
index f3548867..5902880c 100644
--- a/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/mpc.py
+++ b/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/mpc.py
@@ -75,6 +75,10 @@ def __init__(self, params: dict, kart: KartConstants, racing_line: list,
         # Below this speed, dpsi = v/L*tan(delta) ~= 0 so the cost can't
         # distinguish steering choices — hold delta at 0 until v crosses the gate.
         self.steer_observability_v = float(g("steer_observability_v_mps", 0.5))
+        # Elite-mean fraction: instead of argmin over K samples, average the
+        # top `mppi_elite_frac × K` samples' first actions. Cuts per-tick noise
+        # without the phase-lag cost of a 1st-order filter. 1.0/K = pure argmin (no smoothing).
+        self.mppi_elite_frac = float(g("mppi_elite_frac", 0.1))
 
         # Corridor (centerline = racing line)
         tw = float(g("track_half_width_m", 2.5))
@@ -133,6 +137,9 @@ def __init__(self, params: dict, kart: KartConstants, racing_line: list,
         # State carried between ticks
         self.closest_idx = 0  # idx into the ACTIVE reference (line or bezier)
         self.static_closest_idx = 0  # idx into the static racing line (for CTE gate)
+        # First plan() does an O(n) nearest search so a kart spawned mid-track
+        # isn't pinned near idx 0 by _frenet's wrap-aware resync gate.
+        self.initial_sync_done = False
         self.delta_prev = 0.0
         self.a_prev = 0.0
         self.u_mean = np.zeros((2, self.N))  # warm-start [delta(N), accel(N)]
@@ -147,7 +154,10 @@ def __init__(self, params: dict, kart: KartConstants, racing_line: list,
             if k.startswith("residual.")
         }
         # Shared ResidualLearner survives planner / line swaps so training state persists
-        self.residual = residual if residual is not None else ResidualLearner(residual_params, solve_dt)
+        s_total = float(self._static_arrays["s"][-1])
+        self.residual = residual if residual is not None else ResidualLearner(
+            residual_params, solve_dt, s_total=s_total,
+        )
         self._nom_steps = max(1, int(round(self.residual.target_horizon_s / self.dt)))
         self._last_motor_mps = 0.0
         self._train_dt = solve_dt
@@ -165,16 +175,56 @@ def __init__(self, params: dict, kart: KartConstants, racing_line: list,
         # Reusable rng
         self._rng = np.random.default_rng(0)
 
+        # Latest raw GPS pose snapshot — appended to status payload so the
+        # mpc/status log alone is enough to diff EKF vs raw GPS offline.
+        self._gps_x = math.nan
+        self._gps_y = math.nan
+        self._gps_yaw = math.nan
+        self._gps_v = math.nan
+        # Wheel speed (m/s) + last-GPS-event EKF prior/posterior snapshot.
+        # Drives the offline EKF-innovation analysis: prior is the IMU+wheel
+        # dead-reckoned state right before GPS fused in; post is after.
+        self._wheel_v = math.nan
+        self._gps_event_seq = 0.0
+        self._gps_have_yaw = 0.0
+        self._gps_have_speed = 0.0
+        self._ekf_prior_x = math.nan
+        self._ekf_prior_y = math.nan
+        self._ekf_prior_yaw = math.nan
+        self._ekf_prior_v = math.nan
+        self._ekf_post_x = math.nan
+        self._ekf_post_y = math.nan
+        self._ekf_post_yaw = math.nan
+        self._ekf_post_v = math.nan
+
     # plan
     def plan(self, inputs: PlannerInputs) -> Optional[Tuple[float, float]]:
         t0 = time.perf_counter()
         x, y = inputs.pose_xy
         yaw = inputs.yaw_rad
         v = max(0.0, inputs.speed_mps)
+        self._gps_x, self._gps_y = inputs.gps_xy
+        self._gps_yaw = inputs.gps_yaw_rad
+        self._gps_v = inputs.gps_speed_mps
+        self._wheel_v = inputs.wheel_speed_mps
+        self._gps_event_seq = inputs.gps_event_seq
+        self._gps_have_yaw = inputs.gps_have_yaw
+        self._gps_have_speed = inputs.gps_have_speed
+        self._ekf_prior_x, self._ekf_prior_y = inputs.ekf_prior_xy
+        self._ekf_prior_yaw = inputs.ekf_prior_yaw_rad
+        self._ekf_prior_v = inputs.ekf_prior_v
+        self._ekf_post_x, self._ekf_post_y = inputs.ekf_post_xy
+        self._ekf_post_yaw = inputs.ekf_post_yaw_rad
+        self._ekf_post_v = inputs.ekf_post_v
 
         # Frenet against the STATIC racing line so the rejoin gate uses
         #    CTE-to-line, not CTE-to-overlay.
         self._set_active_line(self._static_arrays)
+        if not self.initial_sync_done:
+            d2 = (self.l_x - x) ** 2 + (self.l_y - y) ** 2
+            self.static_closest_idx = int(np.argmin(d2))
+            self.closest_idx = self.static_closest_idx
+            self.initial_sync_done = True
         _, d_static, j_static, _, _ = self._frenet(x, y, self.static_closest_idx)
         self.static_closest_idx = j_static
 
@@ -283,7 +333,11 @@ def plan(self, inputs: PlannerInputs) -> Optional[Tuple[float, float]]:
         )
         self.residual.push(phi, s, d, nom_s - s, nom_d - d, v)
         self.residual.step(s, d)
-        res_ps, res_pd = self.residual.predict(phi)
+        res_ps, res_pd, _residual_source = self.residual.predict(phi)
+        # effective_mode gating: zero the residual any time we're not yet in apply
+        # so future consumers automatically see (0, 0) until the apply gate opens.
+        if self.residual.effective_mode() != "apply":
+            res_ps, res_pd = 0.0, 0.0
 
         # Telemetry
         margin_min = self.corridor_half - float(np.max(np.abs(traj["d"])))
@@ -518,11 +572,20 @@ def _solve(self, x0, y0, yaw0, v0, j_now, v_target, v_cap):
         cost = (c_d + c_h + c_v + c_delta + c_drate + c_accel
                 + c_bnd + c_edge + c_prog + c_term_d + c_term_h + c_alat)
 
-        # Corridor enforcement is a soft quadratic penalty (c_bnd above)
-        best = int(np.argmin(cost))
+        # Corridor enforcement is a soft quadratic penalty (c_bnd above).
+        # Pick the action by averaging the top `mppi_elite_frac × K` samples'
+        # control sequences (CEM-style elite mean). The reported "best"
+        # cost is still the elite minimum so feasibility / failsafe still
+        # gate on the truly-best trajectory.
+        n_elite = max(1, int(round(self.mppi_elite_frac * self.K)))
+        elite_idx = np.argpartition(cost, n_elite - 1)[:n_elite]
+        best = int(elite_idx[np.argmin(cost[elite_idx])])
         best_cost = float(cost[best])
         feasible = best_cost < self.feasibility_threshold
-        u = np.stack([delta_seq[best], accel_seq[best]], axis=0)
+        u = np.stack([
+            np.mean(delta_seq[elite_idx, :], axis=0),
+            np.mean(accel_seq[elite_idx, :], axis=0),
+        ], axis=0)
         traj = {"d": d[best], "s": s[best]}
         breakdown = (
             float(c_d[best]), float(c_h[best]), float(c_v[best]),
@@ -544,6 +607,7 @@ def set_racing_line(self, racing_line: list) -> None:
         self.line_manager.register(RejoinStrategy(**self._rejoin_kwargs))
         self.closest_idx = 0
         self.static_closest_idx = 0
+        self.initial_sync_done = False
         self._reset_warm_start()
         self._pose_hist.clear()
 
@@ -551,6 +615,9 @@ def train_step(self, motor_mps: float, steering_deg: float,
                    x: float, y: float, yaw: float, v: float, now_ns: int) -> None:
         """Drive the residual learner from another planner's commands so it
         keeps training when MPC isn't the active planner."""
+        # Sync MPC's internal control state to what was actually executed.
+        self.delta_prev = math.radians(steering_deg)
+        self.u_mean[0, :] = self.delta_prev
         if not self.residual.enabled:
             return
         t0 = time.perf_counter()
@@ -574,7 +641,10 @@ def train_step(self, motor_mps: float, steering_deg: float,
         )
         self.residual.push(phi, s, d, nom_s - s, nom_d - d, v)
         self.residual.step(s, d)
-        res_ps, res_pd = self.residual.predict(phi)
+        res_ps, res_pd, _residual_source = self.residual.predict(phi)
+        # effective_mode gating: zero the residual any time we're not yet in apply
+        if self.residual.effective_mode() != "apply":
+            res_ps, res_pd = 0.0, 0.0
         self._publish_status(
             self.mode, True, t0, s, d, psi_track, v, self.target_speed,
             delta_rad, accel, 0.0, self.corridor_half,
@@ -604,6 +674,8 @@ def _publish_status(self, mode, success, t0, s, d, psi_track, v, v_target,
             return
         solve_ms = (time.perf_counter() - t0) * 1000.0
         nom_s, nom_d, res_es, res_ed = self.residual.mean_error()
+        trainer = self.residual.trainer
+        last = trainer.snapshot_models()[2] if trainer else None
         payload = [
             float(mode), 1.0 if success else 0.0, solve_ms,
             float(s), float(d), float(psi_track), float(v), float(v_target),
@@ -623,7 +695,49 @@ def _publish_status(self, mode, success, t0, s, d, psi_track, v, v_target,
             float({"off": 0, "shadow": 1, "apply": 2}.get(self.residual.mode, 1)),
             float(np.linalg.norm(self.residual.theta_s)),
             float(np.linalg.norm(self.residual.theta_d)),
+            # Raw GPS pose (EKF-vs-GPS diff). NaN until first /gps fix.
+            float(self._gps_x), float(self._gps_y),
+            float(self._gps_yaw), float(self._gps_v),
+            # Wheel speed + last-GPS-event EKF prior/posterior. seq lets the
+            # analyst dedupe MPC ticks (60 Hz) against GPS events (10 Hz).
+            float(self._wheel_v), float(self._gps_event_seq),
+            float(self._gps_have_yaw), float(self._gps_have_speed),
+            float(self._ekf_prior_x), float(self._ekf_prior_y),
+            float(self._ekf_prior_yaw), float(self._ekf_prior_v),
+            float(self._ekf_post_x), float(self._ekf_post_y),
+            float(self._ekf_post_yaw), float(self._ekf_post_v),
+            # ---- Phase 1 residual telemetry (Phase 2 fields stay 0/NaN here) ----
+            float(self.residual.buffer.size if self.residual.buffer is not None else 0),
+            float(self.residual.buffer.capacity if self.residual.buffer is not None else 0),
+            float(last.train_wall_ms if last is not None else float("nan")),
+            float(last.n_samples if last is not None else 0),
+            float(trainer.train_seq if trainer else 0),
+            float(last.train_mae_s if last is not None else float("nan")),
+            float(last.train_mae_d if last is not None else float("nan")),
+            float(last.val_mae_s if last is not None else float("nan")),
+            float(last.val_mae_d if last is not None else float("nan")),
+            float(last.rls_val_mae_s if last is not None else float("nan")),
+            float(last.rls_val_mae_d if last is not None else float("nan")),
+            float(self.residual.last_active_model),  # active_model
+            float(self.residual.last_pred_clipped),
+            float(self.residual.clip_rate()),
+            float(self.residual.outliers_dropped),
+            float(self.residual.off_line_skipped),
+            float(self.residual.divergence_resets),
+            float(self.residual.samples_trained),
+            float(self.residual.revert_count),
+            float(self.residual.checkpoint_ring.best_recent().val_mae_s
+                  if self.residual.checkpoint_ring and not self.residual.checkpoint_ring.is_empty()
+                  else float("nan")),
+            float(self.residual.checkpoint_ring.best_recent().val_mae_d
+                  if self.residual.checkpoint_ring and not self.residual.checkpoint_ring.is_empty()
+                  else float("nan")),
+            float(1.0 if self.residual.cache_loaded else 0.0),
+            float(1.0 if self.residual.samples_trained >= self.residual.rls_warmup_samples else 0.0),
+            float({"off": 0, "shadow": 1, "apply": 2}.get(self.residual.effective_mode(), 1)),
+            float(self.residual.samples_accepted_this_run),
         ]
+        assert len(payload) == 88, f"mpc/status payload must be 88 floats, got {len(payload)}"
         self.status_pub.publish(Float32MultiArray(data=payload))
 
     def dynamic_line_state(self) -> Optional[dict]:
diff --git a/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/mpc_residual.py b/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/mpc_residual.py
index f712be83..1ce20893 100644
--- a/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/mpc_residual.py
+++ b/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/mpc_residual.py
@@ -1,25 +1,26 @@
 """Recursive least-squares residual learner for the MPC planner.
 
-Runs in shadow mode by default — trains/predicts/logs but does NOT affect
-control output. The MPC planner consults `predict` only when mode == "apply".
-
-The learner predicts the residual delta_s / delta_d between the nominal
-bicycle-model rollout and the actual kart motion over a target horizon
-(default 0.5 s). A delayed sample with the actual measured displacement
-arrives `horizon_steps` ticks later, at which point we update theta with
-the standard RLS recursion using a shared P matrix.
+Phase 1 (this commit): tightened outlier filter, |d|-based training filter,
+RLS warmup gate, effective-mode gating, counters, and a `predict()` signature
+that returns (res_s, res_d, source). The GBM trainer + cache + checkpoint
+ring land in Phase 2 — for now those slots stay `None`.
 """
+from __future__ import annotations
 
 from collections import deque
-from typing import Deque, Optional, Tuple
+from typing import Deque, Tuple
 
 import numpy as np
 
-# Feature-vector layout
+# Feature-vector layout (kept in sync with mpc.py's _features helper).
 NUM_STEER_HIST = 4
 NUM_THROTTLE_HIST = 3
 NUM_FEATURES = 1 + 4 + NUM_STEER_HIST + NUM_THROTTLE_HIST + 2  # bias, (d,v_s,v_d,k), hist, (nom_ds,nom_dd)
 
+# Source ids returned by predict()
+SOURCE_RLS = 0
+SOURCE_GBM = 1
+
 
 def _features(d: float, v_s: float, v_d: float, kappa: float,
               steer_hist: Tuple[float, ...],
@@ -27,36 +28,42 @@ def _features(d: float, v_s: float, v_d: float, kappa: float,
               nom_ds: float, nom_dd: float) -> np.ndarray:
     return np.array([
         1.0, d, v_s, v_d, kappa,
-        *steer_hist,  # NUM_STEER_HIST entries
-        *throttle_hist,  # NUM_THROTTLE_HIST entries
+        *steer_hist, *throttle_hist,
         nom_ds, nom_dd,
     ], dtype=np.float64)
 
 
 class ResidualLearner:
-    def __init__(self, params: dict, solve_dt: float):
-        """solve_dt is the wall-time between successive plan() calls
-        (= 1/system_frequency), used to size the push/step delay.
-        Defaults here are a fallback only; yaml under residual.* is canonical."""
+    def __init__(self, params: dict, solve_dt: float, s_total: float = 0.0):
         g = params.get
-        self.mode = str(g("mode", "shadow")).lower()  # off | shadow | apply
+        self.mode = str(g("mode", "shadow")).lower()
         self.target_horizon_s = float(g("target_horizon_s", 0.5))
         self.horizon_steps = max(1, int(round(self.target_horizon_s / max(solve_dt, 1e-3))))
         self.lam = float(g("forgetting_factor", 0.99))
         self.min_speed = float(g("min_train_speed_mps", 0.5))
         self.err_window = int(g("error_window", 200))
+        self.s_total = float(s_total)
+        self.outlier_threshold = float(g("outlier_threshold_m", 2.0))
+        self.max_train_d_m = float(g("max_train_d_m", 3.0))
+        self.max_p_trace = float(g("max_p_trace", 1.0e5))
+        self.max_theta_norm = float(g("max_theta_norm", 50.0))
+        self.rls_warmup_samples = int(g("rls_warmup_samples", 50))
+        self.apply_min_samples_this_run = int(g("apply_min_samples_this_run", 1200))
+        self.gbm_enabled = bool(g("gbm_enabled", False))
+        self.cache_enabled = bool(g("cache_enabled", True))
+        self.cache_dir = str(g("cache_dir", "/root/.cache/residual_cache"))
+        self.cache_load_on_start = bool(g("cache_load_on_start", True))
+        self.cache_loaded = False
+        self.cache = None
 
-        p0 = float(g("initial_cov", 1000.0))
+        self.p0 = float(g("initial_cov", 1000.0))
         self.theta_s = np.zeros(NUM_FEATURES)
         self.theta_d = np.zeros(NUM_FEATURES)
-        self.P = np.eye(NUM_FEATURES) * p0
+        self.P = np.eye(NUM_FEATURES) * self.p0
 
-        # Buffer holds the last `horizon_steps` snapshots; pop when target arrives.
         self._pending: Deque[Tuple[np.ndarray, float, float, float, float]] = deque(
             maxlen=self.horizon_steps + 1
         )
-
-        # Rolling errors for shadow-vs-nominal diagnostics.
         self._err_nom_s: Deque[float] = deque(maxlen=self.err_window)
         self._err_nom_d: Deque[float] = deque(maxlen=self.err_window)
         self._err_res_s: Deque[float] = deque(maxlen=self.err_window)
@@ -65,35 +72,170 @@ def __init__(self, params: dict, solve_dt: float):
         self.last_pred_s = 0.0
         self.last_pred_d = 0.0
         self.samples_trained = 0
+        self.samples_accepted_this_run = 0
+        self.outliers_dropped = 0
+        self.off_line_skipped = 0
+        self.divergence_resets = 0
+
+        # Phase 2 slots (intentionally None in Phase 1; filled in later tasks).
+        self.buffer = None
+        self.trainer = None
+        self.checkpoint_ring = None
+        self.use_gbm = False
+        self.last_active_model = SOURCE_RLS
+        self.last_pred_clipped = 0
+        self._gbm_s = None
+        self._gbm_d = None
+        self._last_val_mae_s = float("nan")
+        self._last_val_mae_d = float("nan")
+        self._last_rls_val_mae_s = float("nan")
+        self._last_rls_val_mae_d = float("nan")
+        self._select_eps_s = float(g("gbm_select_eps_s_m", 0.01))
+        self._select_eps_d = float(g("gbm_select_eps_d_m", 0.005))
+        self._predict_clip_m = float(g("gbm_predict_clip_m", 0.5))
+        from autonomous_kart.nodes.pathfinder.planners.residual.checkpoint import CheckpointRing
+        from autonomous_kart.nodes.pathfinder.planners.residual.regime import RegimeSignature
+        self.checkpoint_ring = CheckpointRing(size=int(g("checkpoint_ring_size", 5)))
+        self.regime_dv_tol = float(g("regime_dv_tol_mps", 1.0))
+        self.regime_dd_tol = float(g("regime_dd_tol_m", 0.3))
+        self.regime_dkappa_rel_tol = float(g("regime_dkappa_rel_tol", 0.5))
+        self.revert_clip_rate_threshold = float(g("revert_clip_rate_threshold", 0.30))
+        self.revert_count = 0
+        self._current_regime = RegimeSignature(0.0, 0.0, 0.0)
+        self.clip_rate_window_size = int(g("revert_clip_rate_window", 600))
+        self._clip_history: Deque[int] = deque(maxlen=self.clip_rate_window_size)
+        self._regime_window_size = 1200
+        self._v_hist: Deque[float] = deque(maxlen=self._regime_window_size)
+        self._d_hist: Deque[float] = deque(maxlen=self._regime_window_size)
+        self._kappa_hist: Deque[float] = deque(maxlen=self._regime_window_size)
+
+        if self.cache_enabled:
+            from autonomous_kart.nodes.pathfinder.planners.residual.cache import ResidualCache
+            self.cache = ResidualCache(self.cache_dir, feature_dim=NUM_FEATURES)
+            if self.cache_load_on_start:
+                loaded = self.cache.load()
+                if loaded is not None:
+                    self.theta_s = loaded.theta_s.copy()
+                    self.theta_d = loaded.theta_d.copy()
+                    self.P = loaded.P.copy()
+                    self.samples_trained = loaded.samples_trained
+                    self.cache_loaded = True
+                    gs, gd = self.cache.load_gbm()
+                    if gs is not None and gd is not None:
+                        # Park them; use_gbm stays False until the next fresh
+                        # GBM training cycle re-validates against this-run data.
+                        self._gbm_s, self._gbm_d = gs, gd
+                        self._last_val_mae_s = float("nan")
+                        self._last_val_mae_d = float("nan")
+                        self.use_gbm = False
+
+        if self.gbm_enabled:
+            from autonomous_kart.nodes.pathfinder.planners.residual.buffer import TrainBuffer
+            from autonomous_kart.nodes.pathfinder.planners.residual.trainer import GBMTrainer
+            self.buffer = TrainBuffer(
+                capacity=int(g("gbm_buffer_capacity", 18000)),
+                feature_dim=NUM_FEATURES,
+            )
+            self.trainer = GBMTrainer(
+                buffer=self.buffer,
+                rls_predict_batch=self._rls_predict_batch,
+                max_iter=int(g("gbm_max_iter", 60)),
+                max_depth=int(g("gbm_max_depth", 3)),
+                learning_rate=float(g("gbm_learning_rate", 0.1)),
+                min_samples_leaf=int(g("gbm_min_samples_leaf", 20)),
+                min_samples_to_train=int(g("gbm_min_samples_to_train", 1200)),
+                retrain_secs=float(g("gbm_retrain_secs", 10.0)),
+                retrain_every_samples=int(g("gbm_retrain_every_samples", 1200)),
+            )
+            # Monkey-patch the trainer's train_once so the learner installs the
+            # new GBM (subject to a hard val_mae safety floor) and recomputes
+            # the selector flag after every successful retrain. This keeps the
+            # trainer class generic — installation policy lives in the learner.
+            original_train_once = self.trainer.train_once
+
+            def _train_and_install():
+                result = original_train_once()
+                if result is None or result.gbm_s is None:
+                    return result
+                if result.val_mae_s > 2.0 * result.rls_val_mae_s:
+                    return result
+                self._install_gbm(
+                    result.gbm_s, result.gbm_d,
+                    val_mae_s=result.val_mae_s, val_mae_d=result.val_mae_d,
+                    rls_val_mae_s=result.rls_val_mae_s, rls_val_mae_d=result.rls_val_mae_d,
+                )
+                # Record checkpoint
+                if self.checkpoint_ring is not None:
+                    from autonomous_kart.nodes.pathfinder.planners.residual.checkpoint import Checkpoint
+                    self.checkpoint_ring.record(Checkpoint(
+                        train_seq=result.train_seq, t_wall_ns=result.t_wall_ns,
+                        theta_s=self.theta_s.copy(), theta_d=self.theta_d.copy(), P=self.P.copy(),
+                        samples_trained=self.samples_trained,
+                        gbm_s=result.gbm_s, gbm_d=result.gbm_d,
+                        val_mae_s=result.val_mae_s, val_mae_d=result.val_mae_d,
+                        rls_val_mae_s=result.rls_val_mae_s, rls_val_mae_d=result.rls_val_mae_d,
+                        regime=self._current_regime,
+                    ))
+                # Persist to disk
+                if self.cache is not None:
+                    try:
+                        self.cache.write_rls(
+                            theta_s=self.theta_s, theta_d=self.theta_d, P=self.P,
+                            samples_trained=self.samples_trained,
+                            best_val_mae_s=result.val_mae_s, best_val_mae_d=result.val_mae_d,
+                            train_seq=result.train_seq,
+                        )
+                        self.cache.write_gbm(result.gbm_s, result.gbm_d)
+                    except Exception:
+                        pass
+                return result
+
+            self.trainer.train_once = _train_and_install
+            self.trainer.start()
 
     @property
     def enabled(self) -> bool:
         return self.mode in ("shadow", "apply")
 
+    def effective_mode(self) -> str:
+        """Return the *gated* mode: 'apply' is downgraded to 'shadow' until
+        samples_accepted_this_run >= apply_min_samples_this_run."""
+        if self.mode == "off":
+            return "off"
+        if self.mode == "apply" and self.samples_accepted_this_run < self.apply_min_samples_this_run:
+            return "shadow"
+        return self.mode
+
     def push(self, phi: np.ndarray, s_t: float, d_t: float,
              nom_ds: float, nom_dd: float, speed: float) -> None:
-        """Stash current features + Frenet pose so we can score it `horizon_steps`
-        ticks later when the actual delta is known."""
         if not self.enabled:
             return
         if speed < self.min_speed:
-            # Still buffer for delay alignment, but mark invalid by NaN in phi[0]?
-            # Simpler: only push valid samples; drop intermittently below threshold.
+            return
+        if abs(d_t) >= self.max_train_d_m:
+            self.off_line_skipped += 1
             return
         self._pending.append((phi.copy(), s_t, d_t, nom_ds, nom_dd))
 
     def step(self, s_now: float, d_now: float) -> None:
-        """Pop the oldest pending sample (if it has aged H ticks) and run RLS."""
         if not self.enabled or len(self._pending) <= self.horizon_steps:
             return
         phi, s0, d0, nom_ds, nom_dd = self._pending.popleft()
         actual_ds = s_now - s0
         actual_dd = d_now - d0
-        # Residual targets
+        if self.s_total > 0.0:
+            half = 0.5 * self.s_total
+            if actual_ds < -half:
+                actual_ds += self.s_total
+            elif actual_ds > half:
+                actual_ds -= self.s_total
         r_s = actual_ds - nom_ds
         r_d = actual_dd - nom_dd
 
-        # Errors for diagnostics (before update)
+        if abs(r_s) > self.outlier_threshold or abs(r_d) > self.outlier_threshold:
+            self.outliers_dropped += 1
+            return
+
         pred_s = float(phi @ self.theta_s)
         pred_d = float(phi @ self.theta_d)
         self._err_nom_s.append(abs(actual_ds - nom_ds))
@@ -101,7 +243,6 @@ def step(self, s_now: float, d_now: float) -> None:
         self._err_res_s.append(abs(actual_ds - (nom_ds + pred_s)))
         self._err_res_d.append(abs(actual_dd - (nom_dd + pred_d)))
 
-        # RLS recursion (shared P)
         Pphi = self.P @ phi
         denom = self.lam + float(phi @ Pphi)
         if denom < 1e-9:
@@ -111,18 +252,192 @@ def step(self, s_now: float, d_now: float) -> None:
         self.theta_d += K * (r_d - float(phi @ self.theta_d))
         self.P = (self.P - np.outer(K, Pphi)) / self.lam
         self.samples_trained += 1
+        self.samples_accepted_this_run += 1
+
+        if self.buffer is not None:
+            self.buffer.append(phi, r_s=float(r_s), r_d=float(r_d), t_ns=0)
+
+        # phi[1] is d, phi[2] is v_s, phi[4] is kappa per the _features layout.
+        self._v_hist.append(float(phi[2]))
+        self._d_hist.append(float(phi[1]))
+        self._kappa_hist.append(float(phi[4]))
+        self._current_regime = self._compute_current_regime()
+
+        tr = float(np.trace(self.P))
+        if tr > self.max_p_trace:
+            self.P *= self.max_p_trace / tr
 
-    def predict(self, phi: np.ndarray) -> Tuple[float, float]:
+        if (np.linalg.norm(self.theta_s) > self.max_theta_norm
+                or np.linalg.norm(self.theta_d) > self.max_theta_norm):
+            self._trigger_divergence_check()
+
+    def predict(self, phi: np.ndarray) -> Tuple[float, float, int]:
         if not self.enabled:
             self.last_pred_s = 0.0
             self.last_pred_d = 0.0
-            return 0.0, 0.0
-        self.last_pred_s = float(phi @ self.theta_s)
-        self.last_pred_d = float(phi @ self.theta_d)
-        return self.last_pred_s, self.last_pred_d
+            self.last_active_model = SOURCE_RLS
+            self.last_pred_clipped = 0
+            return 0.0, 0.0, SOURCE_RLS
+        if self.samples_trained < self.rls_warmup_samples and not self.use_gbm:
+            self.last_pred_s = 0.0
+            self.last_pred_d = 0.0
+            self.last_active_model = SOURCE_RLS
+            self.last_pred_clipped = 0
+            self._clip_history.append(0)
+            return 0.0, 0.0, SOURCE_RLS
+        if self.use_gbm and self._gbm_s is not None and self._gbm_d is not None:
+            X = phi.reshape(1, -1)
+            s_raw = float(self._gbm_s.predict(X)[0])
+            d_raw = float(self._gbm_d.predict(X)[0])
+            clip = self._predict_clip_m
+            s = max(-clip, min(clip, s_raw))
+            d = max(-clip, min(clip, d_raw))
+            clipped = 1 if (s != s_raw or d != d_raw) else 0
+            self.last_pred_s = s
+            self.last_pred_d = d
+            self.last_active_model = SOURCE_GBM
+            self.last_pred_clipped = clipped
+            self._clip_history.append(clipped)
+            self._maybe_clip_revert()
+            return s, d, SOURCE_GBM
+        # Offline replay observed RLS spikes to ~200 m of Δs prediction in benign driving
+        # Clip is hard safety
+        s_raw = float(phi @ self.theta_s)
+        d_raw = float(phi @ self.theta_d)
+        clip = self._predict_clip_m
+        s = max(-clip, min(clip, s_raw))
+        d = max(-clip, min(clip, d_raw))
+        clipped = 1 if (s != s_raw or d != d_raw) else 0
+        self.last_pred_s = s
+        self.last_pred_d = d
+        self.last_active_model = SOURCE_RLS
+        self.last_pred_clipped = clipped
+        self._clip_history.append(clipped)
+        return s, d, SOURCE_RLS
 
     def mean_error(self) -> Tuple[float, float, float, float]:
         def _mean(d): return float(np.mean(d)) if d else 0.0
 
         return (_mean(self._err_nom_s), _mean(self._err_nom_d),
                 _mean(self._err_res_s), _mean(self._err_res_d))
+
+    def clip_rate(self) -> float:
+        if not self._clip_history:
+            return 0.0
+        return sum(self._clip_history) / len(self._clip_history)
+
+    def save_rls_cache(self, *, best_val_mae_s: float, best_val_mae_d: float, train_seq: int) -> None:
+        if self.cache is None:
+            return
+        self.cache.write_rls(
+            theta_s=self.theta_s, theta_d=self.theta_d, P=self.P,
+            samples_trained=self.samples_trained,
+            best_val_mae_s=best_val_mae_s,
+            best_val_mae_d=best_val_mae_d,
+            train_seq=train_seq,
+        )
+
+    def _seed_checkpoint(self, *, theta_s, theta_d, P, samples_trained,
+                          val_mae_s, val_mae_d, regime) -> None:
+        """Test/cache-load helper that pushes a Checkpoint into the ring."""
+        from autonomous_kart.nodes.pathfinder.planners.residual.checkpoint import Checkpoint
+        self.checkpoint_ring.record(Checkpoint(
+            train_seq=0, t_wall_ns=0,
+            theta_s=theta_s.copy(), theta_d=theta_d.copy(), P=P.copy(),
+            samples_trained=int(samples_trained),
+            gbm_s=None, gbm_d=None,
+            val_mae_s=val_mae_s, val_mae_d=val_mae_d,
+            rls_val_mae_s=val_mae_s * 1.5, rls_val_mae_d=val_mae_d * 1.5,
+            regime=regime,
+        ))
+
+    def _trigger_divergence_check(self) -> None:
+        from autonomous_kart.nodes.pathfinder.planners.residual.regime import similar
+        cp = self.checkpoint_ring.best_recent() if self.checkpoint_ring else None
+        if cp is not None and similar(
+            self._current_regime, cp.regime,
+            dv_tol=self.regime_dv_tol, dd_tol=self.regime_dd_tol,
+            dkappa_rel_tol=self.regime_dkappa_rel_tol,
+        ):
+            self.theta_s = cp.theta_s.copy()
+            self.theta_d = cp.theta_d.copy()
+            self.P = cp.P.copy()
+        else:
+            self.theta_s.fill(0.0)
+            self.theta_d.fill(0.0)
+            self.P = np.eye(NUM_FEATURES) * self.p0
+            self.samples_trained = 0
+        self.divergence_resets += 1
+        self.revert_count += 1
+
+    def _maybe_clip_revert(self) -> None:
+        if not self.use_gbm:
+            return
+        rate = self.clip_rate()
+        if rate >= self.revert_clip_rate_threshold and len(self._clip_history) == self._clip_history.maxlen:
+            self.use_gbm = False
+            self.revert_count += 1
+
+    def manual_revert(self) -> None:
+        if self.checkpoint_ring is None or self.checkpoint_ring.is_empty():
+            return
+        cp = self.checkpoint_ring.best_recent()
+        self.theta_s = cp.theta_s.copy()
+        self.theta_d = cp.theta_d.copy()
+        self.P = cp.P.copy()
+        if cp.gbm_s is not None and cp.gbm_d is not None:
+            self._install_gbm(cp.gbm_s, cp.gbm_d,
+                              val_mae_s=cp.val_mae_s, val_mae_d=cp.val_mae_d,
+                              rls_val_mae_s=cp.rls_val_mae_s, rls_val_mae_d=cp.rls_val_mae_d)
+        self.revert_count += 1
+
+    def _install_gbm(self, gbm_s, gbm_d, *, val_mae_s: float, val_mae_d: float,
+                     rls_val_mae_s: float, rls_val_mae_d: float) -> None:
+        """Install a pair of GBM regressors and recompute the selector flag.
+
+        Real callers are GBMTrainer (after training) and the cache loader
+        (Task 17). Tests call it directly to skip the threaded trainer.
+        """
+        self._gbm_s = gbm_s
+        self._gbm_d = gbm_d
+        self._last_val_mae_s = val_mae_s
+        self._last_val_mae_d = val_mae_d
+        self._last_rls_val_mae_s = rls_val_mae_s
+        self._last_rls_val_mae_d = rls_val_mae_d
+        self._recompute_use_gbm()
+
+    def _recompute_use_gbm(self) -> None:
+        """Set self.use_gbm with hysteresis. Bad-GBM hard safety: never use
+        when val_mae_s > 2 × rls_val_mae_s."""
+        import math
+        if self._gbm_s is None or not math.isfinite(self._last_val_mae_s):
+            self.use_gbm = False
+            return
+        if self._last_val_mae_s > 2.0 * self._last_rls_val_mae_s:
+            self.use_gbm = False
+            return
+        beats_on_s = self._last_val_mae_s + self._select_eps_s < self._last_rls_val_mae_s
+        beats_on_d = self._last_val_mae_d + self._select_eps_d < self._last_rls_val_mae_d
+        if self.use_gbm:
+            # Stay on GBM if GBM still beats RLS on s OR d (hysteresis).
+            self.use_gbm = beats_on_s or beats_on_d
+        else:
+            # Flip to GBM if GBM beats RLS on s OR d.
+            self.use_gbm = beats_on_s or beats_on_d
+
+    def _compute_current_regime(self):
+        from autonomous_kart.nodes.pathfinder.planners.residual.regime import RegimeSignature
+        v = np.fromiter(self._v_hist, dtype=np.float64)
+        d = np.fromiter(self._d_hist, dtype=np.float64)
+        k = np.fromiter(self._kappa_hist, dtype=np.float64)
+        # The residual sees v_s (longitudinal speed in Frenet) only — use its
+        # magnitude as a v proxy.
+        return RegimeSignature.from_arrays(np.abs(v), d, k)
+
+    def _rls_predict_batch(self, X: np.ndarray):
+        """Batch RLS predictor for the trainer's held-out scoring."""
+        return X @ self.theta_s, X @ self.theta_d
+
+    def shutdown(self) -> None:
+        if self.trainer is not None:
+            self.trainer.stop(timeout=2.0)
diff --git a/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/opencv.py b/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/opencv.py
index d48f7f81..e4ff9416 100644
--- a/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/opencv.py
+++ b/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/opencv.py
@@ -1,8 +1,9 @@
+import math
+
 from typing import Optional, Tuple
 
 from autonomous_kart.nodes.pathfinder.planners.base import KartConstants, Planner, PlannerInputs
 
-
 class OpenCVPlanner(Planner):
     """Camera-only planner: drives solely from the OpenCV
     pathfinder's edge angles"""
@@ -11,7 +12,35 @@ class OpenCVPlanner(Planner):
 
     def __init__(self, params: dict, kart: KartConstants, racing_line: list, logger=None, node=None):
         super().__init__(params, kart, racing_line, logger, node=node)
-        raise NotImplementedError("OpenCVPlanner is not implemented yet")
+        self.prev_angles = None
+        self.angle_threshold = float(params.get("angle_threshold", 5.0))
+        self.steer_max_deg = float(params.get("steer_max_deg", 25.0))
+        self.throttle_mps= float(params.get("throttle_mps", 5.0))
+        self.mode = params.get("mode", "absolute")
+
+    def compute_angle(self, cur_angles):
+        angle_dif: float = cur_angles[0] - cur_angles[1]
+        self.prev_angles = angle_dif
 
+        kp = 3.5
+
+        if angle_dif <= self.angle_threshold and angle_dif > 1e-6:
+            return self.prev_angles
+
+        return angle_dif / kp
+        
+    
     def plan(self, inputs: PlannerInputs) -> Optional[Tuple[float, float]]:
-        return 0.0, 0.0
+        if inputs is None or inputs.track_angles is None or len(inputs.track_angles) < 2:
+            self.logger.warning("Inputs or inputs.track_angles is invalid")
+            return 0.0, 0.0
+        
+        self.logger.info(f"{inputs.track_angles}")
+        
+        steering_deg = self.compute_angle(inputs.track_angles)
+    
+        if steering_deg is None:
+            self.logger.warning("Degrees Was Never Changed")
+            steering_deg = 0.0
+
+        return self.throttle_mps, steering_deg
diff --git a/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/residual/__init__.py b/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/residual/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/residual/buffer.py b/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/residual/buffer.py
new file mode 100644
index 00000000..49dbd33d
--- /dev/null
+++ b/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/residual/buffer.py
@@ -0,0 +1,61 @@
+"""Fixed-capacity ring buffer of (phi, r_s, r_d, t_ns) training samples.
+
+Writes happen on the 60 Hz hot loop"""
+from __future__ import annotations
+
+import threading
+
+import numpy as np
+
+
+class TrainBuffer:
+    def __init__(self, capacity: int, feature_dim: int):
+        self.capacity = int(capacity)
+        self.feature_dim = int(feature_dim)
+        self._X = np.zeros((self.capacity, self.feature_dim), dtype=np.float64)
+        self._ys = np.zeros(self.capacity, dtype=np.float64)
+        self._yd = np.zeros(self.capacity, dtype=np.float64)
+        self._t = np.zeros(self.capacity, dtype=np.int64)
+        self._write_idx = 0       # next index to write
+        self._count = 0           # min(filled, capacity)
+        self._lock = threading.Lock()
+
+    @property
+    def size(self) -> int:
+        with self._lock:
+            return self._count
+
+    def append(self, phi: np.ndarray, r_s: float, r_d: float, t_ns: int) -> None:
+        with self._lock:
+            i = self._write_idx
+            self._X[i, :] = phi
+            self._ys[i] = r_s
+            self._yd[i] = r_d
+            self._t[i] = t_ns
+            self._write_idx = (i + 1) % self.capacity
+            if self._count < self.capacity:
+                self._count += 1
+
+    def snapshot(self) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+        """Return (X, ys, yd, t_ns) copies in chronological order."""
+        with self._lock:
+            n = self._count
+            if n == 0:
+                return (
+                    np.zeros((0, self.feature_dim)),
+                    np.zeros(0), np.zeros(0), np.zeros(0, dtype=np.int64),
+                )
+            if n < self.capacity:
+                # Not yet wrapped: rows 0..n-1 in order
+                X = self._X[:n].copy()
+                ys = self._ys[:n].copy()
+                yd = self._yd[:n].copy()
+                t = self._t[:n].copy()
+            else:
+                # Wrapped: oldest starts at write_idx, then wraps
+                wi = self._write_idx
+                X = np.concatenate([self._X[wi:], self._X[:wi]], axis=0).copy()
+                ys = np.concatenate([self._ys[wi:], self._ys[:wi]]).copy()
+                yd = np.concatenate([self._yd[wi:], self._yd[:wi]]).copy()
+                t = np.concatenate([self._t[wi:], self._t[:wi]]).copy()
+            return X, ys, yd, t
diff --git a/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/residual/cache.py b/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/residual/cache.py
new file mode 100644
index 00000000..09cd864f
--- /dev/null
+++ b/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/residual/cache.py
@@ -0,0 +1,135 @@
+"""Disk persistence for the residual learner.
+
+Layout under cache_dir:
+    manifest.json       schema_version, feature_dim, train_seq, best_val_mae_*
+    rls.npz             theta_s, theta_d, P, samples_trained
+    gbm_s.joblib        sklearn HistGBR for s residual
+    gbm_d.joblib        sklearn HistGBR for d residual
+    ring/               last N checkpoints
+"""
+from __future__ import annotations
+
+import json
+import os
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Optional
+
+import joblib
+
+import numpy as np
+
+SCHEMA_VERSION = 1
+
+
+@dataclass
+class LoadedCache:
+    feature_dim: int
+    theta_s: np.ndarray
+    theta_d: np.ndarray
+    P: np.ndarray
+    samples_trained: int
+    train_seq: int
+    best_val_mae_s: float
+    best_val_mae_d: float
+
+
+def _atomic_write_bytes(path: Path, data: bytes) -> None:
+    tmp = path.with_suffix(path.suffix + ".tmp")
+    tmp.write_bytes(data)
+    os.replace(tmp, path)
+
+
+def _atomic_write_npz(path: Path, **arrays) -> None:
+    # np.savez appends .npz automatically, so give it a stem without .npz
+    tmp = path.parent / (path.stem + ".tmp")
+    np.savez(tmp, **arrays)
+    os.replace(str(tmp) + ".npz", path)
+
+
+def _atomic_write_joblib(path: Path, obj) -> None:
+    tmp = path.with_suffix(path.suffix + ".tmp")
+    joblib.dump(obj, tmp)
+    os.replace(tmp, path)
+
+
+class ResidualCache:
+    def __init__(self, cache_dir: str, feature_dim: int):
+        self.dir = Path(cache_dir)
+        self.feature_dim = int(feature_dim)
+        self.dir.mkdir(parents=True, exist_ok=True)
+
+    @property
+    def manifest_path(self) -> Path:
+        return self.dir / "manifest.json"
+
+    @property
+    def rls_path(self) -> Path:
+        return self.dir / "rls.npz"
+
+    def write_rls(self, theta_s: np.ndarray, theta_d: np.ndarray, P: np.ndarray,
+                  samples_trained: int, best_val_mae_s: float, best_val_mae_d: float,
+                  train_seq: int) -> None:
+        _atomic_write_npz(
+            self.rls_path,
+            theta_s=theta_s.astype(np.float64),
+            theta_d=theta_d.astype(np.float64),
+            P=P.astype(np.float64),
+            samples_trained=np.int64(samples_trained),
+        )
+        manifest = dict(
+            schema_version=SCHEMA_VERSION,
+            feature_dim=self.feature_dim,
+            train_seq=int(train_seq),
+            best_val_mae_s=float(best_val_mae_s),
+            best_val_mae_d=float(best_val_mae_d),
+        )
+        _atomic_write_bytes(self.manifest_path, json.dumps(manifest, indent=2).encode())
+
+    @property
+    def gbm_s_path(self) -> Path:
+        return self.dir / "gbm_s.joblib"
+
+    @property
+    def gbm_d_path(self) -> Path:
+        return self.dir / "gbm_d.joblib"
+
+    def write_gbm(self, gbm_s, gbm_d) -> None:
+        _atomic_write_joblib(self.gbm_s_path, gbm_s)
+        _atomic_write_joblib(self.gbm_d_path, gbm_d)
+
+    def load_gbm(self):
+        if not (self.gbm_s_path.exists() and self.gbm_d_path.exists()):
+            return None, None
+        try:
+            return joblib.load(self.gbm_s_path), joblib.load(self.gbm_d_path)
+        except Exception:
+            return None, None
+
+    def load(self) -> Optional[LoadedCache]:
+        if not self.manifest_path.exists():
+            return None
+        try:
+            m = json.loads(self.manifest_path.read_text())
+        except (OSError, json.JSONDecodeError):
+            return None
+        if m.get("schema_version") != SCHEMA_VERSION:
+            return None
+        if m.get("feature_dim") != self.feature_dim:
+            return None
+        if not self.rls_path.exists():
+            return None
+        try:
+            arr = np.load(self.rls_path)
+            return LoadedCache(
+                feature_dim=self.feature_dim,
+                theta_s=arr["theta_s"],
+                theta_d=arr["theta_d"],
+                P=arr["P"],
+                samples_trained=int(arr["samples_trained"]),
+                train_seq=int(m["train_seq"]),
+                best_val_mae_s=float(m["best_val_mae_s"]),
+                best_val_mae_d=float(m["best_val_mae_d"]),
+            )
+        except Exception:
+            return None
diff --git a/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/residual/checkpoint.py b/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/residual/checkpoint.py
new file mode 100644
index 00000000..bf32f723
--- /dev/null
+++ b/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/residual/checkpoint.py
@@ -0,0 +1,73 @@
+"""Checkpoint of a known-good residual model state + CheckpointRing.
+
+A checkpoint snapshots both models (RLS state arrays + the two GBM
+regressors) plus their validation MAEs and the regime signature at save
+time. The ring keeps the last N checkpoints and protects best from eviction"""
+from __future__ import annotations
+
+import math
+from dataclasses import dataclass, field
+from typing import Any, Optional
+
+import numpy as np
+
+from autonomous_kart.nodes.pathfinder.planners.residual.regime import RegimeSignature
+
+
+@dataclass
+class Checkpoint:
+    train_seq: int
+    t_wall_ns: int
+    # RLS state
+    theta_s: np.ndarray
+    theta_d: np.ndarray
+    P: np.ndarray
+    samples_trained: int
+    # GBM state (Optional — Any here so tests don't need sklearn for the dataclass)
+    gbm_s: Optional[Any]
+    gbm_d: Optional[Any]
+    val_mae_s: float
+    val_mae_d: float
+    rls_val_mae_s: float
+    rls_val_mae_d: float
+    regime: RegimeSignature
+
+
+class CheckpointRing:
+    def __init__(self, size: int = 5):
+        self.size = max(1, int(size))
+        self.entries: list[Checkpoint] = []
+
+    def is_empty(self) -> bool:
+        return not self.entries
+
+    def record(self, cp: Checkpoint) -> None:
+        if len(self.entries) < self.size:
+            self.entries.append(cp)
+            return
+        # Need to evict. Find the all-time-best entry by val_mae_s
+        best_idx = self._best_index()
+        # Oldest is index 0. Don't evict the best (unless it's the only slot).
+        evict_idx = 0
+        if best_idx == 0 and self.size > 1:
+            evict_idx = 1
+        del self.entries[evict_idx]
+        self.entries.append(cp)
+
+    def best_recent(self) -> Optional[Checkpoint]:
+        idx = self._best_index()
+        if idx is None:
+            return None
+        return self.entries[idx]
+
+    def _best_index(self) -> Optional[int]:
+        best_i = None
+        best_v = math.inf
+        for i, c in enumerate(self.entries):
+            v = c.val_mae_s
+            if not math.isfinite(v):
+                continue
+            if v < best_v:
+                best_v = v
+                best_i = i
+        return best_i
diff --git a/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/residual/regime.py b/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/residual/regime.py
new file mode 100644
index 00000000..2bd22ee9
--- /dev/null
+++ b/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/residual/regime.py
@@ -0,0 +1,38 @@
+"""RegimeSignature: tiny fingerprint of recent driving conditions.
+
+Used to gate revert decisions — a checkpoint trained in one regime is only restored if the current regime is similar"""
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+import numpy as np
+
+
+@dataclass
+class RegimeSignature:
+    mean_v: float
+    mean_abs_d: float
+    std_kappa: float
+
+    @classmethod
+    def from_arrays(cls, v: np.ndarray, d: np.ndarray, kappa: np.ndarray) -> "RegimeSignature":
+        if v.size == 0:
+            return cls(0.0, 0.0, 0.0)
+        return cls(
+            mean_v=float(np.mean(v)),
+            mean_abs_d=float(np.mean(np.abs(d))),
+            std_kappa=float(np.std(kappa)),
+        )
+
+
+def similar(a: RegimeSignature, b: RegimeSignature,
+            *, dv_tol: float, dd_tol: float, dkappa_rel_tol: float) -> bool:
+    """Return True iff a and b are within all three tolerances."""
+    if abs(a.mean_v - b.mean_v) > dv_tol:
+        return False
+    if abs(a.mean_abs_d - b.mean_abs_d) > dd_tol:
+        return False
+    denom = max(a.std_kappa, b.std_kappa, 1e-3)
+    if abs(a.std_kappa - b.std_kappa) / denom > dkappa_rel_tol:
+        return False
+    return True
diff --git a/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/residual/trainer.py b/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/residual/trainer.py
new file mode 100644
index 00000000..e7b172c4
--- /dev/null
+++ b/src/autonomous_kart/autonomous_kart/nodes/pathfinder/planners/residual/trainer.py
@@ -0,0 +1,164 @@
+"""Background GBM trainer.
+
+Runs in a daemon thread. Trains two sklearn HistGradientBoostingRegressors
+from a chronological 80/20 split of the TrainBuffer snapshot. The same
+held-out 20% is also scored against the RLS to compare"""
+from __future__ import annotations
+
+import threading
+import time
+from dataclasses import dataclass, field
+from typing import Any, Callable, Optional
+
+import numpy as np
+
+
+@dataclass
+class TrainResult:
+    train_seq: int
+    t_wall_ns: int
+    n_samples: int
+    train_wall_ms: float
+    gbm_s: Any
+    gbm_d: Any
+    train_mae_s: float
+    train_mae_d: float
+    val_mae_s: float
+    val_mae_d: float
+    rls_val_mae_s: float
+    rls_val_mae_d: float
+    error: Optional[str] = None
+
+
+class GBMTrainer:
+    def __init__(self, *, buffer, rls_predict_batch: Callable,
+                 max_iter: int, max_depth: int, learning_rate: float,
+                 min_samples_leaf: int, min_samples_to_train: int,
+                 retrain_secs: float, retrain_every_samples: int):
+        self.buffer = buffer
+        self.rls_predict_batch = rls_predict_batch
+        self.max_iter = int(max_iter)
+        self.max_depth = int(max_depth)
+        self.learning_rate = float(learning_rate)
+        self.min_samples_leaf = int(min_samples_leaf)
+        self.min_samples_to_train = int(min_samples_to_train)
+        self.retrain_secs = float(retrain_secs)
+        self.retrain_every_samples = int(retrain_every_samples)
+
+        self._lock = threading.Lock()
+        self._stop_event = threading.Event()
+        self._thread: Optional[threading.Thread] = None
+        self._last_buffer_size_at_train = 0
+        self._last_train_t = 0.0
+
+        # Public state (read under self._lock from outside).
+        self.gbm_s = None
+        self.gbm_d = None
+        self.train_seq = 0
+        self.last_result: Optional[TrainResult] = None
+        self.last_error: Optional[str] = None
+
+    def _snapshot_and_filter(self):
+        X, ys, yd, _ = self.buffer.snapshot()
+        if len(X) < self.min_samples_to_train:
+            return None
+        finite = np.isfinite(X).all(axis=1) & np.isfinite(ys) & np.isfinite(yd)
+        X = X[finite]; ys = ys[finite]; yd = yd[finite]
+        if len(X) < self.min_samples_to_train:
+            return None
+        return X, ys, yd
+
+    def train_once(self) -> Optional[TrainResult]:
+        from sklearn.ensemble import HistGradientBoostingRegressor
+
+        snap = self._snapshot_and_filter()
+        if snap is None:
+            return None
+        X, ys, yd = snap
+        n = len(X)
+        split = int(0.8 * n)
+        Xtr, Xte = X[:split], X[split:]
+        ystr, yste = ys[:split], ys[split:]
+        ydtr, ydte = yd[:split], yd[split:]
+
+        t0 = time.perf_counter()
+        try:
+            kw = dict(max_iter=self.max_iter, max_depth=self.max_depth,
+                      learning_rate=self.learning_rate,
+                      min_samples_leaf=self.min_samples_leaf)
+            gbm_s = HistGradientBoostingRegressor(**kw).fit(Xtr, ystr)
+            gbm_d = HistGradientBoostingRegressor(**kw).fit(Xtr, ydtr)
+        except Exception as exc:
+            err = repr(exc)
+            with self._lock:
+                self.last_error = err
+            return TrainResult(
+                train_seq=self.train_seq, t_wall_ns=time.time_ns(),
+                n_samples=n, train_wall_ms=(time.perf_counter() - t0) * 1000.0,
+                gbm_s=None, gbm_d=None,
+                train_mae_s=float("nan"), train_mae_d=float("nan"),
+                val_mae_s=float("nan"), val_mae_d=float("nan"),
+                rls_val_mae_s=float("nan"), rls_val_mae_d=float("nan"),
+                error=err,
+            )
+        train_wall_ms = (time.perf_counter() - t0) * 1000.0
+
+        pred_s_tr = gbm_s.predict(Xtr); pred_d_tr = gbm_d.predict(Xtr)
+        pred_s_te = gbm_s.predict(Xte); pred_d_te = gbm_d.predict(Xte)
+        rls_s_te, rls_d_te = self.rls_predict_batch(Xte)
+
+        result = TrainResult(
+            train_seq=self.train_seq + 1, t_wall_ns=time.time_ns(),
+            n_samples=n, train_wall_ms=train_wall_ms,
+            gbm_s=gbm_s, gbm_d=gbm_d,
+            train_mae_s=float(np.mean(np.abs(ystr - pred_s_tr))),
+            train_mae_d=float(np.mean(np.abs(ydtr - pred_d_tr))),
+            val_mae_s=float(np.mean(np.abs(yste - pred_s_te))),
+            val_mae_d=float(np.mean(np.abs(ydte - pred_d_te))),
+            rls_val_mae_s=float(np.mean(np.abs(yste - rls_s_te))),
+            rls_val_mae_d=float(np.mean(np.abs(ydte - rls_d_te))),
+        )
+
+        with self._lock:
+            self.gbm_s = gbm_s
+            self.gbm_d = gbm_d
+            self.train_seq = result.train_seq
+            self.last_result = result
+            self.last_error = None
+        return result
+
+    def _loop(self):
+        while not self._stop_event.is_set():
+            self._stop_event.wait(timeout=min(self.retrain_secs, 1.0))
+            if self._stop_event.is_set():
+                return
+            now = time.monotonic()
+            buf_size = self.buffer.size
+            time_due = (now - self._last_train_t) >= self.retrain_secs
+            sample_due = self.retrain_every_samples > 0 and (
+                buf_size - self._last_buffer_size_at_train) >= self.retrain_every_samples
+            if not (time_due or sample_due):
+                continue
+            result = self.train_once()
+            if result is not None:
+                self._last_train_t = now
+                self._last_buffer_size_at_train = buf_size
+
+    def start(self) -> None:
+        if self._thread is not None and self._thread.is_alive():
+            return
+        self._stop_event.clear()
+        self._thread = threading.Thread(target=self._loop, name="gbm-trainer", daemon=True)
+        self._thread.start()
+
+    def stop(self, timeout: float = 2.0) -> None:
+        self._stop_event.set()
+        if self._thread is not None:
+            self._thread.join(timeout=timeout)
+
+    def is_alive(self) -> bool:
+        return self._thread is not None and self._thread.is_alive()
+
+    def snapshot_models(self):
+        with self._lock:
+            return self.gbm_s, self.gbm_d, self.last_result
diff --git a/src/autonomous_kart/autonomous_kart/params/actuators.yaml b/src/autonomous_kart/autonomous_kart/params/actuators.yaml
index c3bb5b1a..e20bb90e 100644
--- a/src/autonomous_kart/autonomous_kart/params/actuators.yaml
+++ b/src/autonomous_kart/autonomous_kart/params/actuators.yaml
@@ -2,5 +2,5 @@
 e_comms_node:
   ros__parameters:
     min_speed: 0.0        # m/s
-    max_steering: 100.0   # %
-    min_steering: -100.0  # %
+    max_steering: 60.0   # %
+    min_steering: -60.0  # %
diff --git a/src/autonomous_kart/autonomous_kart/params/imu.yaml b/src/autonomous_kart/autonomous_kart/params/imu.yaml
index a2a9248f..a3ca8853 100644
--- a/src/autonomous_kart/autonomous_kart/params/imu.yaml
+++ b/src/autonomous_kart/autonomous_kart/params/imu.yaml
@@ -12,8 +12,8 @@ imu_node:
     gyro_lsb_per_dps: 131.0
 
     # Motion guards used during calibration per sample to force retry
-    gyro_motion_thresh: 0.05      # rad/s, ~3 deg/s of jitter allowed
-    accel_motion_thresh: 0.5      # m/s^2 deviation from |g|
+    gyro_motion_thresh: 0.08      # rad/s, ~4.6 deg/s of jitter allowed
+    accel_motion_thresh: 1.5      # m/s^2 tolerance for mean |accel| vs |g| (checked at end of calibration)
 
     cmd_vel_zero_eps: 0.01        # m/s
     cmd_vel_max_age_s: 1.0
diff --git a/src/autonomous_kart/autonomous_kart/params/localization.yaml b/src/autonomous_kart/autonomous_kart/params/localization.yaml
index ce429294..f59d811d 100644
--- a/src/autonomous_kart/autonomous_kart/params/localization.yaml
+++ b/src/autonomous_kart/autonomous_kart/params/localization.yaml
@@ -9,6 +9,9 @@ localization_node:
     vtg_yaw_var_max: 1.0       # rad^2
     vtg_speed_var_max: 100.0   # (m/s)^2
 
+    # Deadband below which |v| is treated as "kart not actually in reverse"
+    reverse_flip_deadband_mps: 0.3
+
     gps_dropout_warn: 2.0      # s
     sigma_xy_floor: 0.02       # m
 
@@ -21,9 +24,10 @@ localization_node:
     # Sim-only noise + actuator delay
     sim_odom_rate_hz: 10.0             # /odom publish rate (Hz) matches real GPS cadence
     sim_noise_seed: 0                  # RNG seed (deterministic noise sequence)
-    sim_actuator_delay_s: 0.08         # 80 ms
+    sim_actuator_delay_s: 0.20         # 200 ms
     sim_cmd_motor_noise_mps: 0.3       # std on throttle command (m/s)
     sim_cmd_steer_noise_deg: 1.0       # std on steering command (deg)
+    sim_steer_slop_deg: 2.0            # mechanical backlash half-width (deg)
     sim_pos_noise_m: 0.03              # std on x, y output (m)
     sim_yaw_noise_deg: 0.5             # std on yaw output (deg)
     sim_speed_noise_mps: 0.05          # std on speed output (m/s)
\ No newline at end of file
diff --git a/src/autonomous_kart/autonomous_kart/params/opencv_pathfinder.yaml b/src/autonomous_kart/autonomous_kart/params/opencv_pathfinder.yaml
index 898ec451..0f28091b 100644
--- a/src/autonomous_kart/autonomous_kart/params/opencv_pathfinder.yaml
+++ b/src/autonomous_kart/autonomous_kart/params/opencv_pathfinder.yaml
@@ -3,6 +3,10 @@ opencv_pathfinder_node:
     log_dir: "/ws/logs/"
     log_file: "opencv_pathfinder"
     pic_offset: 5
-    percent_of_img: 0.5
+    top_per: 0.5    #Top percent to exclude
+    bottom_per: 0.0 #Bottom percent to exclude
     pixel_range: 3
-    capture_frequency: 100
\ No newline at end of file
+    capture_frequency: 100
+    use_sim_vision: false
+    
+    
\ No newline at end of file
diff --git a/src/autonomous_kart/autonomous_kart/params/pathfinder.yaml b/src/autonomous_kart/autonomous_kart/params/pathfinder.yaml
index b30477f6..e343233e 100644
--- a/src/autonomous_kart/autonomous_kart/params/pathfinder.yaml
+++ b/src/autonomous_kart/autonomous_kart/params/pathfinder.yaml
@@ -19,7 +19,7 @@
 pathfinder_node:
   ros__parameters:
     # Active planner: mpc | opencv | pure_pursuit
-    planner: pure_pursuit
+    planner: mpc
 
     # pure_pursuit
     pure_pursuit.use_velocity_scaled_lookahead: true
@@ -39,10 +39,18 @@ pathfinder_node:
     pure_pursuit.rejoin_cte_deactivate: 1.0
     pure_pursuit.rejoin_merge_lookahead_m: 15.0
     pure_pursuit.latency_s: 0.3
+    # Was 3.0 (default landed in commit 7f34634 on 2026-05-18). Pre-refactor
+    # PP went through e_comms's `steering_deg *= 2`, so the effective net
+    # amplification was ×2. Restoring 2.0 here matches the historical tune;
+    # 3.0 over-gained the steering loop and caused oscillating divergence
+    # at sharp corners (run 20260519_214619 lost the line at the top-left
+    # apex, cmd_steering ringing ±40° before closest_idx wrapped to idx≈3).
+    pure_pursuit.steering_gain: 2.0
 
     # opencv
 
     # mpc: sampling MPC
+    mpc.steering_gain: 2.0
     mpc.horizon_steps: 20
     mpc.dt_s: 0.05
     mpc.num_samples: 164
@@ -88,6 +96,45 @@ pathfinder_node:
     mpc.residual.initial_cov: 1000.0
     mpc.residual.min_train_speed_mps: 0.5
     mpc.residual.error_window: 200
+    # tight so model loss does not explode
+    mpc.residual.outlier_threshold_m: 2.0
+    # refuses to train when |d| >= this so proj is not too noisy
+    mpc.residual.max_train_d_m: 3.0
+    mpc.residual.max_p_trace: 100000.0
+    mpc.residual.max_theta_norm: 50.0
+    # below this many RLS updates, predict() returns (0, 0) so MPC sees no noise
+    mpc.residual.rls_warmup_samples: 50
+#    GBM parms
+    mpc.residual.gbm_enabled: false
+    mpc.residual.gbm_buffer_capacity: 18000
+    mpc.residual.gbm_max_iter: 60
+    mpc.residual.gbm_max_depth: 3
+    mpc.residual.gbm_learning_rate: 0.1
+    mpc.residual.gbm_min_samples_leaf: 20
+    mpc.residual.gbm_retrain_secs: 10.0
+    mpc.residual.gbm_min_samples_to_train: 1200
+    mpc.residual.gbm_retrain_every_samples: 1200
+    mpc.residual.gbm_select_eps_s_m: 0.0
+    mpc.residual.gbm_select_eps_d_m: 0.005
+    mpc.residual.gbm_predict_clip_m: 0.5
+    # Checkpoints + revert
+    mpc.residual.checkpoint_ring_size: 5
+    mpc.residual.checkpoint_keep_factor: 1.5
+    mpc.residual.revert_clip_rate_window: 600
+    mpc.residual.revert_clip_rate_threshold: 0.30
+    mpc.residual.regime_dv_tol_mps: 1.0
+    mpc.residual.regime_dd_tol_m: 0.3
+    mpc.residual.regime_dkappa_rel_tol: 0.5
+    # Apply-mode gate (effective_mode = shadow until this many accepted samples this run)
+    mpc.residual.apply_min_samples_this_run: 1200
+    # Disk cache (used in Phase 2 task wiring; default on)
+    mpc.residual.cache_enabled: true
+    mpc.residual.cache_dir: /root/.cache/residual_cache
+    mpc.residual.cache_load_on_start: true
+
+    # opencv
+    opencv.angle_threshold: 5.0
+    opencv.throttle_mps: 5.0
 
     #  safety wrapper 
     safety.enabled: true
diff --git a/src/autonomous_kart/autonomous_kart/params/system.yaml b/src/autonomous_kart/autonomous_kart/params/system.yaml
index e172b17f..4bd14c21 100644
--- a/src/autonomous_kart/autonomous_kart/params/system.yaml
+++ b/src/autonomous_kart/autonomous_kart/params/system.yaml
@@ -1,7 +1,9 @@
 /**:
   ros__parameters:
     simulation_mode: false
-    debug_mode: true # Turns on debug info on images in opencv
+    debug_mode: false # Turns on debug info on images in opencv
     system_state: "IDLE" # "STOPPED", "IDLE", "MANUAL", "AUTONOMOUS"
     system_frequency: 60 # Speed of hot loop (Hertz)
     line_path: "/ws/data/racing_line/line6.csv"
+    jetson_ip: "evc-jetson.tail73145c.ts.net"
+    jetson_username: "evc" 
diff --git a/src/autonomous_kart/setup.py b/src/autonomous_kart/setup.py
index 7838787c..ca2e589e 100644
--- a/src/autonomous_kart/setup.py
+++ b/src/autonomous_kart/setup.py
@@ -19,6 +19,7 @@
         'autonomous_kart.nodes.localization',
         'autonomous_kart.nodes.pathfinder.strategies',
         'autonomous_kart.nodes.pathfinder.planners',
+        'autonomous_kart.nodes.pathfinder.planners.residual',
         'autonomous_kart.nodes.imu',
     ],
     data_files=[
diff --git a/src/autonomous_kart/test/test_gbm_trainer.py b/src/autonomous_kart/test/test_gbm_trainer.py
new file mode 100644
index 00000000..bc4a9559
--- /dev/null
+++ b/src/autonomous_kart/test/test_gbm_trainer.py
@@ -0,0 +1,88 @@
+"""Tests for GBMTrainer: synchronous train_once + threaded run loop."""
+import time
+
+import numpy as np
+
+from autonomous_kart.nodes.pathfinder.planners.residual.buffer import TrainBuffer
+from autonomous_kart.nodes.pathfinder.planners.residual.trainer import (
+    GBMTrainer, TrainResult,
+)
+
+
+def _make_buffer(n=2000, dim=14, seed=0):
+    rng = np.random.default_rng(seed)
+    buf = TrainBuffer(capacity=n, feature_dim=dim)
+    for i in range(n):
+        phi = rng.standard_normal(dim)
+        # target_s is a nonlinear function so the GBM has something to learn
+        r_s = 0.3 * phi[1] - 0.2 * phi[2] * phi[3] + 0.1 * rng.standard_normal()
+        r_d = 0.1 * phi[4] ** 2 + 0.05 * rng.standard_normal()
+        buf.append(phi, r_s=float(r_s), r_d=float(r_d), t_ns=i)
+    return buf
+
+
+def _rls_predict(X):
+    """Stub RLS predictor that returns zeros (so GBM clearly beats it)."""
+    return np.zeros(len(X)), np.zeros(len(X))
+
+
+def test_train_once_happy_path():
+    buf = _make_buffer(n=2000)
+    trainer = GBMTrainer(
+        buffer=buf,
+        rls_predict_batch=_rls_predict,
+        max_iter=60, max_depth=3, learning_rate=0.1, min_samples_leaf=20,
+        min_samples_to_train=1200, retrain_secs=10.0, retrain_every_samples=1200,
+    )
+    result = trainer.train_once()
+    assert isinstance(result, TrainResult)
+    assert result.gbm_s is not None
+    assert result.gbm_d is not None
+    assert np.isfinite(result.val_mae_s)
+    assert np.isfinite(result.rls_val_mae_s)
+    # GBM should beat the zero baseline
+    assert result.val_mae_s < result.rls_val_mae_s
+
+
+def test_train_once_returns_none_with_too_few_samples():
+    buf = _make_buffer(n=100)
+    trainer = GBMTrainer(
+        buffer=buf, rls_predict_batch=_rls_predict,
+        max_iter=60, max_depth=3, learning_rate=0.1, min_samples_leaf=20,
+        min_samples_to_train=1200, retrain_secs=10.0, retrain_every_samples=1200,
+    )
+    assert trainer.train_once() is None
+
+
+def test_train_once_catches_nan_samples():
+    buf = _make_buffer(n=2000)
+    # Poison the buffer with NaNs
+    X, ys, yd, _ = buf.snapshot()
+    X[0, 0] = float("nan")
+    # Re-insert by clearing/reappending wouldn't be clean; instead test that
+    # GBMTrainer drops non-finite rows internally.
+    buf2 = TrainBuffer(capacity=2000, feature_dim=14)
+    for i in range(len(X)):
+        buf2.append(X[i], r_s=float(ys[i]), r_d=float(yd[i]), t_ns=i)
+    trainer = GBMTrainer(
+        buffer=buf2, rls_predict_batch=_rls_predict,
+        max_iter=60, max_depth=3, learning_rate=0.1, min_samples_leaf=20,
+        min_samples_to_train=1200, retrain_secs=10.0, retrain_every_samples=1200,
+    )
+    result = trainer.train_once()
+    assert result is not None  # NaN row was filtered, train succeeded
+    assert np.isfinite(result.val_mae_s)
+
+
+def test_thread_lifecycle():
+    buf = _make_buffer(n=2000)
+    trainer = GBMTrainer(
+        buffer=buf, rls_predict_batch=_rls_predict,
+        max_iter=20, max_depth=3, learning_rate=0.1, min_samples_leaf=20,
+        min_samples_to_train=1200, retrain_secs=0.1, retrain_every_samples=999999,
+    )
+    trainer.start()
+    time.sleep(0.5)   # allow at least one train pass
+    assert trainer.train_seq >= 1
+    trainer.stop(timeout=2.0)
+    assert not trainer.is_alive()
diff --git a/src/autonomous_kart/test/test_imu_node.py b/src/autonomous_kart/test/test_imu_node.py
index 01c37824..7d6de0cc 100644
--- a/src/autonomous_kart/test/test_imu_node.py
+++ b/src/autonomous_kart/test/test_imu_node.py
@@ -246,6 +246,60 @@ def test_calibration_aborts_on_accel_anomaly(imu_factory):
 # --------------------------------------------------- happy-path calibration
 
 
+def test_calibration_levels_accel_xy(imu_factory, tmp_path):
+    """Tilted chassis: accel calibration must rotate measured gravity onto +Z.
+
+    Pick a raw accel that, after R_MOUNT (diag(1, -1, -1)), points slightly off
+    +Z. After calibration the published accel should land at (0, 0, +|g|).
+    """
+    import numpy as np
+
+    cache = tmp_path / "out.json"
+    # Tilted chassis: |raw| == 16384 (i.e. magnitude |g|), but split between
+    # x and z. (4000, 0, 15873) gives ~14 deg tilt around the chip's +Y.
+    raw_ax, raw_az = 4000, 15873
+    with imu_factory(cache_path=cache, calibration_samples=6) as (node, bus, _rclpy):
+        bus.next_read = _make_burst(accel_raw=(raw_ax, 0, raw_az), gyro_raw=(0, 0, 0))
+
+        for _ in range(6):
+            node.publish_imu()
+
+        assert node.state == CALIBRATED, node.last_error
+        # Apply learned R to the same raw accel: x/y must zero out and z must
+        # carry the full magnitude (level rotation preserves length).
+        accel_m = np.array([raw_ax, 0, raw_az]) / 16384.0 * node.default_g
+        corrected = node.R @ accel_m
+        assert corrected[0] == pytest.approx(0.0, abs=1e-6)
+        assert corrected[1] == pytest.approx(0.0, abs=1e-6)
+        assert abs(corrected[2]) == pytest.approx(np.linalg.norm(accel_m), rel=1e-9)
+
+        # Cache round-trips R.
+        payload = json.loads(cache.read_text())
+        assert "R" in payload
+        assert np.allclose(np.asarray(payload["R"]), node.R)
+
+
+def test_cache_round_trip_restores_R(imu_factory, tmp_path):
+    """A cached R must be reloaded so the kart doesn't have to re-level."""
+    import numpy as np
+
+    cache = tmp_path / "cal.json"
+    R_saved = [
+        [0.9848, 0.0, 0.1736],
+        [0.0, 1.0, 0.0],
+        [-0.1736, 0.0, 0.9848],
+    ]
+    cache.write_text(json.dumps({
+        "gyro_bias": [0.0, 0.0, 0.0],
+        "R": R_saved,
+        "samples": 200,
+        "timestamp": 0.0,
+    }))
+    with imu_factory(cache_path=cache) as (node, _bus, _rclpy):
+        assert node.state == CALIBRATED
+        assert np.allclose(node.R, np.asarray(R_saved))
+
+
 def test_calibration_completes_and_writes_cache(imu_factory, tmp_path):
     cache = tmp_path / "out.json"
     # Tiny non-zero gyro so the bias is testable, well below motion threshold.
diff --git a/src/autonomous_kart/test/test_localization_node.py b/src/autonomous_kart/test/test_localization_node.py
index 808444de..e3c65e41 100644
--- a/src/autonomous_kart/test/test_localization_node.py
+++ b/src/autonomous_kart/test/test_localization_node.py
@@ -389,3 +389,92 @@ def test_localization_real_mode_wheel_speed_pins_v_against_accel_bias(ros_ctx):
             )
         finally:
             node.destroy_node()
+
+
+def test_localization_reverse_flip_does_not_fire_during_init(ros_ctx):
+    """Regression for the 2026-05-19 ±π yaw-lock bug.
+
+    When the kart is parked at rest, VESC ERPM noise can briefly push the
+    wheel-speed reading below zero. Before the fix, the VTG reverse-flip
+    rule treated that as 'kart in reverse' and rotated the very first GPS
+    yaw by π, locking the EKF's heading 180° off for the rest of the run.
+
+    The flip MUST NOT fire on the init fix — there is no reliable signed
+    velocity source yet.
+    """
+    import math
+    from std_msgs.msg import Float32
+
+    with ros_ctx(_real_params()) as rclpy:
+        node = LocalizationNode()
+        try:
+            node._imu_cb(_imu_msg(omega_z=0.0, accel_x=0.0, stamp_sec=0))
+            # Simulate VESC noise at rest before the first GPS fix.
+            node._wheel_speed_cb(Float32(data=-0.05))
+            # First GPS fix arrives with VTG yaw pointing east-ish.
+            node.gps_callback(_gps_odom(x=0.0, y=0.0, yaw=0.5, speed=0.0))
+            assert node.ekf.initialized
+            yaw = float(node.ekf.x[2])
+            # Yaw should match VTG yaw, NOT VTG yaw + π.
+            assert abs(yaw - 0.5) < 1e-6, (
+                f"EKF init yaw={yaw}, expected ~0.5 (flip must not fire on init)"
+            )
+        finally:
+            node.destroy_node()
+
+
+def test_localization_reverse_flip_ignores_noise_below_deadband(ros_ctx):
+    """Even after init, the flip only fires when wheel_v is clearly in reverse
+    (|v| > reverse_flip_deadband_mps). Tiny negative noise must not flip yaw."""
+    import math
+    from std_msgs.msg import Float32
+
+    with ros_ctx(_real_params()) as rclpy:
+        node = LocalizationNode()
+        try:
+            node._imu_cb(_imu_msg(omega_z=0.0, accel_x=0.0, stamp_sec=0))
+            # Init cleanly while moving forward.
+            node.gps_callback(_gps_odom(x=0.0, y=0.0, yaw=0.0, speed=2.0))
+            assert node.ekf.initialized
+            # Now the kart slows to rest; VESC reports noisy ~-0.05 m/s.
+            node._wheel_speed_cb(Float32(data=-0.05))
+            # A new VTG fix with yaw=0.5 should not get flipped.
+            node.gps_callback(_gps_odom(x=0.1, y=0.0, yaw=0.5, speed=0.0))
+            yaw = float(node.ekf.x[2])
+            # Yaw won't equal 0.5 exactly (EKF fuses with prior), but it
+            # must be near 0 / 0.5 — NOT pulled toward π.
+            assert abs(yaw) < 1.0, (
+                f"EKF yaw={yaw}; noise should not have flipped it toward ±π"
+            )
+        finally:
+            node.destroy_node()
+
+
+def test_localization_reverse_flip_does_fire_for_real_reverse(ros_ctx):
+    """The flip must still fire when the kart is genuinely in reverse
+    (signed wheel speed clearly below -deadband)."""
+    import math
+    from std_msgs.msg import Float32
+
+    with ros_ctx(_real_params()) as rclpy:
+        node = LocalizationNode()
+        try:
+            node._imu_cb(_imu_msg(omega_z=0.0, accel_x=0.0, stamp_sec=0))
+            # Init cleanly forward.
+            node.gps_callback(_gps_odom(x=0.0, y=0.0, yaw=0.0, speed=2.0))
+            assert node.ekf.initialized
+            # Now kart genuinely reverses: wheel-v = -1.5 m/s, well below the
+            # default 0.3 deadband.
+            node._wheel_speed_cb(Float32(data=-1.5))
+            # A VTG fix arrives with course-over-ground = 0 (because COG is
+            # the direction of motion, which is +x even when kart faces -x).
+            # The flip should rotate yaw_meas to π, and the EKF should pull
+            # toward π (not stay near 0).
+            for _ in range(5):
+                node.gps_callback(_gps_odom(x=0.1, y=0.0, yaw=0.0, speed=1.5))
+            yaw = float(node.ekf.x[2])
+            assert abs(abs(yaw) - math.pi) < 0.5, (
+                f"EKF yaw={yaw}; real reverse should have flipped it toward π"
+            )
+        finally:
+            node.destroy_node()
diff --git a/src/autonomous_kart/test/test_master_residual_api.py b/src/autonomous_kart/test/test_master_residual_api.py
new file mode 100644
index 00000000..31763d03
--- /dev/null
+++ b/src/autonomous_kart/test/test_master_residual_api.py
@@ -0,0 +1,71 @@
+"""Tests for the residual API plumbing in master_node + master_api."""
+import pytest
+
+rclpy = pytest.importorskip("rclpy")  # skip whole file on machines w/o rclpy
+from std_msgs.msg import Float32MultiArray
+
+
+def _default_params(state="IDLE"):
+    """MasterNode requires these globals declared or it asserts on init."""
+    return {"system_state": state, "system_frequency": 60}
+
+
+def _status_msg_with_train_seq(seq, val_mae_s=0.10, val_mae_d=0.03):
+    """Build an 88-float /mpc/status message with the train_seq slot set."""
+    data = [0.0] * 88
+    data[0] = 0.0    # mode
+    data[1] = 1.0    # success
+    data[67] = float(seq)   # train_seq
+    data[70] = float(val_mae_s)
+    data[71] = float(val_mae_d)
+    data[72] = float(val_mae_s + 0.05)  # rls_val_mae_s
+    data[73] = float(val_mae_d + 0.02)  # rls_val_mae_d
+    data[74] = 1.0   # active_model = GBM
+    m = Float32MultiArray()
+    m.data = data
+    return m
+
+
+def test_residual_log_records_new_train_seq(ros_ctx):
+    from autonomous_kart.nodes.master.master_node import MasterNode
+    with ros_ctx(_default_params()):
+        node = MasterNode()
+        try:
+            node._mpc_status_callback(_status_msg_with_train_seq(seq=1))
+            node._mpc_status_callback(_status_msg_with_train_seq(seq=1))
+            node._mpc_status_callback(_status_msg_with_train_seq(seq=2, val_mae_s=0.08))
+            events = node.get_residual_log(limit=10)
+            assert len(events) == 2
+            assert events[0]["train_seq"] == 2     # newest first
+            assert events[1]["train_seq"] == 1
+        finally:
+            node.destroy_node()
+
+
+def test_residual_log_limit(ros_ctx):
+    from autonomous_kart.nodes.master.master_node import MasterNode
+    with ros_ctx(_default_params()):
+        node = MasterNode()
+        try:
+            for s in range(5):
+                node._mpc_status_callback(_status_msg_with_train_seq(seq=s + 1))
+            events = node.get_residual_log(limit=2)
+            assert len(events) == 2
+            assert events[0]["train_seq"] == 5
+            assert events[1]["train_seq"] == 4
+        finally:
+            node.destroy_node()
+
+
+def test_residual_status_returns_latest_snapshot(ros_ctx):
+    from autonomous_kart.nodes.master.master_node import MasterNode
+    with ros_ctx(_default_params()):
+        node = MasterNode()
+        try:
+            node._mpc_status_callback(_status_msg_with_train_seq(seq=42, val_mae_s=0.07))
+            snap = node.get_residual_status()
+            assert snap is not None
+            assert snap["train_seq"] == 42
+            assert abs(snap["last_val_mae_s"] - 0.07) < 1e-6
+        finally:
+            node.destroy_node()
diff --git a/src/autonomous_kart/test/test_residual_buffer.py b/src/autonomous_kart/test/test_residual_buffer.py
new file mode 100644
index 00000000..51742690
--- /dev/null
+++ b/src/autonomous_kart/test/test_residual_buffer.py
@@ -0,0 +1,67 @@
+"""Tests for the TrainBuffer ring."""
+import threading
+
+import numpy as np
+import pytest
+
+from autonomous_kart.nodes.pathfinder.planners.residual.buffer import TrainBuffer
+
+
+def test_buffer_grows_then_wraps():
+    buf = TrainBuffer(capacity=4, feature_dim=3)
+    for i in range(3):
+        phi = np.array([float(i), float(i + 1), float(i + 2)])
+        buf.append(phi, r_s=float(i) * 0.1, r_d=float(i) * 0.2, t_ns=i)
+    assert buf.size == 3
+    X, ys, yd, t = buf.snapshot()
+    assert X.shape == (3, 3)
+    np.testing.assert_allclose(X[1], [1, 2, 3])
+    np.testing.assert_allclose(ys, [0.0, 0.1, 0.2])
+    np.testing.assert_allclose(t, [0, 1, 2])
+
+    # Fill past capacity — oldest entries drop.
+    for i in range(3, 8):
+        buf.append(np.array([float(i)] * 3), r_s=0.0, r_d=0.0, t_ns=i)
+    assert buf.size == 4
+    X, ys, yd, t = buf.snapshot()
+    # Most recent 4 entries (i=4..7), in chronological order.
+    np.testing.assert_allclose(t, [4, 5, 6, 7])
+
+
+def test_snapshot_returns_independent_copy():
+    buf = TrainBuffer(capacity=2, feature_dim=2)
+    buf.append(np.array([1.0, 2.0]), r_s=0.5, r_d=0.5, t_ns=0)
+    X, ys, yd, t = buf.snapshot()
+    X[0, 0] = 99.0
+    X2, *_ = buf.snapshot()
+    assert X2[0, 0] == 1.0  # buffer wasn't mutated
+
+
+def test_concurrent_writes_no_torn_rows():
+    buf = TrainBuffer(capacity=200, feature_dim=4)
+
+    def writer(start, count):
+        for i in range(start, start + count):
+            phi = np.array([float(i)] * 4)
+            buf.append(phi, r_s=float(i), r_d=-float(i), t_ns=i)
+
+    threads = [threading.Thread(target=writer, args=(k * 50, 50)) for k in range(4)]
+    for t in threads: t.start()
+    for t in threads: t.join()
+
+    X, ys, yd, _ = buf.snapshot()
+    # Each row's 4 features should be all-equal; r_s should equal +feature; r_d should equal -feature
+    for row, s, d in zip(X, ys, yd):
+        v = row[0]
+        assert all(row == v), f"torn row: {row}"
+        assert s == v, f"r_s mismatch with row value: {s} vs {v}"
+        assert d == -v, f"r_d mismatch with row value: {d} vs {v}"
+
+
+def test_initial_state():
+    buf = TrainBuffer(capacity=10, feature_dim=14)
+    assert buf.size == 0
+    assert buf.capacity == 10
+    X, ys, yd, t = buf.snapshot()
+    assert X.shape == (0, 14)
+    assert ys.shape == (0,)
diff --git a/src/autonomous_kart/test/test_residual_cache.py b/src/autonomous_kart/test/test_residual_cache.py
new file mode 100644
index 00000000..fbe14ef8
--- /dev/null
+++ b/src/autonomous_kart/test/test_residual_cache.py
@@ -0,0 +1,89 @@
+"""Tests for the residual disk cache (manifest + RLS state; GBM tested separately)."""
+import json
+from pathlib import Path
+
+import numpy as np
+import pytest
+
+from autonomous_kart.nodes.pathfinder.planners.residual.cache import (
+    ResidualCache, SCHEMA_VERSION,
+)
+
+
+def _rls_state():
+    return dict(
+        theta_s=np.arange(14, dtype=np.float64),
+        theta_d=-np.arange(14, dtype=np.float64),
+        P=np.eye(14) * 7.0,
+        samples_trained=123,
+    )
+
+
+def test_round_trip_rls(tmp_path: Path):
+    cache = ResidualCache(str(tmp_path), feature_dim=14)
+    state = _rls_state()
+    cache.write_rls(**state, best_val_mae_s=0.15, best_val_mae_d=0.04, train_seq=42)
+    loaded = cache.load()
+    assert loaded is not None
+    assert loaded.feature_dim == 14
+    np.testing.assert_allclose(loaded.theta_s, state["theta_s"])
+    np.testing.assert_allclose(loaded.theta_d, state["theta_d"])
+    np.testing.assert_allclose(loaded.P, state["P"])
+    assert loaded.samples_trained == 123
+    assert loaded.best_val_mae_s == 0.15
+
+
+def test_load_returns_none_when_no_manifest(tmp_path: Path):
+    cache = ResidualCache(str(tmp_path / "doesnotexist"), feature_dim=14)
+    assert cache.load() is None
+
+
+def test_load_rejects_wrong_schema(tmp_path: Path):
+    cache = ResidualCache(str(tmp_path), feature_dim=14)
+    cache.write_rls(**_rls_state(), best_val_mae_s=0.1, best_val_mae_d=0.04, train_seq=1)
+    # Corrupt the manifest version
+    manifest_path = tmp_path / "manifest.json"
+    m = json.loads(manifest_path.read_text())
+    m["schema_version"] = SCHEMA_VERSION + 999
+    manifest_path.write_text(json.dumps(m))
+    assert cache.load() is None
+
+
+def test_load_rejects_wrong_feature_dim(tmp_path: Path):
+    cache = ResidualCache(str(tmp_path), feature_dim=14)
+    cache.write_rls(**_rls_state(), best_val_mae_s=0.1, best_val_mae_d=0.04, train_seq=1)
+    cache_wrong = ResidualCache(str(tmp_path), feature_dim=15)
+    assert cache_wrong.load() is None
+
+
+def test_atomic_write_no_partial_state(tmp_path: Path):
+    cache = ResidualCache(str(tmp_path), feature_dim=14)
+    state = _rls_state()
+    cache.write_rls(**state, best_val_mae_s=0.1, best_val_mae_d=0.04, train_seq=1)
+    # No .tmp files left behind
+    leftovers = list(tmp_path.glob("*.tmp"))
+    assert leftovers == []
+
+
+def test_gbm_round_trip(tmp_path: Path):
+    from sklearn.ensemble import HistGradientBoostingRegressor
+    cache = ResidualCache(str(tmp_path), feature_dim=14)
+    rng = np.random.default_rng(0)
+    X = rng.standard_normal((400, 14))
+    y_s = 0.3 * X[:, 1] + 0.1 * rng.standard_normal(400)
+    y_d = 0.1 * X[:, 2] + 0.05 * rng.standard_normal(400)
+    gbm_s = HistGradientBoostingRegressor(max_iter=20, max_depth=3).fit(X, y_s)
+    gbm_d = HistGradientBoostingRegressor(max_iter=20, max_depth=3).fit(X, y_d)
+    cache.write_rls(theta_s=np.zeros(14), theta_d=np.zeros(14), P=np.eye(14),
+                    samples_trained=0, best_val_mae_s=0.05, best_val_mae_d=0.02, train_seq=1)
+    cache.write_gbm(gbm_s, gbm_d)
+    loaded = cache.load()
+    loaded_s, loaded_d = cache.load_gbm()
+    assert loaded is not None and loaded_s is not None and loaded_d is not None
+    np.testing.assert_allclose(loaded_s.predict(X[:5]), gbm_s.predict(X[:5]))
+
+
+def test_load_gbm_returns_none_when_missing(tmp_path: Path):
+    cache = ResidualCache(str(tmp_path), feature_dim=14)
+    s, d = cache.load_gbm()
+    assert s is None and d is None
diff --git a/src/autonomous_kart/test/test_residual_checkpoint.py b/src/autonomous_kart/test/test_residual_checkpoint.py
new file mode 100644
index 00000000..6d80ce87
--- /dev/null
+++ b/src/autonomous_kart/test/test_residual_checkpoint.py
@@ -0,0 +1,68 @@
+"""Tests for Checkpoint dataclass + CheckpointRing eviction policy."""
+import numpy as np
+
+from autonomous_kart.nodes.pathfinder.planners.residual.checkpoint import (
+    Checkpoint, CheckpointRing,
+)
+from autonomous_kart.nodes.pathfinder.planners.residual.regime import RegimeSignature
+
+
+def _ckpt(train_seq, val_mae_s, val_mae_d=0.05):
+    return Checkpoint(
+        train_seq=train_seq, t_wall_ns=train_seq * 1_000_000_000,
+        theta_s=np.zeros(14), theta_d=np.zeros(14), P=np.eye(14),
+        samples_trained=train_seq * 100,
+        gbm_s=None, gbm_d=None,
+        val_mae_s=val_mae_s, val_mae_d=val_mae_d,
+        rls_val_mae_s=val_mae_s * 1.1, rls_val_mae_d=val_mae_d * 1.1,
+        regime=RegimeSignature(4.0, 0.5, 0.02),
+    )
+
+
+def test_ring_empty_returns_none():
+    r = CheckpointRing(size=3)
+    assert r.best_recent() is None
+    assert r.is_empty()
+
+
+def test_ring_records_in_order_until_full():
+    r = CheckpointRing(size=3)
+    r.record(_ckpt(1, 0.20))
+    r.record(_ckpt(2, 0.15))
+    r.record(_ckpt(3, 0.10))
+    assert not r.is_empty()
+    assert r.best_recent().train_seq == 3   # lowest val_mae_s wins
+
+
+def test_ring_evicts_oldest_when_full():
+    r = CheckpointRing(size=3)
+    r.record(_ckpt(1, 0.20))
+    r.record(_ckpt(2, 0.15))
+    r.record(_ckpt(3, 0.10))
+    # 4th insert: ckpt #1 (oldest) is NOT the best, so it gets evicted
+    r.record(_ckpt(4, 0.18))
+    seqs = [c.train_seq for c in r.entries]
+    assert 1 not in seqs
+    assert sorted(seqs) == [2, 3, 4]
+
+
+def test_ring_protects_alltime_best_from_eviction():
+    r = CheckpointRing(size=3)
+    r.record(_ckpt(1, 0.05))   # all-time best
+    r.record(_ckpt(2, 0.20))
+    r.record(_ckpt(3, 0.30))
+    # Inserting #4 would evict oldest=#1, but #1 is best — evict next-oldest instead
+    r.record(_ckpt(4, 0.25))
+    seqs = sorted(c.train_seq for c in r.entries)
+    assert 1 in seqs
+    assert 2 not in seqs       # second-oldest evicted instead
+    assert sorted(seqs) == [1, 3, 4]
+
+
+def test_best_recent_ignores_nan_val_mae():
+    r = CheckpointRing(size=3)
+    bad = _ckpt(1, float("nan"))
+    good = _ckpt(2, 0.10)
+    r.record(bad)
+    r.record(good)
+    assert r.best_recent().train_seq == 2
diff --git a/src/autonomous_kart/test/test_residual_learner.py b/src/autonomous_kart/test/test_residual_learner.py
new file mode 100644
index 00000000..553d13fd
--- /dev/null
+++ b/src/autonomous_kart/test/test_residual_learner.py
@@ -0,0 +1,425 @@
+"""Tests for ResidualLearner: outlier rejection, |d| filter, warmup gate, counters."""
+import numpy as np
+
+from autonomous_kart.nodes.pathfinder.planners.mpc_residual import (
+    ResidualLearner, NUM_FEATURES,
+)
+
+
+def _phi(d=0.0):
+    """14-element feature vector with d at the second slot, like _features() builds."""
+    v = np.zeros(NUM_FEATURES)
+    v[0] = 1.0
+    v[1] = d
+    return v
+
+
+def _make(params=None):
+    p = {
+        "mode": "shadow",
+        "target_horizon_s": 0.05,
+        "forgetting_factor": 0.99,
+        "initial_cov": 1000.0,
+        "min_train_speed_mps": 0.5,
+        "error_window": 200,
+        "outlier_threshold_m": 2.0,
+        "max_train_d_m": 3.0,
+        "max_p_trace": 1.0e5,
+        "max_theta_norm": 50.0,
+        "rls_warmup_samples": 5,
+        "gbm_enabled": False,
+        "apply_min_samples_this_run": 1200,
+        "cache_enabled": False,
+    }
+    if params: p.update(params)
+    return ResidualLearner(p, solve_dt=1.0 / 60.0, s_total=850.0)
+
+
+def test_push_rejects_off_line_kart():
+    learner = _make()
+    # |d| = 4.0 > max_train_d_m = 3.0
+    learner.push(_phi(d=4.0), s_t=0.0, d_t=4.0, nom_ds=0.1, nom_dd=0.0, speed=3.0)
+    learner.step(s_now=0.5, d_now=4.05)
+    assert learner.samples_trained == 0
+    assert learner.off_line_skipped == 1
+
+
+def test_step_rejects_huge_residual_target():
+    learner = _make()
+    # Push a sample, then step at a position whose actual_ds explodes
+    # (s wrap not triggered because s_total > delta).
+    learner.push(_phi(d=0.0), s_t=0.0, d_t=0.0, nom_ds=0.0, nom_dd=0.0, speed=3.0)
+    # Step horizon = 0.05s * 60Hz = 3 ticks, so we need >= 3 pushes for one to age out.
+    learner.push(_phi(d=0.0), s_t=0.0, d_t=0.0, nom_ds=0.0, nom_dd=0.0, speed=3.0)
+    learner.push(_phi(d=0.0), s_t=0.0, d_t=0.0, nom_ds=0.0, nom_dd=0.0, speed=3.0)
+    learner.push(_phi(d=0.0), s_t=0.0, d_t=0.0, nom_ds=0.0, nom_dd=0.0, speed=3.0)
+    # Now step at s_now=10 (actual_ds=10 vs nom_ds=0 → residual 10m > threshold 2m)
+    learner.step(s_now=10.0, d_now=0.0)
+    assert learner.samples_trained == 0
+    assert learner.outliers_dropped == 1
+
+
+def test_predict_returns_zero_during_warmup():
+    learner = _make({"rls_warmup_samples": 50})
+    phi = _phi(d=0.0)
+    pred_s, pred_d, source = learner.predict(phi)
+    assert pred_s == 0.0
+    assert pred_d == 0.0
+    assert source == 0   # RLS
+
+
+def test_predict_returns_nonzero_after_warmup():
+    learner = _make({"rls_warmup_samples": 3})
+    # Drive samples through; horizon is 3 ticks so we need 4 pushes + steps
+    for i in range(20):
+        learner.push(_phi(d=0.0), s_t=float(i) * 0.1, d_t=0.0,
+                     nom_ds=0.0, nom_dd=0.0, speed=3.0)
+        learner.step(s_now=float(i + 1) * 0.1, d_now=0.0)
+    assert learner.samples_trained >= 3
+    pred_s, pred_d, source = learner.predict(_phi(d=0.0))
+    # theta_s should be non-zero after enough updates with non-zero residuals
+    assert source == 0
+    # samples_accepted_this_run is bumped per step()
+    assert learner.samples_accepted_this_run == learner.samples_trained
+
+
+def test_effective_mode_starts_shadow_then_apply():
+    learner = _make({"apply_min_samples_this_run": 4, "rls_warmup_samples": 0})
+    assert learner.effective_mode() == "shadow"
+    for i in range(20):
+        learner.push(_phi(d=0.0), s_t=float(i) * 0.1, d_t=0.0,
+                     nom_ds=0.0, nom_dd=0.0, speed=3.0)
+        learner.step(s_now=float(i + 1) * 0.1, d_now=0.0)
+    # samples_accepted_this_run >= 4 → effective_mode rises to requested mode
+    learner.mode = "apply"
+    assert learner.effective_mode() == "apply"
+
+
+def test_apply_mode_gated_back_to_shadow_when_yaml_shadow():
+    learner = _make({"apply_min_samples_this_run": 0})
+    learner.mode = "shadow"
+    assert learner.effective_mode() == "shadow"
+    learner.mode = "off"
+    assert learner.effective_mode() == "off"
+
+
+def test_predict_unchanged_by_effective_mode():
+    """predict() always returns the raw model output; gating happens at the
+    consumer (mpc.py)."""
+    learner = _make({"apply_min_samples_this_run": 10, "rls_warmup_samples": 0})
+    learner.mode = "apply"
+    learner.theta_s[0] = 0.5
+    pred_s, _, _ = learner.predict(_phi(d=0.0))
+    # samples_accepted_this_run = 0 → effective_mode == "shadow", but predict still returns 0.5
+    assert learner.effective_mode() == "shadow"
+    assert pred_s == 0.5
+
+
+def test_warm_start_from_cache(tmp_path):
+    params = {
+        "mode": "shadow",
+        "target_horizon_s": 0.05,
+        "forgetting_factor": 0.99, "initial_cov": 1000.0,
+        "min_train_speed_mps": 0.5, "error_window": 200,
+        "outlier_threshold_m": 2.0, "max_train_d_m": 3.0,
+        "max_p_trace": 1.0e5, "max_theta_norm": 50.0,
+        "rls_warmup_samples": 0, "apply_min_samples_this_run": 0,
+        "gbm_enabled": False,
+        "cache_enabled": True, "cache_dir": str(tmp_path), "cache_load_on_start": True,
+    }
+    a = ResidualLearner(params, solve_dt=1.0 / 60.0, s_total=850.0)
+    # Train a tiny bit
+    for i in range(20):
+        a.push(_phi(d=0.0), s_t=float(i) * 0.1, d_t=0.0,
+               nom_ds=0.0, nom_dd=0.0, speed=3.0)
+        a.step(s_now=float(i + 1) * 0.1, d_now=0.0)
+    a.save_rls_cache(best_val_mae_s=0.1, best_val_mae_d=0.04, train_seq=1)
+
+    b = ResidualLearner(params, solve_dt=1.0 / 60.0, s_total=850.0)
+    assert b.cache_loaded
+    np.testing.assert_allclose(b.theta_s, a.theta_s)
+    np.testing.assert_allclose(b.P, a.P)
+    assert b.samples_trained == a.samples_trained
+
+
+def test_buffer_grows_when_gbm_enabled():
+    params = {
+        "mode": "shadow", "target_horizon_s": 0.05,
+        "forgetting_factor": 0.99, "initial_cov": 1000.0,
+        "min_train_speed_mps": 0.5, "error_window": 200,
+        "outlier_threshold_m": 2.0, "max_train_d_m": 3.0,
+        "max_p_trace": 1.0e5, "max_theta_norm": 50.0,
+        "rls_warmup_samples": 0, "apply_min_samples_this_run": 0,
+        "gbm_enabled": True,
+        "gbm_buffer_capacity": 100,
+        "cache_enabled": False,
+    }
+    learner = ResidualLearner(params, solve_dt=1.0 / 60.0, s_total=850.0)
+    try:
+        assert learner.buffer is not None
+        for i in range(20):
+            learner.push(_phi(d=0.0), s_t=float(i) * 0.1, d_t=0.0,
+                         nom_ds=0.0, nom_dd=0.0, speed=3.0)
+            learner.step(s_now=float(i + 1) * 0.1, d_now=0.0)
+        # Each accepted step() pushes one sample into the buffer.
+        assert learner.buffer.size == learner.samples_trained
+    finally:
+        learner.shutdown()
+
+
+class _FakeGBM:
+    def __init__(self, value):
+        self.value = value
+
+    def predict(self, X):
+        return np.full(len(X), self.value)
+
+
+def test_selector_picks_gbm_when_val_mae_better():
+    params = {
+        "mode": "shadow", "target_horizon_s": 0.05,
+        "forgetting_factor": 0.99, "initial_cov": 1000.0,
+        "min_train_speed_mps": 0.5, "error_window": 200,
+        "outlier_threshold_m": 2.0, "max_train_d_m": 3.0,
+        "max_p_trace": 1.0e5, "max_theta_norm": 50.0,
+        "rls_warmup_samples": 0, "apply_min_samples_this_run": 0,
+        "gbm_enabled": False,  # keep trainer thread off; we drive the selector by hand
+        "cache_enabled": False,
+        "gbm_select_eps_s_m": 0.01, "gbm_select_eps_d_m": 0.005,
+        "gbm_predict_clip_m": 0.5,
+    }
+    learner = ResidualLearner(params, solve_dt=1.0 / 60.0, s_total=850.0)
+    learner._install_gbm(_FakeGBM(0.3), _FakeGBM(0.1),
+                        val_mae_s=0.05, val_mae_d=0.02,
+                        rls_val_mae_s=0.20, rls_val_mae_d=0.10)
+    pred_s, pred_d, source = learner.predict(_phi(d=0.0))
+    assert source == 1   # SOURCE_GBM
+    assert pred_s == 0.3
+    assert pred_d == 0.1
+
+
+def test_selector_stays_rls_when_gbm_only_marginally_better():
+    params = {
+        "mode": "shadow", "target_horizon_s": 0.05,
+        "forgetting_factor": 0.99, "initial_cov": 1000.0,
+        "min_train_speed_mps": 0.5, "error_window": 200,
+        "outlier_threshold_m": 2.0, "max_train_d_m": 3.0,
+        "max_p_trace": 1.0e5, "max_theta_norm": 50.0,
+        "rls_warmup_samples": 0, "apply_min_samples_this_run": 0,
+        "gbm_enabled": False, "cache_enabled": False,
+        "gbm_select_eps_s_m": 0.01, "gbm_select_eps_d_m": 0.005,
+        "gbm_predict_clip_m": 0.5,
+    }
+    learner = ResidualLearner(params, solve_dt=1.0 / 60.0, s_total=850.0)
+    learner._install_gbm(_FakeGBM(0.0), _FakeGBM(0.0),
+                        val_mae_s=0.205, val_mae_d=0.10,
+                        rls_val_mae_s=0.20, rls_val_mae_d=0.10)
+    pred_s, pred_d, source = learner.predict(_phi(d=0.0))
+    assert source == 0
+
+
+def test_gbm_predict_clipped():
+    params = {
+        "mode": "apply", "target_horizon_s": 0.05,
+        "forgetting_factor": 0.99, "initial_cov": 1000.0,
+        "min_train_speed_mps": 0.5, "error_window": 200,
+        "outlier_threshold_m": 2.0, "max_train_d_m": 3.0,
+        "max_p_trace": 1.0e5, "max_theta_norm": 50.0,
+        "rls_warmup_samples": 0, "apply_min_samples_this_run": 0,
+        "gbm_enabled": False, "cache_enabled": False,
+        "gbm_select_eps_s_m": 0.01, "gbm_select_eps_d_m": 0.005,
+        "gbm_predict_clip_m": 0.5,
+    }
+    learner = ResidualLearner(params, solve_dt=1.0 / 60.0, s_total=850.0)
+    learner._install_gbm(_FakeGBM(2.0), _FakeGBM(-1.0),
+                        val_mae_s=0.05, val_mae_d=0.05,
+                        rls_val_mae_s=0.30, rls_val_mae_d=0.30)
+    pred_s, pred_d, source = learner.predict(_phi(d=0.0))
+    assert source == 1
+    assert pred_s == 0.5     # clipped from 2.0 to +0.5
+    assert pred_d == -0.5    # clipped from -1.0 to -0.5
+    assert learner.last_pred_clipped == 1
+
+
+def test_rls_divergence_restores_matching_regime():
+    from autonomous_kart.nodes.pathfinder.planners.residual.regime import RegimeSignature
+    learner = _make({"regime_dv_tol_mps": 1.0, "regime_dd_tol_m": 0.3,
+                     "regime_dkappa_rel_tol": 0.5, "max_theta_norm": 0.5})
+    # Plant a good RLS state in a checkpoint with regime ≈ current
+    good_theta_s = np.ones(NUM_FEATURES) * 0.1
+    learner._seed_checkpoint(
+        theta_s=good_theta_s, theta_d=np.zeros(NUM_FEATURES),
+        P=np.eye(NUM_FEATURES), samples_trained=500,
+        val_mae_s=0.05, val_mae_d=0.02,
+        regime=RegimeSignature(4.0, 0.5, 0.02),
+    )
+    learner._current_regime = RegimeSignature(4.1, 0.5, 0.02)
+    # Force divergence
+    learner.theta_s = np.ones(NUM_FEATURES) * 100.0
+    learner.theta_d = np.ones(NUM_FEATURES) * 100.0
+    learner._trigger_divergence_check()
+    np.testing.assert_allclose(learner.theta_s, good_theta_s)
+    assert learner.divergence_resets == 1
+
+
+def test_rls_divergence_zeros_when_regime_dissimilar():
+    from autonomous_kart.nodes.pathfinder.planners.residual.regime import RegimeSignature
+    learner = _make({"max_theta_norm": 0.5})
+    learner._seed_checkpoint(
+        theta_s=np.ones(NUM_FEATURES) * 0.1, theta_d=np.zeros(NUM_FEATURES),
+        P=np.eye(NUM_FEATURES), samples_trained=500,
+        val_mae_s=0.05, val_mae_d=0.02,
+        regime=RegimeSignature(2.0, 0.1, 0.005),
+    )
+    learner._current_regime = RegimeSignature(7.0, 1.0, 0.05)
+    learner.theta_s = np.ones(NUM_FEATURES) * 100.0
+    learner.theta_d = np.ones(NUM_FEATURES) * 100.0
+    learner._trigger_divergence_check()
+    np.testing.assert_allclose(learner.theta_s, np.zeros(NUM_FEATURES))
+
+
+def test_clip_rate_revert_disables_gbm():
+    learner = _make({"rls_warmup_samples": 0,
+                     "revert_clip_rate_window": 10,
+                     "revert_clip_rate_threshold": 0.30,
+                     "gbm_predict_clip_m": 0.1})
+    learner._install_gbm(_FakeGBM(1.0), _FakeGBM(1.0),
+                         val_mae_s=0.05, val_mae_d=0.05,
+                         rls_val_mae_s=0.30, rls_val_mae_d=0.30)
+    assert learner.use_gbm
+    # Force many clipped predictions
+    for _ in range(10):
+        learner.predict(_phi(d=0.0))
+    learner._maybe_clip_revert()
+    assert not learner.use_gbm
+    assert learner.revert_count == 1
+
+
+def test_manual_revert_to_checkpoint():
+    from autonomous_kart.nodes.pathfinder.planners.residual.regime import RegimeSignature
+    learner = _make()
+    learner._seed_checkpoint(
+        theta_s=np.full(NUM_FEATURES, 0.7), theta_d=np.zeros(NUM_FEATURES),
+        P=np.eye(NUM_FEATURES), samples_trained=999,
+        val_mae_s=0.04, val_mae_d=0.02,
+        regime=RegimeSignature(99.0, 99.0, 99.0),  # deliberately weird regime
+    )
+    learner.theta_s = np.zeros(NUM_FEATURES)
+    learner.manual_revert()
+    np.testing.assert_allclose(learner.theta_s, np.full(NUM_FEATURES, 0.7))
+    assert learner.revert_count == 1
+
+
+def test_current_regime_tracks_buffer():
+    learner = _make({"gbm_enabled": True, "gbm_buffer_capacity": 200,
+                     "rls_warmup_samples": 0, "apply_min_samples_this_run": 0,
+                     "cache_enabled": False})
+    try:
+        for i in range(150):
+            phi = np.zeros(NUM_FEATURES)
+            phi[0] = 1.0
+            phi[1] = 0.5    # d
+            phi[2] = 4.0    # v_s placeholder (we re-derive from speed in step)
+            phi[4] = 0.02   # kappa
+            learner.push(phi, s_t=float(i) * 0.1, d_t=0.5,
+                         nom_ds=0.0, nom_dd=0.0, speed=4.0)
+            learner.step(s_now=float(i + 1) * 0.1, d_now=0.5)
+        sig = learner._compute_current_regime()
+        assert abs(sig.mean_v - 4.0) < 0.5
+        assert abs(sig.mean_abs_d - 0.5) < 0.2
+    finally:
+        learner.shutdown()
+
+
+def test_trainer_installs_gbm_and_flips_selector():
+    import time as _time
+    params = {
+        "mode": "shadow", "target_horizon_s": 0.05,
+        "forgetting_factor": 0.99, "initial_cov": 1000.0,
+        "min_train_speed_mps": 0.5, "error_window": 200,
+        "outlier_threshold_m": 2.0, "max_train_d_m": 3.0,
+        "max_p_trace": 1.0e5, "max_theta_norm": 50.0,
+        "rls_warmup_samples": 0, "apply_min_samples_this_run": 0,
+        "gbm_enabled": True, "gbm_buffer_capacity": 2000,
+        "gbm_max_iter": 30, "gbm_max_depth": 3, "gbm_learning_rate": 0.1,
+        "gbm_min_samples_leaf": 10, "gbm_min_samples_to_train": 1500,
+        "gbm_retrain_secs": 0.2, "gbm_retrain_every_samples": 999999,
+        "cache_enabled": False,
+        "gbm_select_eps_s_m": 0.01, "gbm_select_eps_d_m": 0.005,
+        "gbm_predict_clip_m": 0.5,
+    }
+    learner = ResidualLearner(params, solve_dt=1.0 / 60.0, s_total=850.0)
+    try:
+        rng = np.random.default_rng(0)
+        for i in range(1800):
+            phi = rng.standard_normal(NUM_FEATURES)
+            phi[0] = 1.0
+            learner.buffer.append(phi, r_s=0.3 * phi[1], r_d=0.1 * phi[2], t_ns=i)
+        _time.sleep(2.0)
+        # After training, the learner should have a result with a non-None GBM
+        assert learner.trainer.last_result is not None
+        assert learner.trainer.last_result.gbm_s is not None
+        # And the learner's _gbm_s slot should be populated via the install-callback
+        assert learner._gbm_s is not None
+    finally:
+        learner.shutdown()
+
+
+def test_rls_predict_clipped():
+    """RLS predictions are clipped to ±gbm_predict_clip_m, matching GBM.
+
+    Offline replay caught RLS spiking to ~200 m of Δs prediction in benign
+    driving. The clip is hard safety for apply mode and matches GBM."""
+    learner = _make({"rls_warmup_samples": 0,
+                     "gbm_predict_clip_m": 0.5})
+    # Push theta beyond the clip threshold via the public attributes
+    learner.theta_s = np.zeros(NUM_FEATURES)
+    learner.theta_d = np.zeros(NUM_FEATURES)
+    learner.theta_s[0] = 100.0   # bias slot — phi[0]=1.0 → prediction = 100
+    learner.theta_d[0] = -100.0
+    learner.samples_trained = 100  # past warmup
+    pred_s, pred_d, source = learner.predict(_phi(d=0.0))
+    assert source == 0   # RLS
+    assert pred_s == 0.5     # clipped from 100 → +0.5
+    assert pred_d == -0.5    # clipped from -100 → -0.5
+    assert learner.last_pred_clipped == 1
+
+
+def test_rls_predict_unclipped_when_in_bounds():
+    """RLS predictions within ±clip pass through unchanged."""
+    learner = _make({"rls_warmup_samples": 0, "gbm_predict_clip_m": 0.5})
+    learner.theta_s = np.zeros(NUM_FEATURES)
+    learner.theta_d = np.zeros(NUM_FEATURES)
+    learner.theta_s[0] = 0.2
+    learner.samples_trained = 100
+    pred_s, pred_d, _ = learner.predict(_phi(d=0.0))
+    assert abs(pred_s - 0.2) < 1e-12
+    assert learner.last_pred_clipped == 0
+
+
+def test_selector_flips_to_gbm_when_only_d_wins():
+    """Selector's OR rule: GBM only needs to beat RLS on s OR d, not both.
+
+    Offline replay showed the AND rule kept the selector on RLS 94% of ticks
+    because s val_maes were nearly tied even when GBM was a clear winner on d."""
+    learner = _make({"rls_warmup_samples": 0,
+                     "gbm_select_eps_s_m": 0.0,
+                     "gbm_select_eps_d_m": 0.0,
+                     "gbm_predict_clip_m": 5.0})
+    # GBM ties on s (no improvement), but beats on d by a clear margin.
+    learner._install_gbm(_FakeGBM(0.1), _FakeGBM(0.05),
+                         val_mae_s=0.30, val_mae_d=0.05,
+                         rls_val_mae_s=0.30, rls_val_mae_d=0.12)
+    assert learner.use_gbm    # OR rule flips because d wins
+
+
+def test_selector_stays_rls_when_neither_axis_beats():
+    """If GBM is no better on either axis, selector stays on RLS."""
+    learner = _make({"rls_warmup_samples": 0,
+                     "gbm_select_eps_s_m": 0.0,
+                     "gbm_select_eps_d_m": 0.0,
+                     "gbm_predict_clip_m": 5.0})
+    learner._install_gbm(_FakeGBM(0.0), _FakeGBM(0.0),
+                         val_mae_s=0.30, val_mae_d=0.10,
+                         rls_val_mae_s=0.30, rls_val_mae_d=0.10)
+    assert not learner.use_gbm
diff --git a/src/autonomous_kart/test/test_residual_regime.py b/src/autonomous_kart/test/test_residual_regime.py
new file mode 100644
index 00000000..99336c1b
--- /dev/null
+++ b/src/autonomous_kart/test/test_residual_regime.py
@@ -0,0 +1,48 @@
+"""Tests for RegimeSignature similarity."""
+import numpy as np
+
+from autonomous_kart.nodes.pathfinder.planners.residual.regime import (
+    RegimeSignature, similar,
+)
+
+
+def test_signature_from_arrays():
+    v = np.array([3.0, 4.0, 5.0])
+    d = np.array([0.1, -0.2, 0.0])
+    kappa = np.array([0.01, 0.0, -0.01])
+    sig = RegimeSignature.from_arrays(v, d, kappa)
+    assert abs(sig.mean_v - 4.0) < 1e-9
+    assert abs(sig.mean_abs_d - (0.1 + 0.2 + 0.0) / 3) < 1e-9
+    assert abs(sig.std_kappa - np.std(kappa)) < 1e-9
+
+
+def test_similar_within_tolerances():
+    a = RegimeSignature(mean_v=4.0, mean_abs_d=0.5, std_kappa=0.02)
+    b = RegimeSignature(mean_v=4.5, mean_abs_d=0.6, std_kappa=0.02)
+    assert similar(a, b, dv_tol=1.0, dd_tol=0.3, dkappa_rel_tol=0.5)
+
+
+def test_dissimilar_when_speed_too_far():
+    a = RegimeSignature(mean_v=4.0, mean_abs_d=0.5, std_kappa=0.02)
+    b = RegimeSignature(mean_v=6.0, mean_abs_d=0.5, std_kappa=0.02)
+    assert not similar(a, b, dv_tol=1.0, dd_tol=0.3, dkappa_rel_tol=0.5)
+
+
+def test_dissimilar_when_d_too_far():
+    a = RegimeSignature(mean_v=4.0, mean_abs_d=0.2, std_kappa=0.02)
+    b = RegimeSignature(mean_v=4.0, mean_abs_d=0.8, std_kappa=0.02)
+    assert not similar(a, b, dv_tol=1.0, dd_tol=0.3, dkappa_rel_tol=0.5)
+
+
+def test_dissimilar_when_kappa_too_far():
+    a = RegimeSignature(mean_v=4.0, mean_abs_d=0.2, std_kappa=0.01)
+    b = RegimeSignature(mean_v=4.0, mean_abs_d=0.2, std_kappa=0.05)
+    # |0.05 - 0.01| / max(0.05, 0.01, 1e-3) = 0.8 > 0.5
+    assert not similar(a, b, dv_tol=1.0, dd_tol=0.3, dkappa_rel_tol=0.5)
+
+
+def test_from_empty_arrays_safe():
+    sig = RegimeSignature.from_arrays(np.array([]), np.array([]), np.array([]))
+    assert sig.mean_v == 0.0
+    assert sig.mean_abs_d == 0.0
+    assert sig.std_kappa == 0.0
diff --git a/trajectory (1).npz b/trajectory (1).npz
new file mode 100644
index 00000000..fb2b3e3d
Binary files /dev/null and b/trajectory (1).npz differ
diff --git a/viz/viz.html b/viz/viz.html
index 15ff1a8e..e1367356 100644
--- a/viz/viz.html
+++ b/viz/viz.html
@@ -2,7 +2,7 @@
 <html lang="en">
 <head>
 <meta charset="UTF-8">
-<title>Kart Sim</title>
+<title>Kart Sim - Track Intersection Vision</title>
 <style>
   @import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;700&display=swap');
   * { margin:0; padding:0; box-sizing:border-box; }
@@ -12,10 +12,11 @@
     position:fixed; top:16px; left:16px;
     background:rgba(10,10,15,0.85); border:1px solid #222;
     padding:12px 16px; border-radius:6px; font-size:12px; line-height:1.8;
-    min-width:220px; backdrop-filter:blur(8px);
+    min-width:240px; backdrop-filter:blur(8px);
   }
   #hud .label { color:#666; }
   #hud .val { color:#e0e0e0; font-weight:700; }
+  #hud .vision-val { color:#a78bfa; font-weight:700; }
   #costs {
     position:fixed; bottom:16px; right:16px;
     background:rgba(10,10,15,0.85); border:1px solid #222;
@@ -31,7 +32,6 @@
   #costs td.val { color:#e0e0e0; text-align:right; font-variant-numeric:tabular-nums; width:64px; white-space:nowrap; }
   #costs .barfill { height:6px; background:#60a5fa; border-radius:2px; }
   #costs .barfill.neg { background:#4ade80; }
-  #costs .empty { color:#444; font-style:italic; }
   #hud .state { display:inline-block; padding:2px 8px; border-radius:3px; font-size:11px; font-weight:700; }
   #hud .state.IDLE { background:#333; color:#888; }
   #hud .state.MANUAL { background:#1a3a1a; color:#4ade80; }
@@ -53,6 +53,48 @@
     font-family:inherit; font-size:11px; cursor:pointer;
   }
   #controls button:hover { border-color:#555; color:#fff; }
+  #residual {
+    position:fixed; bottom:16px; left:16px;
+    background:rgba(10,10,15,0.85); border:1px solid #222;
+    padding:12px 14px; border-radius:6px; font-size:11px; line-height:1.6;
+    min-width:240px; backdrop-filter:blur(8px);
+  }
+  #residual .title { color:#888; font-weight:700; margin-bottom:6px; letter-spacing:1px; }
+  #residual .row { display:flex; justify-content:space-between; align-items:center; gap:8px; }
+  #residual .row .label { color:#666; }
+  #residual .row .val { color:#e0e0e0; font-variant-numeric:tabular-nums; }
+  #residual .mode { display:inline-block; padding:2px 8px; border-radius:3px; font-size:11px; font-weight:700; }
+  #residual .mode.OFF    { background:#333; color:#888; }
+  #residual .mode.SHADOW { background:#2a2210; color:#facc15; }
+  #residual .mode.APPLY  { background:#1a3a1a; color:#4ade80; }
+  #residual .arrow       { color:#444; margin:0 4px; }
+  #residual .err-better  { color:#4ade80; }
+  #residual .err-worse   { color:#f87171; }
+  #residual #res-spark   { display:block; margin-top:6px; background:#070710; border-radius:2px; }
+  #residual .btn-row     { display:flex; gap:4px; margin-top:8px; }
+  #residual .btn-row button {
+    flex:1; background:#1a1a22; color:#aaa; border:1px solid #333;
+    padding:4px 6px; border-radius:3px; font-family:inherit; font-size:10px; cursor:pointer;
+    letter-spacing:1px;
+  }
+  #residual .btn-row button.active { border-color:#4ade80; color:#fff; background:#1a2a1a; }
+  #residual .btn-row button.flash  { border-color:#f87171; background:#3a1a1a; color:#fff; }
+  #residual .btn-row button:hover  { border-color:#555; color:#fff; }
+  #residual .section-lbl { color:#666; font-size:10px; letter-spacing:1.5px; margin-top:10px; }
+  #residual .line-row { display:flex; gap:4px; margin-top:6px; align-items:center; }
+  #residual select, #residual input[type="text"] {
+    background:#1a1a22; color:#c8c8d0; border:1px solid #333;
+    padding:4px 6px; border-radius:3px;
+    font-family:inherit; font-size:10px;
+    min-width:0;
+  }
+  #residual select { flex:0 0 96px; }
+  #residual input[type="text"] { flex:1 1 auto; width:60px; }
+  #residual select:focus, #residual input[type="text"]:focus { outline:none; border-color:#555; }
+  #residual .line-status { font-size:10px; margin-top:6px; min-height:12px; line-height:1.2; }
+  #residual .line-status.ok  { color:#4ade80; }
+  #residual .line-status.err { color:#f87171; }
+  #residual .hint { color:#444; font-size:10px; margin-top:6px; font-style:italic; line-height:1.3; }
 </style>
 </head>
 <body>
@@ -64,6 +106,9 @@
   <div><span class="label">YAW   </span><span id="h-yaw" class="val">—</span></div>
   <div><span class="label">SPEED </span><span id="h-spd" class="val">—</span></div>
   <div><span class="label">CMD   </span><span id="h-cmd" class="val">—</span></div>
+  <hr style="border:0; border-top:1px solid #222; margin:6px 0;">
+  <div><span class="label">INTERSECT L </span><span id="h-cam-l" class="vision-val">—</span></div>
+  <div><span class="label">INTERSECT R </span><span id="h-cam-r" class="vision-val">—</span></div>
 </div>
 
 <div id="status" class="err">disconnected</div>
@@ -84,31 +129,75 @@
   scroll: zoom · drag: pan · R: reset view · WASD/arrows: drive in MANUAL
 </div>
 
+<div id="residual">
+  <div class="title">MPC RESIDUAL LEARNER</div>
+  <div class="row"><span class="label">mode</span><span id="res-mode" class="mode OFF">OFF</span></div>
+  <div class="row"><span class="label">samples</span><span id="res-samples" class="val">—</span></div>
+  <div class="row"><span class="label">||θ_s||</span><span id="res-theta-s" class="val">—</span></div>
+  <div class="row"><span class="label">||θ_d||</span><span id="res-theta-d" class="val">—</span></div>
+  <div class="row"><span class="label">ds</span><span class="val"><span id="res-ds-nom">—</span><span class="arrow">→</span><span id="res-ds-res">—</span></span></div>
+  <div class="row"><span class="label">dd</span><span class="val"><span id="res-dd-nom">—</span><span class="arrow">→</span><span id="res-dd-res">—</span></span></div>
+  <canvas id="res-spark" width="60" height="16"></canvas>
+  <div class="btn-row">
+    <button id="res-btn-off"    onclick="setResidualMode('off')">OFF</button>
+    <button id="res-btn-shadow" onclick="setResidualMode('shadow')">SHADOW</button>
+    <button id="res-btn-apply"  onclick="setResidualMode('apply')">APPLY</button>
+  </div>
+
+  <div class="section-lbl">PLANNER</div>
+  <div class="btn-row">
+    <button id="plan-btn-pure_pursuit" title="pure_pursuit" onclick="setPlanner('pure_pursuit')">PP</button>
+    <button id="plan-btn-mpc"          title="mpc"          onclick="setPlanner('mpc')">MPC</button>
+    <button id="plan-btn-opencv"       title="opencv"       onclick="setPlanner('opencv')">CV</button>
+  </div>
+
+  <div class="section-lbl">RACING LINE</div>
+  <div class="line-row">
+    <select id="line-select">
+      <option value="/ws/data/racing_line/line2.csv">line2.csv</option>
+      <option value="/ws/data/racing_line/line4.csv">line4.csv</option>
+      <option value="/ws/data/racing_line/line6.csv">line6.csv</option>
+      <option value="/ws/data/racing_line/line8.csv">line8.csv</option>
+      <option value="/ws/data/racing_line/line10.csv">line10.csv</option>
+      <option value="/ws/data/racing_line/line12.csv">line12.csv</option>
+      <option value="/ws/data/racing_line/line14.csv">line14.csv</option>
+      <option value="/ws/data/racing_line/line15.csv">line15.csv</option>
+      <option value="/ws/data/racing_line/line16.csv">line16.csv</option>
+    </select>
+    <input id="line-input" type="text" placeholder="/ws/data/racing_line/...">
+    <button id="line-load" onclick="loadLine()">Load</button>
+  </div>
+  <div id="line-status" class="line-status"></div>
+  <div class="hint">Line swap rebuilds planners; residual θ is preserved.</div>
+</div>
+
 <script>
 const API = `http://${location.hostname || 'localhost'}:8000`;
 
 const C = document.getElementById('c');
 const ctx = C.getContext('2d');
 
-// ── State ──
 let racingLine = [];
+let leftTrackEdge = [];
+let rightTrackEdge = [];
+const TRACK_WIDTH = 3.0; // Total track width in meters
+
 let trail = [];
 const MAX_TRAIL = 3000;
-let odom = { x:0, y:0, yaw:0, speed:0, motor:0, steer:0, state:'IDLE' };
+let odom = { x:0, y:0, yaw:0, speed:0, motor:0, steer:0, state:'IDLE', track_angles:[0.0, 0.0] };
+let currentComputedAngles = [22.5, 22.5]; // Default fallback values
+
 let connected = false;
 let mpcStatus = null;
-let dynamicLine = null;  // {active, strategy, merge_idx, points: [[x,y], ...]}
+let dynamicLine = null;
 
-// ── Camera ──
 let cam = { x:0, y:0, zoom:4, targetZoom:4 };
 let dragging = false, dragStart = {x:0,y:0}, camStart = {x:0,y:0};
 
-// ── Resize ──
 function resize() { C.width = innerWidth; C.height = innerHeight; }
 addEventListener('resize', resize);
 resize();
 
-// ── Input ──
 C.addEventListener('wheel', e => {
   e.preventDefault();
   cam.targetZoom *= e.deltaY > 0 ? 0.9 : 1.1;
@@ -127,9 +216,8 @@
 });
 addEventListener('mouseup', () => dragging = false);
 
-// ── Manual driving (WASD / arrow keys, active in MANUAL state) ──
-const MANUAL_SPEED_PCT = 30;   // forward throttle %
-const MANUAL_STEER_PCT = 60;   // L/R steer %
+const MANUAL_SPEED_PCT = 30;
+const MANUAL_STEER_PCT = 60;
 const heldDriveKeys = new Set();
 let manualSendInterval = null;
 
@@ -147,12 +235,23 @@
   } catch(e) {}
 }
 
+// ─── NEW FUNCTION: CHANNELS DISCOVERED INTERSECTIONS TO MASTER API ───
+async function publishCameraVision(leftAngle, rightAngle) {
+  if (odom.state !== 'AUTONOMOUS') return;
+  try {
+    await fetch(API + '/camera_angles', {
+      method: 'POST',
+      headers: {'Content-Type': 'application/json'},
+      body: JSON.stringify({ left_angle: leftAngle, right_angle: rightAngle }),
+    });
+  } catch(e) {}
+}
+
 function computeManualCmd() {
   let speed = 0, steer = 0;
   if (heldDriveKeys.has('w') || heldDriveKeys.has('arrowup'))    speed += MANUAL_SPEED_PCT;
   if (heldDriveKeys.has('a') || heldDriveKeys.has('arrowleft'))  steer -= MANUAL_STEER_PCT;
   if (heldDriveKeys.has('d') || heldDriveKeys.has('arrowright')) steer += MANUAL_STEER_PCT;
-  // 's' / down stops (no reverse on the kart side).
   return { speed, steer };
 }
 
@@ -162,13 +261,12 @@
     if (odom.state !== 'MANUAL') return;
     const { speed, steer } = computeManualCmd();
     sendManual(speed, steer);
-  }, 50);  // 20 Hz — well under the 0.5 s server timeout
+  }, 50);
 }
 function stopManualSending() {
   if (!manualSendInterval) return;
   clearInterval(manualSendInterval);
   manualSendInterval = null;
-  // Final stop frame so the kart doesn't coast on the last cached value.
   sendManual(0, 0);
 }
 
@@ -179,7 +277,7 @@
   }
   const k = e.key.toLowerCase();
   if (isDriveKey(k)) {
-    if (e.repeat) return;     // ignore OS auto-repeat
+    if (e.repeat) return;
     heldDriveKeys.add(k);
     startManualSending();
     e.preventDefault();
@@ -198,15 +296,44 @@
   stopManualSending();
 });
 
-// ── World → Screen ──
 function w2s(wx, wy) {
   return [
     C.width/2  + (wx - cam.x) * cam.zoom,
-    C.height/2 - (wy - cam.y) * cam.zoom   // flip Y
+    C.height/2 - (wy - cam.y) * cam.zoom
   ];
 }
 
-// ── Fetch ──
+// Generates explicit parallel tracking boundaries out of centerline points
+function generateTrackEdges() {
+  if (racingLine.length < 2) return;
+  leftTrackEdge = [];
+  rightTrackEdge = [];
+  const halfWidth = TRACK_WIDTH / 2.0;
+
+  for (let i = 0; i < racingLine.length; i++) {
+    let next = racingLine[(i + 1) % racingLine.length];
+    let prev = racingLine[(i - 1 + racingLine.length) % racingLine.length];
+    let curr = racingLine[i];
+
+    // Compute tangent vector
+    let dx = next[0] - prev[0];
+    let dy = next[1] - prev[1];
+    let len = Math.sqrt(dx*dx + dy*dy);
+    if (len < 1e-6) {
+      leftTrackEdge.push([curr[0], curr[1]]);
+      rightTrackEdge.push([curr[0], curr[1]]);
+      continue;
+    }
+    
+    // Normal unit vectors
+    let nx = -dy / len;
+    let ny = dx / len;
+
+    leftTrackEdge.push([curr[0] + nx * halfWidth, curr[1] + ny * halfWidth]);
+    rightTrackEdge.push([curr[0] - nx * halfWidth, curr[1] - ny * halfWidth]);
+  }
+}
+
 async function fetchOdom() {
   try {
     const [odomRes, stateRes] = await Promise.all([
@@ -217,9 +344,13 @@
     const s = await stateRes.json();
     odom = d;
     odom.state = s.state || d.state || 'IDLE';
+    
     trail.push({x:d.x, y:d.y});
     if (trail.length > MAX_TRAIL) trail.shift();
     connected = true;
+    
+    // Fire calculated vision parameters back into the ROS/Python system loop
+    publishCameraVision(currentComputedAngles[0], currentComputedAngles[1]);
   } catch(e) { connected = false; }
 }
 
@@ -227,7 +358,10 @@
   try {
     const r = await fetch(API + '/racing_line');
     const d = await r.json();
-    if (d.points) racingLine = d.points;
+    if (d.points && d.points.length > 1) {
+        racingLine = d.points;
+        generateTrackEdges();
+    }
   } catch(e) {}
 }
 
@@ -237,6 +371,173 @@
     mpcStatus = await r.json();
   } catch(e) { mpcStatus = null; }
   renderCosts();
+  renderResidual();
+}
+
+const RES_SPARK_MAX = 120;
+const resEsHistory = [];
+
+async function setResidualMode(mode) {
+  const btn = document.getElementById('res-btn-' + mode);
+  try {
+    const r = await fetch(API + '/mpc/residual_mode', {
+      method: 'POST',
+      headers: {'Content-Type': 'application/json'},
+      body: JSON.stringify({mode}),
+    });
+    if (!r.ok) throw new Error('HTTP ' + r.status);
+  } catch(e) {
+    console.error('residual_mode failed', e);
+    if (btn) {
+      btn.classList.add('flash');
+      setTimeout(() => btn.classList.remove('flash'), 600);
+    }
+  }
+}
+
+function renderResidual() {
+  const m = mpcStatus;
+  const modeNum = (m && Number.isFinite(m.residual_mode_num)) ? (m.residual_mode_num | 0) : 0;
+  const modeLabel = modeNum === 2 ? 'APPLY' : modeNum === 1 ? 'SHADOW' : 'OFF';
+  const modeEl = document.getElementById('res-mode');
+  modeEl.textContent = modeLabel;
+  modeEl.className = 'mode ' + modeLabel;
+
+  for (const k of ['off', 'shadow', 'apply']) {
+    const b = document.getElementById('res-btn-' + k);
+    if (b) b.classList.toggle('active', k === modeLabel.toLowerCase());
+  }
+
+  if (!m || !m.received) {
+    document.getElementById('res-samples').textContent = '—';
+    document.getElementById('res-theta-s').textContent = '—';
+    document.getElementById('res-theta-d').textContent = '—';
+    document.getElementById('res-ds-nom').textContent = '—';
+    document.getElementById('res-ds-res').textContent = '—';
+    document.getElementById('res-ds-res').className = '';
+    document.getElementById('res-dd-nom').textContent = '—';
+    document.getElementById('res-dd-res').textContent = '—';
+    document.getElementById('res-dd-res').className = '';
+    drawResSparkline();
+    return;
+  }
+
+  const num = (v, d = 0) => Number.isFinite(v) ? +v : d;
+  const samples = num(m.samples_trained) | 0;
+  const thetaS  = num(m.theta_s_norm);
+  const thetaD  = num(m.theta_d_norm);
+  const nomS    = num(m.nom_s);
+  const nomD    = num(m.nom_d);
+  const resS    = num(m.res_es);
+  const resD    = num(m.res_ed);
+
+  document.getElementById('res-samples').textContent = samples.toString();
+  document.getElementById('res-theta-s').textContent = thetaS.toFixed(2);
+  document.getElementById('res-theta-d').textContent = thetaD.toFixed(2);
+
+  document.getElementById('res-ds-nom').textContent = nomS.toFixed(3);
+  const dsRes = document.getElementById('res-ds-res');
+  dsRes.textContent = resS.toFixed(3);
+  dsRes.className = resS < nomS ? 'err-better' : resS > nomS ? 'err-worse' : '';
+
+  document.getElementById('res-dd-nom').textContent = nomD.toFixed(3);
+  const ddRes = document.getElementById('res-dd-res');
+  ddRes.textContent = resD.toFixed(3);
+  ddRes.className = resD < nomD ? 'err-better' : resD > nomD ? 'err-worse' : '';
+
+  resEsHistory.push(resS);
+  if (resEsHistory.length > RES_SPARK_MAX) resEsHistory.shift();
+  drawResSparkline();
+}
+
+const PLANNERS = ['pure_pursuit', 'mpc', 'opencv'];
+let currentPlanner = null;   // unknown until first click — backend doesn't expose this
+
+async function setPlanner(planner) {
+  const btn = document.getElementById('plan-btn-' + planner);
+  try {
+    const r = await fetch(API + '/pathfinder/planner', {
+      method: 'POST',
+      headers: {'Content-Type': 'application/json'},
+      body: JSON.stringify({planner}),
+    });
+    const body = await r.json().catch(() => ({}));
+    if (!r.ok) throw new Error(body.error || ('HTTP ' + r.status));
+    currentPlanner = (body.planner || planner);
+    for (const p of PLANNERS) {
+      const b = document.getElementById('plan-btn-' + p);
+      if (b) b.classList.toggle('active', p === currentPlanner);
+    }
+  } catch(e) {
+    console.error('setPlanner failed', e);
+    if (btn) {
+      btn.classList.add('flash');
+      setTimeout(() => btn.classList.remove('flash'), 600);
+    }
+  }
+}
+
+let lineStatusTimeout = null;
+function showLineStatus(text, kind) {
+  const el = document.getElementById('line-status');
+  if (!el) return;
+  el.textContent = text;
+  el.className = 'line-status ' + (kind || '');
+  if (lineStatusTimeout) clearTimeout(lineStatusTimeout);
+  lineStatusTimeout = setTimeout(() => {
+    el.textContent = '';
+    el.className = 'line-status';
+  }, 3000);
+}
+
+async function loadLine() {
+  const input = document.getElementById('line-input');
+  const select = document.getElementById('line-select');
+  const typed = (input.value || '').trim();
+  const path = typed || select.value;
+  if (!path) {
+    showLineStatus('no path selected', 'err');
+    return;
+  }
+  try {
+    const r = await fetch(API + '/pathfinder/line_path', {
+      method: 'POST',
+      headers: {'Content-Type': 'application/json'},
+      body: JSON.stringify({path}),
+    });
+    const body = await r.json().catch(() => ({}));
+    if (!r.ok) throw new Error(body.error || ('HTTP ' + r.status));
+    const used = body.path || path;
+    const base = used.split('/').pop();
+    showLineStatus('loaded ' + base, 'ok');
+    // Pick up the new line on the next /racing_line poll so the viz redraws.
+    fetchLine();
+  } catch(e) {
+    console.error('loadLine failed', e);
+    showLineStatus(String(e.message || e), 'err');
+  }
+}
+
+function drawResSparkline() {
+  const cv = document.getElementById('res-spark');
+  if (!cv) return;
+  const sctx = cv.getContext('2d');
+  const w = cv.width, h = cv.height;
+  sctx.clearRect(0, 0, w, h);
+  if (resEsHistory.length < 2) return;
+  let mx = 0;
+  for (const v of resEsHistory) if (v > mx) mx = v;
+  if (mx < 1e-9) mx = 1;
+  sctx.beginPath();
+  sctx.strokeStyle = '#60a5fa';
+  sctx.lineWidth = 1;
+  const n = resEsHistory.length;
+  for (let i = 0; i < n; i++) {
+    const x = (i / Math.max(1, RES_SPARK_MAX - 1)) * w;
+    const y = h - 1 - (resEsHistory[i] / mx) * (h - 2);
+    if (i === 0) sctx.moveTo(x, y); else sctx.lineTo(x, y);
+  }
+  sctx.stroke();
 }
 
 async function fetchLines() {
@@ -247,7 +548,6 @@
   } catch(e) { dynamicLine = null; }
 }
 
-// Display order roughly groups: line-tracking, speed, control, safety, reward.
 const COST_ROWS = [
   ['d',                'lateral d',     '#60a5fa'],
   ['terminal_d',       'term d',        '#60a5fa'],
@@ -260,7 +560,7 @@
   ['edge',             'edge (inner)',  '#fb7185'],
   ['boundary',         'corridor',      '#f87171'],
   ['a_lat',            'a_lat',         '#f87171'],
-  ['progress',         'progress',      '#4ade80'],  // negative = reward
+  ['progress',         'progress',      '#4ade80'],
 ];
 
 function renderCosts() {
@@ -289,7 +589,6 @@
     const sign = v < 0 ? '−' : '';
 
     const tr = document.createElement('tr');
-
     const tdName = document.createElement('td');
     tdName.className = 'name';
     tdName.textContent = label;
@@ -313,14 +612,24 @@
   }
 }
 
-// ── Draw ──
+// Deterministic Line segment intersection check (returns [x,y] or null)
+function findLineIntersection(p0_x, p0_y, p1_x, p1_y, p2_x, p2_y, p3_x, p3_y) {
+  let s1_x = p1_x - p0_x, s1_y = p1_y - p0_y;
+  let s2_x = p3_x - p2_x, s2_y = p3_y - p2_y;
+
+  let s = (-s1_y * (p0_x - p2_x) + s1_x * (p0_y - p2_y)) / (-s2_x * s1_y + s1_x * s2_y);
+  let t = ( s2_x * (p0_y - p2_y) - s2_y * (p0_x - p2_x)) / (-s2_x * s1_y + s1_x * s2_y);
+
+  if (s >= 0 && s <= 1 && t >= 0 && t <= 1) {
+    return [p0_x + (t * s1_x), p0_y + (t * s1_y)];
+  }
+  return null;
+}
+
 function draw() {
   requestAnimationFrame(draw);
-
-  // Smooth zoom
   cam.zoom += (cam.targetZoom - cam.zoom) * 0.15;
 
-  // Auto-follow if not dragging
   if (!dragging) {
     cam.x += (odom.x - cam.x) * 0.05;
     cam.y += (odom.y - cam.y) * 0.05;
@@ -329,41 +638,9 @@
   ctx.fillStyle = '#0a0a0f';
   ctx.fillRect(0, 0, C.width, C.height);
 
-  // Grid
   drawGrid();
+  drawTrackBorders();
 
-  // Racing line
-  if (racingLine.length > 1) {
-    ctx.beginPath();
-    ctx.strokeStyle = '#1a2a3a';
-    ctx.lineWidth = Math.max(1, 2 * cam.zoom);
-    ctx.setLineDash([8, 6]);
-    let [sx, sy] = w2s(racingLine[0][0], racingLine[0][1]);
-    ctx.moveTo(sx, sy);
-    for (let i = 1; i < racingLine.length; i++) {
-      [sx, sy] = w2s(racingLine[i][0], racingLine[i][1]);
-      ctx.lineTo(sx, sy);
-    }
-    ctx.closePath();
-    ctx.stroke();
-    ctx.setLineDash([]);
-  }
-
-  // Bezier rejoin overlay (when active — this is what MPC is actually tracking)
-  if (dynamicLine && dynamicLine.active && dynamicLine.points && dynamicLine.points.length > 1) {
-    ctx.beginPath();
-    ctx.strokeStyle = '#f59e0b';
-    ctx.lineWidth = Math.max(2, 2.5 * cam.zoom);
-    let [bx, by] = w2s(dynamicLine.points[0][0], dynamicLine.points[0][1]);
-    ctx.moveTo(bx, by);
-    for (let i = 1; i < dynamicLine.points.length; i++) {
-      [bx, by] = w2s(dynamicLine.points[i][0], dynamicLine.points[i][1]);
-      ctx.lineTo(bx, by);
-    }
-    ctx.stroke();
-  }
-
-  // Trail (gradient fade)
   if (trail.length > 1) {
     for (let i = 1; i < trail.length; i++) {
       const alpha = (i / trail.length) * 0.7;
@@ -378,10 +655,10 @@
     }
   }
 
-  // Kart
-  drawKart(odom.x, odom.y, odom.yaw);
+  // Calculate and Draw Raycast Intersection lines
+  computeAndDrawRaycasts(odom.x, odom.y, odom.yaw);
 
-  // HUD
+  drawKart(odom.x, odom.y, odom.yaw);
   updateHUD();
 }
 
@@ -389,7 +666,6 @@
   const step = cam.zoom > 8 ? 5 : cam.zoom > 2 ? 10 : cam.zoom > 0.8 ? 50 : 100;
   ctx.strokeStyle = '#111118';
   ctx.lineWidth = 0.5;
-  // Visible world bounds
   const wLeft   = cam.x - C.width / 2 / cam.zoom;
   const wRight  = cam.x + C.width / 2 / cam.zoom;
   const wBottom = cam.y - C.height / 2 / cam.zoom;
@@ -406,31 +682,179 @@
   }
 }
 
+function drawTrackBorders() {
+  if (leftTrackEdge.length < 2) return;
+
+  // Draw Left Edge (Solid Dark Blue Line)
+  ctx.beginPath();
+  ctx.strokeStyle = '#1e293b';
+  ctx.lineWidth = 2;
+  let [sx, sy] = w2s(leftTrackEdge[0][0], leftTrackEdge[0][1]);
+  ctx.moveTo(sx, sy);
+  for(let i=1; i<leftTrackEdge.length; i++) {
+    [sx, sy] = w2s(leftTrackEdge[i][0], leftTrackEdge[i][1]);
+    ctx.lineTo(sx, sy);
+  }
+  ctx.closePath();
+  ctx.stroke();
+
+  // Draw Right Edge
+  ctx.beginPath();
+  ctx.strokeStyle = '#1e293b';
+  ctx.lineWidth = 2;
+  [sx, sy] = w2s(rightTrackEdge[0][0], rightTrackEdge[0][1]);
+  ctx.moveTo(sx, sy);
+  for(let i=1; i<rightTrackEdge.length; i++) {
+    [sx, sy] = w2s(rightTrackEdge[i][0], rightTrackEdge[i][1]);
+    ctx.lineTo(sx, sy);
+  }
+  ctx.closePath();
+  ctx.stroke();
+}
+
+function computeAndDrawRaycasts(x, y, yaw) {
+  const fov_deg = 45.0;
+  const half_fov_rad = (fov_deg / 2.0) * Math.PI / 180.0;
+  const max_ray_len = 25.0;
+  const circle_offset = 1.0;
+
+  // Keep legacy canvas space matching conventions (-yaw and y - sin)
+  const leftYaw = -yaw + half_fov_rad;
+  const rightYaw = -yaw - half_fov_rad;
+
+  // Standard ray targets (clipped by walls during intersection)
+  let leftTargetX = x + max_ray_len * Math.cos(leftYaw);
+  let leftTargetY = y - max_ray_len * Math.sin(leftYaw);
+
+  let rightTargetX = x + max_ray_len * Math.cos(rightYaw);
+  let rightTargetY = y - max_ray_len * Math.sin(rightYaw);
+
+  let finalLeftIntersect = [leftTargetX, leftTargetY];
+  let finalRightIntersect = [rightTargetX, rightTargetY];
+
+  let leftFound = false;
+  let rightFound = false;
+  let minLeftDist = max_ray_len;
+  let minRightDist = max_ray_len;
+
+  const boundaries = [leftTrackEdge, rightTrackEdge];
+
+  for (const edge of boundaries) {
+    if (edge.length < 2) continue;
+    for (let i = 0; i < edge.length; i++) {
+      let p2 = edge[i];
+      let p3 = edge[(i + 1) % edge.length];
+
+      // Evaluate Left Ray Path
+      let hitL = findLineIntersection(x, y, leftTargetX, leftTargetY, p2[0], p2[1], p3[0], p3[1]);
+      if (hitL) {
+        let dist = Math.hypot(hitL[0] - x, hitL[1] - y);
+        if (dist < minLeftDist) {
+          minLeftDist = dist;
+          finalLeftIntersect = hitL;
+          leftFound = true;
+        }
+      }
+
+      // Evaluate Right Ray Path
+      let hitR = findLineIntersection(x, y, rightTargetX, rightTargetY, p2[0], p2[1], p3[0], p3[1]);
+      if (hitR) {
+        let dist = Math.hypot(hitR[0] - x, hitR[1] - y);
+        if (dist < minRightDist) {
+          minRightDist = dist;
+          finalRightIntersect = hitR;
+          rightFound = true;
+        }
+      }
+    }
+  }
+
+  // 1. DYNAMICALLY SCALE CENTER DISTANCE
+  // Calculate center length based on current wall proximity, then add your offset buffer
+  let currentActualForwardDist = (minLeftDist + minRightDist) / 2.0;
+  let dynamicCenterLen = currentActualForwardDist + circle_offset;
+
+  let centerTargetX = x + dynamicCenterLen * Math.cos(-yaw);
+  let centerTargetY = y - dynamicCenterLen * Math.sin(-yaw);
+  let centerTargetIntersect = [centerTargetX, centerTargetY];
+
+  // 2. GENERATE MATCHING PROPORTIONAL OUTER OFFSETS
+  // Scale the unclipped reference offsets using the exact same dynamic length
+  let offsetLeftTargetX = x + dynamicCenterLen * Math.cos(leftYaw);
+  let offsetLeftTargetY = y - dynamicCenterLen * Math.sin(leftYaw);
+  let offsetRightTargetX = x + dynamicCenterLen * Math.cos(rightYaw);
+  let offsetRightTargetY = y - dynamicCenterLen * Math.sin(rightYaw);
+
+  let offsetFinalLeftIntersect = [offsetLeftTargetX, offsetLeftTargetY];
+  let offsetFinalRightIntersect = [offsetRightTargetX, offsetRightTargetY];
+
+  function calculateAngleAtVertex(intersect, offset_pt, straight_pt) {
+    const a = Math.hypot(offset_pt[0] - straight_pt[0], offset_pt[1] - straight_pt[1]); 
+    const b = Math.hypot(intersect[0] - straight_pt[0], intersect[1] - straight_pt[1]); 
+    const c = Math.hypot(offset_pt[0] - intersect[0], offset_pt[1] - intersect[1]); 
+
+    if (a === 0 || b === 0) return 0.0;
+
+    let cosTheta = (a**2 + b**2 - c**2) / (2 * a * b);
+    cosTheta = Math.max(-1.0, Math.min(1.0, cosTheta)); 
+
+    return (Math.acos(cosTheta) * 180.0) / Math.PI;
+  }
+
+  let angleL = leftFound ? calculateAngleAtVertex(finalLeftIntersect, offsetFinalLeftIntersect, centerTargetIntersect) : 22.5;
+  let angleR = rightFound ? calculateAngleAtVertex(finalRightIntersect, offsetFinalRightIntersect, centerTargetIntersect) : 22.5;
+
+  currentComputedAngles = [angleL, angleR];
+
+  // Convert map space coordinates to screen buffer space for drawing traces
+  const [sKartX, sKartY] = w2s(x, y);
+  const [sLeftX, sLeftY] = w2s(finalLeftIntersect[0], finalLeftIntersect[1]);
+  const [sRightX, sRightY] = w2s(finalRightIntersect[0], finalRightIntersect[1]);
+  const [sCenterX, sCenterY] = w2s(centerTargetIntersect[0], centerTargetIntersect[1]);
+
+  // Render Cyan Left System Trace
+  ctx.beginPath();
+  ctx.strokeStyle = leftFound ? '#22d3ee' : 'rgba(167, 139, 250, 0.4)';
+  ctx.lineWidth = leftFound ? 2.5 : 1.0;
+  ctx.moveTo(sKartX, sKartY);
+  ctx.lineTo(sLeftX, sLeftY);
+  ctx.stroke();
+
+  // Render Pink Right System Trace
+  ctx.beginPath();
+  ctx.strokeStyle = rightFound ? '#f43f5e' : 'rgba(244, 63, 94, 0.4)';
+  ctx.lineWidth = rightFound ? 2.5 : 1.0;
+  ctx.moveTo(sKartX, sKartY);
+  ctx.lineTo(sRightX, sRightY);
+  ctx.stroke();
+
+  // Render Amber Center Line Trace (Now moves dynamically with your sensory limits!)
+  ctx.beginPath();
+  ctx.strokeStyle = '#f59e0b';
+  ctx.lineWidth = 2.5;
+  ctx.moveTo(sKartX, sKartY);
+  ctx.lineTo(sCenterX, sCenterY); 
+  ctx.stroke();
+}
+
 function drawKart(x, y, yaw) {
   const [sx, sy] = w2s(x, y);
-  const len = Math.max(12, 1.05 * cam.zoom * 2);  // wheelbase-ish
+  const len = Math.max(12, 1.05 * cam.zoom * 2);
   const wid = len * 0.5;
 
   ctx.save();
   ctx.translate(sx, sy);
-  ctx.rotate(-yaw);  // canvas Y is flipped
+  ctx.rotate(-yaw);
 
-  // Body
   ctx.fillStyle = '#e0e0e0';
   ctx.beginPath();
-  ctx.moveTo(len * 0.6, 0);            // nose
-  ctx.lineTo(-len * 0.4, -wid * 0.5);  // rear left
-  ctx.lineTo(-len * 0.3, 0);           // rear center notch
-  ctx.lineTo(-len * 0.4, wid * 0.5);   // rear right
+  ctx.moveTo(len * 0.6, 0);
+  ctx.lineTo(-len * 0.4, -wid * 0.5);
+  ctx.lineTo(-len * 0.3, 0);
+  ctx.lineTo(-len * 0.4, wid * 0.5);
   ctx.closePath();
   ctx.fill();
 
-  // Direction indicator
-  ctx.fillStyle = '#f87171';
-  ctx.beginPath();
-  ctx.arc(len * 0.35, 0, 2.5, 0, Math.PI * 2);
-  ctx.fill();
-
   ctx.restore();
 }
 
@@ -443,11 +867,15 @@
   document.getElementById('h-yaw').textContent = `${(odom.yaw * 180 / Math.PI).toFixed(1)}°`;
   document.getElementById('h-spd').textContent = `${odom.speed.toFixed(2)} m/s`;
   document.getElementById('h-cmd').textContent = `M:${(odom.motor||0).toFixed(0)}% S:${(odom.steer||0).toFixed(1)}°`;
+  
+  document.getElementById('h-cam-l').textContent = `${currentComputedAngles[0].toFixed(1)}°`;
+  document.getElementById('h-cam-r').textContent = `${currentComputedAngles[1].toFixed(1)}°`;
 
   const s = document.getElementById('status');
   s.textContent = connected ? 'connected' : 'disconnected';
   s.className = connected ? 'ok' : 'err';
 }
+
 async function setState(state) {
   try {
     await fetch(API + '/set_state', {
@@ -455,15 +883,14 @@
       headers: {'Content-Type': 'application/json'},
       body: JSON.stringify({state}),
     });
-  } catch(e) { console.error('setState failed', e); }
+  } catch(e) {}
 }
 
-// ── Init ──
 fetchLine();
-setInterval(fetchOdom, 50);          // 20Hz polling
-setInterval(fetchLine, 5000);        // refresh line occasionally
-setInterval(fetchMpcStatus, 100);    // 10Hz MPC cost breakdown
-setInterval(fetchLines, 200);        // 5Hz dynamic-line (bezier rejoin) overlay
+setInterval(fetchOdom, 50);
+setInterval(fetchLine, 5000);
+setInterval(fetchMpcStatus, 100);
+setInterval(fetchLines, 200);
 draw();
 </script>
 </body>