From dc8b4e4544694fac4e9ec6f1b2eaa301de122630 Mon Sep 17 00:00:00 2001 From: Ben Dudson Date: Sat, 28 Feb 2026 13:12:36 -0800 Subject: [PATCH 1/8] agent.tools.validate_settings: Check input settings Validates settings using OptionsFactory WithMeta schema. Reports any issues along with suggested fixes. --- hypnotoad/agent/__init__.py | 0 hypnotoad/agent/tools/__init__.py | 5 + hypnotoad/agent/tools/validate.py | 188 ++++++++++++++++++++++++++ hypnotoad/test_suite/test_validate.py | 57 ++++++++ 4 files changed, 250 insertions(+) create mode 100644 hypnotoad/agent/__init__.py create mode 100644 hypnotoad/agent/tools/__init__.py create mode 100644 hypnotoad/agent/tools/validate.py create mode 100644 hypnotoad/test_suite/test_validate.py diff --git a/hypnotoad/agent/__init__.py b/hypnotoad/agent/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/hypnotoad/agent/tools/__init__.py b/hypnotoad/agent/tools/__init__.py new file mode 100644 index 00000000..eb5e84e6 --- /dev/null +++ b/hypnotoad/agent/tools/__init__.py @@ -0,0 +1,5 @@ +from .validate import validate_settings + +__all__ = [ + "validate_settings", +] diff --git a/hypnotoad/agent/tools/validate.py b/hypnotoad/agent/tools/validate.py new file mode 100644 index 00000000..7bd35fc4 --- /dev/null +++ b/hypnotoad/agent/tools/validate.py @@ -0,0 +1,188 @@ +""" +Validate a dict of Hypnotoad settings, returning a dict +describing issues. 
+""" + +import difflib +import inspect +from dataclasses import dataclass +from enum import Enum +from typing import Any + +from ...cases import tokamak +from ...core.mesh import BoutMesh + +POSSIBLE_OPTIONS = ( + tokamak.TokamakEquilibrium.user_options_factory.defaults + | tokamak.TokamakEquilibrium.nonorthogonal_options_factory.defaults + | BoutMesh.user_options_factory.defaults +) + +POSSIBLE_KEYS = [opt for opt in POSSIBLE_OPTIONS] + + +class IssueType(str, Enum): + UNKNOWN_KEY = "unknown_key" + WRONG_TYPE = "wrong_type" + OUT_OF_BOUNDS = "out_of_bounds" + INVALID_VALUE = "invalid_value" + MISSING = "missing" + + +@dataclass +class SettingIssue: + issue_type: IssueType + message: str + suggestions: list[str] = None + expected: Any = None + got: Any = None + + def to_dict(self) -> dict: + d = {"issue_type": self.issue_type.value, "message": self.message} + if self.suggestions: + d["suggestions"] = self.suggestions + if self.expected is not None: + d["expected"] = str(self.expected) + if self.got is not None: + d["got"] = str(self.got) + return d + + +def _describe_type(value_type) -> str: + """Human-readable type description from value_type field.""" + if value_type is None: + return "any" + if isinstance(value_type, (list, tuple)): + names = [t.__name__ if t is not None else "None" for t in value_type] + return " or ".join(names) + return value_type.__name__ + + +def _check_value_type(value: Any, value_type) -> bool: + """Check value against value_type, which may be a type, list of types, or None.""" + if value_type is None: + return True # Any type accepted + types = value_type if isinstance(value_type, (list, tuple)) else [value_type] + # Treat None in the type list as allowing Python None + allowed_types = tuple(t for t in types if t is not None) + none_allowed = any(t is None for t in types) + if value is None: + return none_allowed + # Be lenient: int is acceptable where float is expected + if isinstance(value, bool): + # bool is a subclass of int -- only 
allow if bool is explicitly in types
+        return bool in types or (not allowed_types)
+    if isinstance(value, int) and float in types:
+        return True
+    return isinstance(value, allowed_types)
+
+
+def _run_checks(value: Any, meta: "WithMeta") -> list[str]:
+    """
+    Run check_all and check_any against value.
+    Returns list of failure messages (empty = all passed).
+    """
+    failures = []
+
+    # check_all: every check must pass
+    check_all = meta.check_all if hasattr(meta, "check_all") else None
+    if check_all is not None:
+        checks = check_all if isinstance(check_all, (list, tuple)) else [check_all]
+        for check in checks:
+            try:
+                if not check(value):
+                    # Try to get a useful description of the check
+                    src = (
+                        inspect.getsource(check).strip()
+                        if hasattr(check, "__code__")
+                        else str(check)
+                    )
+                    failures.append(f"Failed check: {src}")
+            except Exception as e:
+                failures.append(f"Check raised exception: {e}")
+
+    # check_any: at least one check must pass
+    check_any = meta.check_any if hasattr(meta, "check_any") else None
+    if check_any is not None:
+        checks = check_any if isinstance(check_any, (list, tuple)) else [check_any]
+        passed = False
+        for check in checks:
+            try:
+                if check(value):
+                    passed = True
+                    break
+            except Exception:
+                pass
+        if not passed:
+            srcs = []
+            for check in checks:
+                try:
+                    srcs.append(inspect.getsource(check).strip())
+                except Exception:
+                    srcs.append(str(check))
+            failures.append(f"Must satisfy at least one of: {'; '.join(srcs)}")
+
+    return failures
+
+
+def validate_settings(settings: dict = {}) -> dict:
+    """Check settings for common issues before running.
+
+    Returns a dictionary with a boolean flag 'valid'
+    and a dict of 'issues' indexed by keys that do not
+    pass validation.
+ """ + issues = {} + + for key, value in settings.items(): + # Unknown keys + if key not in POSSIBLE_KEYS: + suggestions = difflib.get_close_matches(key, POSSIBLE_KEYS, n=3, cutoff=0.6) + issues[key] = SettingIssue( + issue_type=IssueType.UNKNOWN_KEY, + message=f"'{key}' is not a recognised Hypnotoad option", + suggestions=suggestions or None, + ).to_dict() + continue + + # Validate using WithMeta + meta = POSSIBLE_OPTIONS[key] + value_type = getattr(meta, "value_type", None) + allowed = getattr(meta, "allowed", None) + + if not _check_value_type(value, value_type): + issues[key] = SettingIssue( + issue_type=IssueType.WRONG_TYPE, + message=( + f"'{key}' expects {_describe_type(value_type)}, " + f"got {type(value).__name__}" + ), + expected=_describe_type(value_type), + got=type(value).__name__, + ).to_dict() + continue # no point checking further with wrong type + + # Allowed values check + if allowed is not None and value not in allowed: + suggestions = difflib.get_close_matches( + str(value), [str(v) for v in allowed], n=3, cutoff=0.5 + ) + issues[key] = SettingIssue( + issue_type=IssueType.INVALID_VALUE, + message=f"'{key}' = {value!r} not in allowed values: {allowed}", + expected=allowed, + got=value, + suggestions=suggestions or None, + ).to_dict() + continue + + # check_all / check_any + check_failures = _run_checks(value, meta) + if check_failures: + issues[key] = SettingIssue( + issue_type=IssueType.OUT_OF_BOUNDS, + message=f"'{key}' = {value!r} failed validation: {'; '.join(check_failures)}", + got=value, + ).to_dict() + + return {"valid": len(issues) == 0, "issues": issues} diff --git a/hypnotoad/test_suite/test_validate.py b/hypnotoad/test_suite/test_validate.py new file mode 100644 index 00000000..65713c2b --- /dev/null +++ b/hypnotoad/test_suite/test_validate.py @@ -0,0 +1,57 @@ +from hypnotoad.agent import tools + + +def test_validate_unknown_key(): + # Using an invalid key should lead to an issue and suggestion + assert 
tools.validate_settings({"nxcore": 10}) == { + "valid": False, + "issues": { + "nxcore": { + "issue_type": "unknown_key", + "message": "'nxcore' is not a recognised Hypnotoad option", + "suggestions": ["nx_core"], + } + }, + } + + +def test_validate_wrong_type(): + # Using the wrong type + assert tools.validate_settings({"nx_core": 3.4}) == { + "valid": False, + "issues": { + "nx_core": { + "issue_type": "wrong_type", + "message": "'nx_core' expects int, got float", + "expected": "int", + "got": "float", + } + }, + } + + +def test_validate_invalid_value(): + assert tools.validate_settings({"curvature_type": "nonsense"}) == { + "valid": False, + "issues": { + "curvature_type": { + "issue_type": "invalid_value", + "message": "'curvature_type' = 'nonsense' not in allowed values: ('curl(b/B)', 'curl(b/B) with x-y derivatives', 'bxkappa')", + "expected": "('curl(b/B)', 'curl(b/B) with x-y derivatives', 'bxkappa')", + "got": "nonsense", + } + }, + } + + +def test_validate_out_of_bounds(): + assert tools.validate_settings({"refine_width": -1.0}) == { + "valid": False, + "issues": { + "refine_width": { + "issue_type": "out_of_bounds", + "message": "'refine_width' = -1.0 failed validation: Failed check: def is_positive(x):\n try:\n return x > 0\n except TypeError:\n return False", + "got": "-1.0", + } + }, + } From 38842f31d338fc379fe04c6064f9c6716a6c6220 Mon Sep 17 00:00:00 2001 From: Ben Dudson Date: Mon, 2 Mar 2026 20:43:52 -0800 Subject: [PATCH 2/8] Hypnotoad agent Initial version of an AI agent to run Hypnotoad. 
--- hypnotoad/agent/__init__.py | 41 ++ hypnotoad/agent/hypnotoad_agent.py | 563 ++++++++++++++++++ hypnotoad/agent/tools/__init__.py | 8 +- hypnotoad/agent/tools/describe_equilibrium.py | 58 ++ hypnotoad/agent/tools/inspect_mesh.py | 164 +++++ hypnotoad/agent/tools/search.py | 111 ++++ .../{validate.py => validate_settings.py} | 41 +- 7 files changed, 967 insertions(+), 19 deletions(-) create mode 100644 hypnotoad/agent/hypnotoad_agent.py create mode 100644 hypnotoad/agent/tools/describe_equilibrium.py create mode 100644 hypnotoad/agent/tools/inspect_mesh.py create mode 100644 hypnotoad/agent/tools/search.py rename hypnotoad/agent/tools/{validate.py => validate_settings.py} (84%) diff --git a/hypnotoad/agent/__init__.py b/hypnotoad/agent/__init__.py index e69de29b..34f43f7e 100644 --- a/hypnotoad/agent/__init__.py +++ b/hypnotoad/agent/__init__.py @@ -0,0 +1,41 @@ +""" +LLM agent that generates tokamak meshes using Hypnotoad. +Connects to an LLM server using the openai.OpenAI API. + +Usage: + + from hypnotoad import agent + + geqdsk_file = "/path/to/geqdsk.file" + ENDPOINT_URL = "https://llm.server/v1" + API_KEY = abdefghijklmnopqrstuvwxyz + MODEL = "gpt-5-mini" + + hypnotoad_agent = agent.HypnotoadAgent(geqdsk_file, base_url=ENDPOINT_URL, + api_key=API_KEY, model=MODEL) + +In a script or command line use the `chat()` method: + + hypnotoad_agent.chat("Generate a mesh, adjusting settings until a mesh is successfully generated.") + +In a Jupyter notebook the `chat_nb()` method will use `ipywidgets` and `IPython` +packages to display the output of the model and tool execution: + + hypnotoad_agent.chat_nb("Generate a mesh, adjusting settings until a mesh is successfully generated.") + +Further questions and instructions can be issued: + + hypnotoad_agent.chat_nb("Please adjust settings to generate a mesh with psinorm_sol = 1.09") + +Meshes generated are stored in `mesh_history`. 
The last mesh is available: + + hypnotoad_agent.last_mesh + +To quickly plot the last mesh: + + hypnotoad_agent.plot_last_mesh() +""" + +from .hypnotoad_agent import HypnotoadAgent + +__all__ = ["HypnotoadAgent"] diff --git a/hypnotoad/agent/hypnotoad_agent.py b/hypnotoad/agent/hypnotoad_agent.py new file mode 100644 index 00000000..a14e5837 --- /dev/null +++ b/hypnotoad/agent/hypnotoad_agent.py @@ -0,0 +1,563 @@ +import logging +import json +import pprint +from typing import Optional +from ..cases import tokamak +from ..core.mesh import BoutMesh +from . import tools + +logger = logging.getLogger(__name__) + +SYSTEM_PROMPT = """ +You are an expert assistant for generating 2D tokamak plasma simulation meshes +using Hypnotoad. You help users configure mesh settings, run the mesh generator, +diagnose problems, and iteratively improve mesh quality. The user is a plasma +physicist and you can use technical terminology freely. + +## Tools Available + +- get_equilibrium_info: Describe the magnetic equilibrium and geometry. + Call this at the start of a session or when you need to understand the + physics before choosing mesh settings. Returns metrics that inform + psinorm, resolution and spacing choices. + +- validate_settings(settings): Check a settings dict for missing required keys + and type errors before running. + +- run_hypnotoad(settings): Run the mesh generator. Returns success/error and + mesh diagnostics. On error, the message will include the exception message. + +- inspect_mesh(detail): Inspect the most recently generated mesh file. + +- search_hypnotoad_options(query, k): Search the Hypnotoad settings options + reference, returning the k most relevant results. Do not guess settings keys. + +## Workflow + +Follow these steps in order. Do not skip steps or change their sequence. 
+ +### Step 1 — Establish initial settings + +IF the user provides settings or options: + - Call validate_settings with those settings + - Fix every issue reported before proceeding + - Report what was fixed to the user +ELSE: + - Use settings = { + 'nx_core': 18, + 'nx_sol': 18, + 'orthogonal': True, + 'psinorm_pf': 0.96, + 'psinorm_sol': 1.05, + 'target_all_poloidal_spacing_length': 0.05, + 'xpoint_poloidal_spacing_length': 0.15, + } + - call search_hypnotoad_options to identify + appropriate changes based on the geometry and user request. + - Call validate_settings on the constructed settings and fix any issues + +Do not call run_hypnotoad until validate_settings returns valid=true. + +### Step 2 — Generate a mesh + +Call run_hypnotoad with the most recent validated settings. +Use the same input to run_hypnotoad as was used to validate_settings. + +IF run_hypnotoad succeeds: go to Step 3. +IF run_hypnotoad fails: go to Step 2a. + +#### Step 2a — Diagnose and fix a failed run + +Do not guess at settings changes. Follow this sequence: + +1. Read the error message carefully. If it names a specific option or + parameter, call search_hypnotoad_options with that name first. + The error "Cannot create connected double-null grid" indicates that + nx_inter_sep should be set to 1 or larger. +2. Call get_equilibrium_info if you have not already done so. Pay attention to: + - The number and psinorm values of the X-points. + Only X-points within the normalised psi range [psinorm_pf, psinorm_sol] will + be included in the mesh. A maximum of two X-points can be included. + If more than two X-points fall within the domain, narrow psinorm_sol to + exclude the excess. + - The suggested psinorm_pf and psinorm_sol values. +3. Call search_hypnotoad_options to find options relevant to the error. +4. Construct corrected settings, then call validate_settings. + Fix all reported issues before proceeding. +5. Call run_hypnotoad with the corrected settings. 
+ +Repeat Step 2a up to 3 times. If the mesh still fails after 3 attempts, +report the full error history to the user and ask for guidance. +Do not continue attempting without user input. + +### Step 3 — Assess mesh quality + +Call inspect_mesh(detail="summary") immediately after every successful +run_hypnotoad, even if the run looked clean. + +IF valid=false (negative Jacobian or self-crossing cells): + - This mesh must not be used. Treat as a failure. + - Call inspect_mesh(detail="standard") to identify which regions failed. + - Go to Step 2a to fix the settings. + +IF valid=true but warnings are present: + - Call inspect_mesh(detail="standard") to investigate. + - Report each warning to the user with a plain-language explanation of + its physical significance. + - Suggest specific settings changes that would address each warning. + Support each suggestion with a search_hypnotoad_options call — + do not suggest option names from memory. + - Ask the user whether to proceed with improvements or accept the mesh. + +IF valid=true and no warnings: + - Report the mesh quality summary to the user. + - Ask the user whether the mesh is acceptable or further refinement + is needed. + + +### General rules + +- Never call run_hypnotoad with settings that have not passed validate_settings. +- Never suggest or use an option name that has not been confirmed by + search_hypnotoad_options or returned by validate_settings. +- Never make more than one settings change at a time without explaining + the reason for each change. +- If uncertain about any step, ask the user before proceeding. +""" + +# Anthropic API format +TOOLS = [ + { + "name": "validate_settings", + "description": "Validate a settings dict before running Hypnotoad.", + "input_schema": { + "type": "object", + "properties": {"settings": {"type": "object"}}, + "required": ["settings"], + }, + }, + { + "name": "run_hypnotoad", + "description": "Run Hypnotoad mesh generator with a settings dict. 
Returns success/error and mesh metadata.", + "input_schema": { + "type": "object", + "properties": { + "settings": { + "type": "object", + "description": "Hypnotoad settings dictionary", + } + }, + "required": ["settings"], + }, + }, + { + "name": "inspect_mesh", + "description": """Inspect mesh quality after generation. Use detail levels +progressively: +- 'summary' (default): global pass/fail + warning list. Always call this first. +- 'standard': per-region statistics + connections. Call when summary has warnings. +- 'full': worst-cell locations, metric tensors, interface continuity details. + Call only to diagnose a specific problem identified at standard level.""", + "input_schema": { + "type": "object", + "properties": {"detail": {"type": "string"}}, + "required": [], + }, + }, + { + "name": "list_meshes", + "description": "List all meshes generated in this session with their " + "index, pass/fail status, and settings", + "input_schema": {"type": "object", "properties": {}, "required": []}, + }, + { + "name": "search_hypnotoad_options", + "description": ( + "Search the Hypnotoad settings options reference. Returns the k most " + "relevant options matching the query, each with its name, default value, " + "type, allowed values, and description.\n\n" + "Use this tool when:\n" + "- You need to know the exact name of an option (e.g. 'what option " + "controls poloidal spacing near the X-point?')\n" + "- You need to know the default, type, or allowed values for a specific " + "option before setting it\n" + "- validate_settings has returned an unknown_key error and you want to " + "find the correct option name\n" + "- You are constructing a settings dict and want to check what options " + "are available for a particular aspect of the mesh\n\n" + "Do not guess option names. Always use this tool if you are unsure." 
+ ), + "input_schema": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": ( + "Natural language description of the option or behaviour you " + "are looking for. Can be a partial option name, a physical " + "concept, or a description of what you want to control. " + "Examples:\n" + "- 'X-point poloidal spacing'\n" + "- 'number of radial points in SOL'\n" + "- 'target plate resolution'\n" + "- 'nx_inter_sep'\n" + "- 'orthogonal mesh'" + ), + }, + "k": { + "type": "integer", + "description": ( + "Number of options to return. Default 4. Use a larger value " + "(up to 10) when exploring an unfamiliar area of the settings " + "space, or when the first results do not contain what you need." + ), + "default": 4, + "minimum": 1, + "maximum": 10, + }, + }, + "required": ["query"], + }, + }, +] + + +def to_openai_tools(anthropic_tools: list[dict]) -> list[dict]: + """Convert Anthropic-format tool definitions to OpenAI format.""" + return [ + { + "type": "function", + "function": { + "name": t["name"], + "description": t["description"], + "parameters": t["input_schema"], + }, + } + for t in anthropic_tools + ] + + +TOOLS_OPENAI = to_openai_tools(TOOLS) + +POSSIBLE_OPTIONS = ( + tokamak.TokamakEquilibrium.user_options_factory.defaults + | tokamak.TokamakEquilibrium.nonorthogonal_options_factory.defaults + | BoutMesh.user_options_factory.defaults +) + + +def normalise_arguments(signature: dict, args): + """Return a dict matching the given signature or raise a ValueError. + + A common issue with LLM tools is inconsistency in the argument + format. Sometimes the arguments are passed in order, other times + as a dict with argument names. This function attempts to normalise + the arguments to match the signature. 
+ """ + # Check first argument + if signature == {}: + return {} # No arguments + first_key, first_type = next(iter(signature.items())) + + if isinstance(args, dict): + if isinstance(args, first_type): + # This dict could either be intended to be the first argument, + # or to contain function arguments. + if any([key not in signature for key in args]): + # Assign arguments to the first key + return {first_key: args} + # Filter keys to those in the signature + return {key: value for key, value in args.items() if key in signature} + + elif isinstance(args, first_type): + return {first_key: args} + raise ValueError(f"Arguments {args} do not match signature {signature}") + + +def default_handler(title, func, *args, **kwargs): + """Wrapper for tool calls""" + print(title) + return func(*args, **kwargs) + + +class HypnotoadAgent: + def __init__(self, gridfile, base_url: str = None, api_key: str = None, model=None): + + from openai import OpenAI + from .tools.search import ChunkDatabase, extract_option_chunks + + self.gridfile = gridfile + self.logger = logger.getChild(self.__class__.__name__) + self.client = OpenAI(base_url=base_url, api_key=api_key) + self.model = model + + # Check which models are available + available_models = [model.id for model in self.client.models.list()] + if model not in available_models: + raise ValueError( + f"Model {model} not available. Available models are {available_models}" + ) + self.tools = TOOLS_OPENAI + + # Maintain chat history. 
This is sent to the LLM at each call + self.messages = [ + {"role": "system", "content": SYSTEM_PROMPT}, + ] + + self.tool_registry = { + "get_equilibrium_info": { + "function": self.get_equilibrium_info, + "signature": {}, + }, + "validate_settings": { + "function": lambda **args: tools.validate_settings( + POSSIBLE_OPTIONS, **args + ), + "signature": {"settings": dict}, + }, + "run_hypnotoad": { + "function": self.run_hypnotoad, + "signature": {"settings": dict}, + }, + "inspect_mesh": { + "function": self._inspect_mesh, + "signature": {"mesh_index": int, "detail": str}, + }, + "list_meshes": {"function": self.list_meshes, "signature": {}}, + "search_hypnotoad_options": { + "function": self.search_hypnotoad_options, + "signature": {"query": str, "k": int}, + }, + } + + # Index available options so that the LLM can query + self.options_db = ChunkDatabase(extract_option_chunks(POSSIBLE_OPTIONS)) + + # Store generated meshes + self.mesh_history = [] + + def chat( + self, user_input: str, max_iterations: int = 20, task_handler=default_handler + ) -> str: + """ + task_handler(title, func, *args, **kwargs) : function + Wrapper that should print the title and then run func(*args, **kwargs) + """ + self.logger.debug(f"User input: {user_input}") + self.messages.append({"role": "user", "content": user_input}) + + for it in range(max_iterations): + response = self.client.chat.completions.create( + model=self.model, + tools=TOOLS_OPENAI, + messages=self.messages, + ) + + self.logger.debug(response) + + choice = response.choices[0] + msg = choice.message + + # Add assistant message -- must keep tool_calls intact + self.messages.append( + { + "role": "assistant", + "content": msg.content, # may be None + "tool_calls": [ + { + "id": tc.id, + "type": "function", + "function": { + "name": tc.function.name, + "arguments": tc.function.arguments, # keep as string + }, + } + for tc in msg.tool_calls + ] + if msg.tool_calls + else None, + } + ) + + if choice.finish_reason == 
"tool_calls": + # Add tool results -- one per tool call, matched by tool_call_id + for tc in msg.tool_calls: + try: + args = json.loads(tc.function.arguments) + except json.JSONDecodeError as e: + args = {} + result = { + "status": "error", + "error": "invalid_json", + "message": f"Tool arguments were not valid JSON: {e}", + "raw": tc.function.arguments, + } + else: + # Wrap the tool call in a function to pass to task_handler + # This enables output to be captured and redirected in + # the user interface. + def run_task(): + print( + f"Calling {tc.function.name}\nInputs: {pprint.pformat(args)}", + flush=True, + ) + + try: + tool = self.tool_registry[tc.function.name] + # Normalise the arguments to match signature + result = tool["function"]( + **normalise_arguments(tool["signature"], args) + ) + except Exception as e: + result = { + "status": "error", + "message": str(e), + } + print(f"Result: {pprint.pformat(result)}", flush=True) + return result + + result = task_handler(f"Calling {tc.function.name}", run_task) + + self.messages.append( + { + "role": "tool", + "tool_call_id": tc.id, # must match the id in the assistant message + "content": json.dumps(result), + } + ) + else: + return msg.content or "" + return "Exceeded maximum iterations. See log for details." 
+ + def chat_nb(self, user_input: str, max_iterations: int = 20): + """Wrapper that handles model output in a Jupyter notebook""" + + import ipywidgets as widgets + from IPython.display import display + + def run_step(title, func, *args, **kwargs): + """Runs a function and captures all output into a collapsible Accordion.""" + # Create an output widget to capture prints/logs + out = widgets.Output() + + # Create the Accordion UI + accordion = widgets.Accordion(children=[out], selected_index=None) + accordion.set_title(0, f"▶ {title}") + display(accordion) + + # Capture the output + with out: + # Redirect logging to this specific output widget + handler = logging.StreamHandler() + logger.addHandler(handler) + try: + return func(*args, **kwargs) + finally: + logger.removeHandler(handler) + + result = self.chat( + user_input, task_handler=run_step, max_iterations=max_iterations + ) + print(result) + + def run_hypnotoad(self, settings: dict = {}) -> dict: + """Run Hypnotoad with given settings dict""" + try: + # Read the grid file + with open(self.gridfile, "rt") as fh: + eq = tokamak.read_geqdsk(fh, settings=settings) + + if isinstance(eq, tuple): + raise eq[1] # Second element is an exception + mesh = BoutMesh(eq, settings) + mesh.calculateRZ() + mesh.geometry() + idx = len(self.mesh_history) + diagnostics = tools.inspect_mesh(mesh, detail="summary") + self.mesh_history.append({"settings": settings, "mesh": mesh}) + return { + "status": "success", + "mesh_index": idx, # <-- LLM uses this for inspect_mesh + "n_meshes": len(self.mesh_history), + "diagnostics": diagnostics, + "hint": f"Use inspect_mesh(mesh_index={idx}, detail='standard') to get more detail, " + f"or inspect_mesh(mesh_index=N) for any previous mesh.", + } + except Exception as e: + return { + "status": "error", + "mesh_index": None, + "message": str(e), + } + + def _inspect_mesh(self, mesh_index: int = -1, detail: str = "summary") -> dict: + """ + Inspect a previously generated mesh by index. 
+ mesh_index: index from run_hypnotoad result. -1 = most recent (default). + """ + if len(self.mesh_history) == 0: + return { + "status": "error", + "message": "No mesh generated. Use run_hypnotoad to generate a mesh.", + } + try: + mesh = self.mesh_history[mesh_index]["mesh"] + except Exception as e: + return { + "status": "error", + "message": str(e), + "hint": f"mesh_index={mesh_index} must be in range 0 <= mesh_index < {len(self.mesh_history)}.", + } + return tools.inspect_mesh(mesh, detail=detail) + + def list_meshes(self) -> dict: + """Summarise all mesh attempts in this session.""" + return { + "n_meshes": len(self.mesh_history), + "meshes": [ + { + "mesh_index": idx, + "valid": a["diagnostics"].get("valid", False), + "n_warnings": a["diagnostics"].get("n_warnings", 0), + "settings": a["settings"], + } + for idx, a in enumerate(self.mesh_history) + ], + } + + def get_equilibrium_info(self) -> dict: + """ + Describe the magnetic equilibrium and geometry. Call this at the start + of a session or when you need to understand the physics before choosing + mesh settings. Returns metrics that inform resolution and spacing choices. + """ + try: + return tools.describe_equilibrium(self.gridfile) + except Exception as e: + return {"status": "error", "message": str(e)} + + def search_hypnotoad_options(self, query: str, k: int = 4) -> list[dict]: + """ """ + return self.options_db.retrieve(query, k) + + @property + def last_mesh(self) -> Optional[BoutMesh]: + """The last successfully generated mesh. Can be None.""" + if len(self.mesh_history) == 0: + return None + return self.mesh_history[-1]["mesh"] + + @property + def last_settings(self) -> Optional[dict]: + """Return the settings used to create the most recent successful mesh. 
Can be None."""
+        if len(self.mesh_history) == 0:
+            return None
+        return self.mesh_history[-1]["settings"]
+
+    def plot_last_mesh(self, ax=None):
+        mesh = self.last_mesh
+        if mesh is None:
+            return
+        ax = mesh.plotPotential(ax=ax)
+        return mesh.plotGridCellEdges(ax=ax)
diff --git a/hypnotoad/agent/tools/__init__.py b/hypnotoad/agent/tools/__init__.py
index eb5e84e6..4626460e 100644
--- a/hypnotoad/agent/tools/__init__.py
+++ b/hypnotoad/agent/tools/__init__.py
@@ -1,5 +1,5 @@
-from .validate import validate_settings
+from .validate_settings import validate_settings
+from .inspect_mesh import inspect_mesh
+from .describe_equilibrium import describe_equilibrium
 
-__all__ = [
-    "validate_settings",
-]
+__all__ = ["validate_settings", "inspect_mesh", "describe_equilibrium"]
diff --git a/hypnotoad/agent/tools/describe_equilibrium.py b/hypnotoad/agent/tools/describe_equilibrium.py
new file mode 100644
index 00000000..5c4d2271
--- /dev/null
+++ b/hypnotoad/agent/tools/describe_equilibrium.py
@@ -0,0 +1,58 @@
+def describe_equilibrium(gridfile) -> dict:
+    """
+    Extract physics-relevant metrics from a GEQDSK file that
+    inform good mesh settings choices.
+ """ + from ...geqdsk import _geqdsk + from ...utils import critical + import numpy as np + + with open(gridfile, "rt") as fh: + data = _geqdsk.read(fh) + + # Range of psi normalises psi derivatives + psi_bdry_gfile = data["sibdry"] + psi_axis_gfile = data["simagx"] + + R1D = np.linspace( + data["rleft"], data["rleft"] + data["rdim"], data["nx"], endpoint=True + ) + + Z1D = np.linspace( + data["zmid"] - 0.5 * data["zdim"], + data["zmid"] + 0.5 * data["zdim"], + data["ny"], + endpoint=True, + ) + + psi2D = data["psi"] + # Find critical points (O- and X-points) + R2D, Z2D = np.meshgrid(R1D, Z1D, indexing="ij") + opoints, xpoints = critical.find_critical(R2D, Z2D, psi2D, 1.0e-6, 1000) + + warning_list = [] + if len(opoints) == 0: + warning_list.append("No O-points found in input magnetic field.") + magnetic_axis = None + else: + magnetic_axis = {"R": opoints[0][0], "Z": opoints[0][1], "psinorm": 0.0} + + if len(xpoints) == 0: + warning_list.append("No X-points found in input magnetic field.") + xpoints = None + else: + xpoints = [ + { + "R": r, + "Z": z, + "psinorm": (psi - psi_axis_gfile) / (psi_bdry_gfile - psi_axis_gfile), + } + for r, z, psi in xpoints[:3] + ] # Maximum 3 + + return { + "psi_increasing": psi_bdry_gfile > psi_axis_gfile, + "magnetic_axis": magnetic_axis, + "xpoints": xpoints, + "warnings": warning_list, + } diff --git a/hypnotoad/agent/tools/inspect_mesh.py b/hypnotoad/agent/tools/inspect_mesh.py new file mode 100644 index 00000000..19002b76 --- /dev/null +++ b/hypnotoad/agent/tools/inspect_mesh.py @@ -0,0 +1,164 @@ +""" +Functions to inspect a BoutMesh object and return summary information +""" + +import numpy as np +from typing import Optional + + +def _region_stats(arr: np.ndarray, name: str) -> dict: + """Compute statistics for a 2D array of cell sizes or metric values.""" + if np.any(~np.isfinite(arr)): + return {"error": f"{name} contains non-finite values"} + return { + "min": float(np.min(arr)), + "max": float(np.max(arr)), + "mean": 
float(np.mean(arr)),
+        "uniformity_ratio": float(np.max(arr) / np.min(arr))
+        if np.min(arr) > 0
+        else None,
+    }
+
+
+def _max_adjacent_ratio(arr2d: np.ndarray) -> float:
+    """
+    Maximum ratio between adjacent cell sizes in either direction.
+    Values >> 1 indicate sudden jumps that cause numerical diffusion.
+    """
+    ratios = []
+    for axis in (0, 1):
+        slc_a = [slice(None)] * 2
+        slc_b = [slice(None)] * 2
+        slc_a[axis] = slice(None, -1)
+        slc_b[axis] = slice(1, None)
+        a = arr2d[tuple(slc_a)]
+        b = arr2d[tuple(slc_b)]
+        # Avoid division by zero; take max(a/b, b/a)
+        with np.errstate(divide="ignore", invalid="ignore"):
+            r = np.where(b > 0, a / b, np.nan)
+            r = np.fmax(r, np.where(a > 0, b / a, np.nan))
+        finite_r = r[np.isfinite(r)]
+        if len(finite_r):
+            ratios.append(float(np.max(finite_r)))
+    return max(ratios) if ratios else float("nan")
+
+
+def _get_array(region, name: str) -> Optional[np.ndarray]:
+    arr = getattr(region, name, None)
+    if arr is None:
+        return None
+    return np.asarray(arr.centre)
+
+
+def _collect_region_arrays(mesh) -> dict[str, dict[str, np.ndarray]]:
+    """
+    BoutMesh stores data in regions accessible via mesh.regions (a dict of
+    name -> MeshRegion). Each region has 2D arrays for coordinates and metrics.
+    Falls back to whole-mesh arrays if regions are not available.
+ """ + arrays_by_region = {} + + # Try structured regions first (preferred) + regions = getattr(mesh, "regions", None) + if regions: + for rindx, region in regions.items(): + # Mesh uses indices rindx + arrays_by_region[region.name] = { + "neighbors": { + edge_name: regions[nindx].name if nindx is not None else None + for edge_name, nindx in mesh.connections[rindx].items() + }, + "Rxy": _get_array(region, "Rxy"), + "Zxy": _get_array(region, "Zxy"), + "dx": _get_array(region, "dx"), + "dy": _get_array(region, "dy"), + "J": _get_array(region, "J"), + "g11": _get_array(region, "g11"), + "g22": _get_array(region, "g22"), + "g33": _get_array(region, "g33"), + "g_11": _get_array(region, "g_11"), + "g_22": _get_array(region, "g_22"), + "g_33": _get_array(region, "g_33"), + "Bxy": _get_array(region, "Bxy"), + } + else: + # Fall back to whole-mesh attributes + arrays_by_region["global"] = { + "Rxy": _get_array(mesh, "Rxy"), + "Zxy": _get_array(mesh, "Zxy"), + "dx": _get_array(mesh, "dx"), + "dy": _get_array(mesh, "dy"), + "J": _get_array(mesh, "J"), + "g11": _get_array(mesh, "g11"), + "g22": _get_array(mesh, "g22"), + "g_11": _get_array(mesh, "g_11"), + "g_22": _get_array(mesh, "g_22"), + "Bxy": _get_array(mesh, "Bxy"), + } + + return arrays_by_region + + +def inspect_mesh(mesh, detail="summary") -> dict: + """ + Inspect a BoutMesh object after calculateRZ() and geometry() have been called. + Returns a structured diagnostic dict suitable as a tool result. + + Attribute names (Rxy, dx, J, etc.) are looked up via multiple candidate names + to be robust against BoutMesh internals. If a quantity cannot be found, + it is reported as null rather than raising an exception. 
+ """ + arrays_by_region = _collect_region_arrays(mesh) + + region_diagnostics = {} + all_J = [] + all_dx = [] + + for rname, arrs in arrays_by_region.items(): + dx = arrs.get("dx") + dy = arrs.get("dy") + J = arrs.get("J") + + rd = {"neighbors": arrs.get("neighbors", None)} + + if dx is not None: + rd["nx"] = dx.shape[1] if dx.ndim == 2 else dx.shape[0] + rd["dx_stats"] = _region_stats(dx, f"{rname}.dx") + rd["max_adjacent_dx_ratio"] = _max_adjacent_ratio(dx) + all_dx.append(dx.ravel()) + + if dy is not None: + rd["ny"] = dy.shape[0] if dy.ndim == 2 else dy.shape[0] + rd["dy_stats"] = _region_stats(dy, f"{rname}.dy") + rd["max_adjacent_dy_ratio"] = _max_adjacent_ratio(dy) + + if J is not None: + rd["J_stats"] = _region_stats(J, f"{rname}.J") + rd["n_negative_jacobian"] = int(np.sum(J <= 0)) + all_J.append(J.ravel()) + + region_diagnostics[rname] = rd + + if detail == "summary": + # One-liner per region: name, size, pass/fail + region_out = { + name: { + "nx": rd.get("nx"), + "ny": rd.get("ny"), + "ok": rd.get("n_negative_jacobian", 0) == 0, + } + for name, rd in region_diagnostics.items() + } + return { + "detail": "summary", + "valid": True, + "regions": region_out, + "hint": None, # "Call inspect_mesh(detail='standard') to investigate warnings" + # if warnings else None, + } + + # Full + return { + "status": "ok", + "regions": region_diagnostics, + } diff --git a/hypnotoad/agent/tools/search.py b/hypnotoad/agent/tools/search.py new file mode 100644 index 00000000..b63884a3 --- /dev/null +++ b/hypnotoad/agent/tools/search.py @@ -0,0 +1,111 @@ +from dataclasses import dataclass +from typing import Optional +import inspect + + +@dataclass +class Chunk: + text: str + section: str + source: str + chunk_type: str # "rst", "function", "usage", "option" + name: Optional[str] = None # function/option name if applicable + start_line: Optional[int] = None + url: Optional[str] = None + + def to_dict(self) -> dict: + d = { + "section": self.section, + "source": self.source, 
+ "chunk_type": self.chunk_type, + "text": self.text, + } + if self.name: + d["name"] = self.name + if self.start_line: + d["start_line"] = self.start_line + if self.url: + d["url"] = self.url + return d + + +def _describe_type(value_type) -> str: + """Human-readable type description from value_type field.""" + if value_type is None: + return "any" + if isinstance(value_type, (list, tuple)): + names = [t.__name__ if t is not None else "None" for t in value_type] + return " or ".join(names) + return value_type.__name__ + + +def extract_option_chunks(known_options) -> list[Chunk]: + """ + Build RAG chunks from the live OptionsFactory. + Each option becomes one chunk with its full metadata. + Called once at index-build time. + """ + chunks = [] + + for name, meta in known_options.items(): + default = meta.value # first arg to WithMeta + doc = getattr(meta, "doc", "No description available") + value_type = getattr(meta, "value_type", None) + allowed = getattr(meta, "allowed", None) + check_all = getattr(meta, "check_all", None) + check_any = getattr(meta, "check_any", None) + + # Build a human-readable description for embedding + lines = [ + f"Option: {name}", + f"Default: {default}", + f"Description: {doc}", + ] + if value_type is not None: + lines.append(f"Type: {_describe_type(value_type)}") + if allowed is not None: + lines.append(f"Allowed values: {allowed}") + if check_all is not None: + checks = check_all if isinstance(check_all, (list, tuple)) else [check_all] + for c in checks: + try: + lines.append(f"Constraint (all): {inspect.getsource(c).strip()}") + except Exception: + lines.append(f"Constraint (all): {c}") + if check_any is not None: + checks = check_any if isinstance(check_any, (list, tuple)) else [check_any] + for c in checks: + try: + lines.append(f"Constraint (any): {inspect.getsource(c).strip()}") + except Exception: + lines.append(f"Constraint (any): {c}") + + chunks.append( + Chunk( + text="\n".join(lines), + section=f"option: {name}", + 
source="hypnotoad.options_factory (live)", + chunk_type="option", + name=name, + ) + ) + + return chunks + + +class ChunkDatabase: + """ + Index chunks and retrieve based on queries + """ + + def __init__(self, chunks: list[Chunk]): + from rank_bm25 import BM25Okapi + + self.chunks = chunks + corpus = [chunk.text.split() for chunk in chunks] + self.bm25 = BM25Okapi(corpus) + + def retrieve(self, query: str, k: int = 4) -> list[dict]: + scores = self.bm25.get_scores(query.split()) + top_k = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:k] + return [self.chunks[i].to_dict() for i in top_k] diff --git a/hypnotoad/agent/tools/validate.py b/hypnotoad/agent/tools/validate_settings.py similarity index 84% rename from hypnotoad/agent/tools/validate.py rename to hypnotoad/agent/tools/validate_settings.py index 7bd35fc4..29748fbd 100644 --- a/hypnotoad/agent/tools/validate.py +++ b/hypnotoad/agent/tools/validate_settings.py @@ -8,17 +8,7 @@ from dataclasses import dataclass from enum import Enum from typing import Any - -from ...cases import tokamak -from ...core.mesh import BoutMesh - -POSSIBLE_OPTIONS = ( - tokamak.TokamakEquilibrium.user_options_factory.defaults - | tokamak.TokamakEquilibrium.nonorthogonal_options_factory.defaults - | BoutMesh.user_options_factory.defaults -) - -POSSIBLE_KEYS = [opt for opt in POSSIBLE_OPTIONS] +from optionsfactory import WithMeta class IssueType(str, Enum): @@ -125,7 +115,7 @@ def _run_checks(value: Any, meta: WithMeta) -> list[str]: return failures -def validate_settings(settings: dict = {}) -> dict: +def validate_settings(possible_options, settings: dict = {}) -> dict: """Check settings for common issues before running. 
Returns a dictionary with a boolean flag 'valid' @@ -134,10 +124,31 @@ def validate_settings(settings: dict = {}) -> dict: """ issues = {} + # Settings that must match due to BOUT++ limitations + for key1, key2 in [ + ("nx_sol_outer", "nx_sol"), + ("nx_sol_inner", "nx_sol"), + ("nx_pf", "nx_core"), + ]: + if key1 in settings: + if key2 in settings: + if settings[key1] != settings[key2]: + issues[key1] = SettingIssue( + issue_type=IssueType.INVALID_VALUE, + message=f"Value of '{key1}' must match '{key2}'. Do not use setting '{key1}'.", + ).to_dict() + else: + issues[key1] = SettingIssue( + issue_type=IssueType.MISSING, + message=f"Do not use setting '{key1}'. Use setting '{key2}' instead.", + ).to_dict() + + possible_keys = [opt for opt in possible_options] + for key, value in settings.items(): # Unknown keys - if key not in POSSIBLE_KEYS: - suggestions = difflib.get_close_matches(key, POSSIBLE_KEYS, n=3, cutoff=0.6) + if key not in possible_keys: + suggestions = difflib.get_close_matches(key, possible_keys, n=3, cutoff=0.6) issues[key] = SettingIssue( issue_type=IssueType.UNKNOWN_KEY, message=f"'{key}' is not a recognised Hypnotoad option", @@ -146,7 +157,7 @@ def validate_settings(settings: dict = {}) -> dict: continue # Validate using WithMeta - meta = POSSIBLE_OPTIONS[key] + meta = possible_options[key] value_type = getattr(meta, "value_type", None) allowed = getattr(meta, "allowed", None) From ac3693c954b1c4f5cfc6e6f9cc9ae2325da89532 Mon Sep 17 00:00:00 2001 From: Ben Dudson Date: Mon, 2 Mar 2026 20:44:46 -0800 Subject: [PATCH 3/8] cases/tokamak: Fix missing `f` string String split across lines but the `f` was missing from the second line. 
--- hypnotoad/cases/tokamak.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hypnotoad/cases/tokamak.py b/hypnotoad/cases/tokamak.py index 57f61cce..2e2e5fd6 100644 --- a/hypnotoad/cases/tokamak.py +++ b/hypnotoad/cases/tokamak.py @@ -1083,12 +1083,12 @@ def describeDoubleNull(self): if not np.isclose(lower_psi, self.psi(*lower_x_point)): raise ValueError( f"psi-value for lower separatrix {lower_psi} does not match value at " - "lower X-point {self.psi(*lower_x_point)}" + f"lower X-point {self.psi(*lower_x_point)}" ) if not np.isclose(upper_psi, self.psi(*upper_x_point)): raise ValueError( f"psi-value for upper separatrix {upper_psi} does not match value at " - "upper X-point {self.psi(*upper_x_point)}" + f"upper X-point {self.psi(*upper_x_point)}" ) # Find lines along the legs from X-point to target From 1ec0f902000358ad3dde9562f5c62db82576232b Mon Sep 17 00:00:00 2001 From: Ben Dudson Date: Fri, 6 Mar 2026 15:06:53 -0800 Subject: [PATCH 4/8] test_validate: Fix tests, reformat Now has two arguments, passing possible options first. 
--- hypnotoad/test_suite/test_validate.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/hypnotoad/test_suite/test_validate.py b/hypnotoad/test_suite/test_validate.py index 65713c2b..5b1a1a21 100644 --- a/hypnotoad/test_suite/test_validate.py +++ b/hypnotoad/test_suite/test_validate.py @@ -1,9 +1,17 @@ from hypnotoad.agent import tools +from hypnotoad.cases import tokamak +from hypnotoad.core.mesh import BoutMesh + +POSSIBLE_OPTIONS = ( + tokamak.TokamakEquilibrium.user_options_factory.defaults + | tokamak.TokamakEquilibrium.nonorthogonal_options_factory.defaults + | BoutMesh.user_options_factory.defaults +) def test_validate_unknown_key(): # Using an invalid key should lead to an issue and suggestion - assert tools.validate_settings({"nxcore": 10}) == { + assert tools.validate_settings(POSSIBLE_OPTIONS, {"nxcore": 10}) == { "valid": False, "issues": { "nxcore": { @@ -17,7 +25,7 @@ def test_validate_unknown_key(): def test_validate_wrong_type(): # Using the wrong type - assert tools.validate_settings({"nx_core": 3.4}) == { + assert tools.validate_settings(POSSIBLE_OPTIONS, {"nx_core": 3.4}) == { "valid": False, "issues": { "nx_core": { @@ -31,7 +39,9 @@ def test_validate_wrong_type(): def test_validate_invalid_value(): - assert tools.validate_settings({"curvature_type": "nonsense"}) == { + assert tools.validate_settings( + POSSIBLE_OPTIONS, {"curvature_type": "nonsense"} + ) == { "valid": False, "issues": { "curvature_type": { @@ -45,7 +55,7 @@ def test_validate_invalid_value(): def test_validate_out_of_bounds(): - assert tools.validate_settings({"refine_width": -1.0}) == { + assert tools.validate_settings(POSSIBLE_OPTIONS, {"refine_width": -1.0}) == { "valid": False, "issues": { "refine_width": { From d28021296f2fe41188512829b70392ff6c7eca89 Mon Sep 17 00:00:00 2001 From: Ben Dudson Date: Sun, 8 Mar 2026 10:57:45 -0700 Subject: [PATCH 5/8] tools: Improve inspect_mesh and describe_equilibrium Provide warnings and hints to 
guide changes to nx values. Capture the header line from the geqdsk file. --- hypnotoad/agent/tools/describe_equilibrium.py | 6 + hypnotoad/agent/tools/inspect_mesh.py | 120 +++++++++++++++--- 2 files changed, 107 insertions(+), 19 deletions(-) diff --git a/hypnotoad/agent/tools/describe_equilibrium.py b/hypnotoad/agent/tools/describe_equilibrium.py index 5c4d2271..1e2e774f 100644 --- a/hypnotoad/agent/tools/describe_equilibrium.py +++ b/hypnotoad/agent/tools/describe_equilibrium.py @@ -7,6 +7,11 @@ def describe_equilibrium(gridfile) -> dict: from ...utils import critical import numpy as np + # Read header line, discard single characters + with open(gridfile, "rt") as fh: + header = fh.readline() + header_tok = [tok for tok in header.split() if len(tok) > 1] + with open(gridfile, "rt") as fh: data = _geqdsk.read(fh) @@ -51,6 +56,7 @@ def describe_equilibrium(gridfile) -> dict: ] # Maximum 3 return { + "header": header_tok, "psi_increasing": psi_bdry_gfile > psi_axis_gfile, "magnetic_axis": magnetic_axis, "xpoints": xpoints, diff --git a/hypnotoad/agent/tools/inspect_mesh.py b/hypnotoad/agent/tools/inspect_mesh.py index 19002b76..842f5ea8 100644 --- a/hypnotoad/agent/tools/inspect_mesh.py +++ b/hypnotoad/agent/tools/inspect_mesh.py @@ -14,9 +14,9 @@ def _region_stats(arr: np.ndarray, name: str) -> dict: "min": float(np.min(arr)), "max": float(np.max(arr)), "mean": float(np.mean(arr)), - "uniformity_ratio": float(np.max(arr) / np.min(arr)) - if np.min(arr) > 0 - else None, + "uniformity_ratio": ( + float(np.max(arr) / np.min(arr)) if np.min(arr) > 0 else None + ), } @@ -44,7 +44,7 @@ def _max_adjacent_ratio(arr2d: np.ndarray) -> float: def _get_array(region, name: str) -> Optional[np.ndarray]: - arr = getattr(region, "Rxy", None) + arr = getattr(region, name, None) if arr is None: return None return np.asarray(arr.centre) @@ -103,10 +103,6 @@ def inspect_mesh(mesh, detail="summary") -> dict: """ Inspect a BoutMesh object after calculateRZ() and geometry() have been 
called. Returns a structured diagnostic dict suitable as a tool result. - - Attribute names (Rxy, dx, J, etc.) are looked up via multiple candidate names - to be robust against BoutMesh internals. If a quantity cannot be found, - it is reported as null rather than raising an exception. """ arrays_by_region = _collect_region_arrays(mesh) @@ -114,51 +110,137 @@ def inspect_mesh(mesh, detail="summary") -> dict: all_J = [] all_dx = [] + errors = [] + warnings = [] + for rname, arrs in arrays_by_region.items(): dx = arrs.get("dx") dy = arrs.get("dy") J = arrs.get("J") + g22 = arrs.get("g22") + g_22 = arrs.get("g_22") - rd = {"neighbors": arrs.get("neighbors", None)} + rd = { + "neighbors": arrs.get("neighbors", None), + "nx": dx.shape[0], + "ny": dx.shape[1], + "valid": True, + } if dx is not None: - rd["nx"] = dx.shape[1] if dx.ndim == 2 else dx.shape[0] - rd["dx_stats"] = _region_stats(dx, f"{rname}.dx") + rd["dx"] = _region_stats(dx, f"{rname}.dx") rd["max_adjacent_dx_ratio"] = _max_adjacent_ratio(dx) all_dx.append(dx.ravel()) if dy is not None: - rd["ny"] = dy.shape[0] if dy.ndim == 2 else dy.shape[0] - rd["dy_stats"] = _region_stats(dy, f"{rname}.dy") + rd["dy"] = _region_stats(dy, f"{rname}.dy") rd["max_adjacent_dy_ratio"] = _max_adjacent_ratio(dy) + if g22 is not None: + dlpol = dy / np.sqrt(g22) + rd["dlpol_poloidal_cell_size"] = _region_stats(dlpol, f"{rname}.dlpol") + rd["max_adjacent_dlpol_ratio"] = _max_adjacent_ratio(dlpol) + + if g_22 is not None: + dlpar = dy * np.sqrt(g_22) + rd["dlpar_parallel_cell_size"] = _region_stats(dlpar, f"{rname}.dlpar") + rd["max_adjacent_dlpar_ratio"] = _max_adjacent_ratio(dlpar) + if J is not None: - rd["J_stats"] = _region_stats(J, f"{rname}.J") + rd["J"] = _region_stats(J, f"{rname}.J") rd["n_negative_jacobian"] = int(np.sum(J <= 0)) all_J.append(J.ravel()) + for varname in [ + "dx", + "dy", + "J", + "dlpol_poloidal_cell_size", + "dlpar_parallel_cell_size", + ]: + if "error" in rd[varname]: + 
errors.append(rd[varname]["error"]) + rd["valid"] = False + region_diagnostics[rname] = rd + # Check for small dx + for rname, rd in region_diagnostics.items(): + if ("min" in rd["dx"]) and rd["dx"]["min"] < 1e-8: + errors.append(f"Too small dx in region {rname}: {rd['dx']['min']}") + rd["valid"] = False + + # Large non-uniformity + for rname, rd in region_diagnostics.items(): + ratio = rd["dx"].get("uniformity_ratio", None) + if ratio is None: + continue + if ratio > 1e2: + warnings.append(f"Large dx uniformity_ratio in region {rname}: {ratio}") + + # Check for dx in neighboring regions + for rname, rd in region_diagnostics.items(): + dx_mean = rd["dx"].get("mean", None) + if dx_mean is None: + continue + + # Which setting affects the size of this region? + if (rd["neighbors"].get("inner", None) is not None) and ( + rd["neighbors"].get("outer", None) is not None + ): + nx_name = "nx_inter_sep" + elif rd["neighbors"].get("inner", None) is None: + if "core" in rname: + nx_name = "nx_core" + else: + nx_name = "nx_pf" + else: + nx_name = "nx_sol" + + for direction in ["inner", "outer"]: + nname = rd["neighbors"].get(direction, None) + if nname is None: + continue + ndx_mean = region_diagnostics[nname]["dx"].get("mean", None) + if ndx_mean is None: + continue + if dx_mean > 10 * ndx_mean: + warnings.append( + f"mean dx in region {rname} is {dx_mean / ndx_mean} times dx in neighbor region {nname}. Increase setting {nx_name}." + ) + if dx_mean < 0.1 * ndx_mean: + warnings.append( + f"mean dx in region {rname} is {dx_mean / ndx_mean} times dx in neighbor region {nname}. Decrease setting {nx_name}." 
+ ) + if detail == "summary": # One-liner per region: name, size, pass/fail region_out = { name: { "nx": rd.get("nx"), "ny": rd.get("ny"), - "ok": rd.get("n_negative_jacobian", 0) == 0, + "valid": rd["valid"], } for name, rd in region_diagnostics.items() } return { "detail": "summary", - "valid": True, + "valid": len(errors) == 0, + "errors": errors, + "warnings": warnings, "regions": region_out, - "hint": None, # "Call inspect_mesh(detail='standard') to investigate warnings" - # if warnings else None, + "hint": ( + "Call inspect_mesh(detail='full') to investigate warnings" + if len(warnings) > 0 + else None + ), } # Full return { - "status": "ok", + "detail": "full", + "valid": len(errors) == 0, + "errors": errors, + "warnings": warnings, "regions": region_diagnostics, } From 9c77c00e461b9a270eec71bc74d814534a663697 Mon Sep 17 00:00:00 2001 From: Ben Dudson Date: Sun, 8 Mar 2026 11:40:46 -0700 Subject: [PATCH 6/8] hypnotoad_agent: Fix plot_last_mesh Some inconsistency in ax vs axis keywords --- hypnotoad/agent/hypnotoad_agent.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hypnotoad/agent/hypnotoad_agent.py b/hypnotoad/agent/hypnotoad_agent.py index a14e5837..ea3c68ca 100644 --- a/hypnotoad/agent/hypnotoad_agent.py +++ b/hypnotoad/agent/hypnotoad_agent.py @@ -556,8 +556,9 @@ def last_settings(self) -> Optional[dict]: return self.mesh_history[-1]["settings"] def plot_last_mesh(self, ax=None): - mesh = self.mesh + """Plots the most recent successfully generated mesh""" + mesh = self.last_mesh if mesh is None: return - ax = mesh.plotPotential(ax=ax) + ax = mesh.plotPotential(axis=ax) return mesh.plotGridCellEdges(ax=ax) From 5e991430cefc505de0a564046338df1ead3af9f5 Mon Sep 17 00:00:00 2001 From: Ben Dudson Date: Mon, 9 Mar 2026 09:24:15 -0700 Subject: [PATCH 7/8] agent.tools.search: Add ChunkFaissDatabase Uses FAISS and an embedding model to perform semantic search. 
--- hypnotoad/agent/tools/search.py | 281 +++++++++++++++++++++++++++++++- 1 file changed, 277 insertions(+), 4 deletions(-) diff --git a/hypnotoad/agent/tools/search.py b/hypnotoad/agent/tools/search.py index b63884a3..c216752c 100644 --- a/hypnotoad/agent/tools/search.py +++ b/hypnotoad/agent/tools/search.py @@ -1,6 +1,11 @@ -from dataclasses import dataclass +from dataclasses import dataclass, asdict from typing import Optional import inspect +import numpy as np +from pathlib import Path +import faiss +from rank_bm25 import BM25Okapi +import json @dataclass @@ -95,12 +100,11 @@ def extract_option_chunks(known_options) -> list[Chunk]: class ChunkDatabase: """ - Index chunks and retrieve based on queries + Index chunks and retrieve based on queries. + This database uses BM25 to rank based on keywords. """ def __init__(self, chunks: list[Chunk]): - from rank_bm25 import BM25Okapi - self.chunks = chunks corpus = [chunk.text.split() for chunk in chunks] self.bm25 = BM25Okapi(corpus) @@ -109,3 +113,272 @@ def retrieve(self, query: str, k: int = 4) -> list[dict]: scores = self.bm25.get_scores(query.split()) top_k = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:k] return [self.chunks[i].to_dict() for i in top_k] + + +class ChunkFaissDatabase: + """ + Persistent vector database for `Chunk` objects using OpenAI embeddings + and a FAISS similarity index. + + Each chunk's embedding is stored in a FAISS index (IndexFlatIP) with + L2-normalised vectors so that inner product corresponds to cosine similarity. + + IMPORTANT: + The implementation assumes that the order of vectors in the FAISS index + matches the order of `self.chunks`. The i-th embedding in the index + corresponds to `self.chunks[i]`. The database is therefore append-only + and does not support deletion or reordering of chunks without rebuilding + the index. 
+ + Data is persisted to disk as: + - faiss.index : serialized FAISS index + - manifest.json : metadata (embedding model, dimension, etc.) + - chunks.jsonl : one JSON-encoded Chunk per line + + """ + + def __init__( + self, + client, + model: Optional[str] = None, + restore: Optional[Path | str] = None, + ): + """ + Parameters + ---------- + client : openai.OpenAI + OpenAI client used to generate embeddings. + + model : str, optional + Name of embedding model used to generate vectors. + Required when creating a new database. + + restore : str or Path, optional + Directory containing previously saved database files. + If provided, the index and chunks are restored from disk. + """ + self.client = client + self.model = model + self.chunks = [] + self.index = None + if restore is None: + # Start a new index + if model is None: + raise ValueError("Specify a model for new database") + # Create index in add_chunks when embedding size is known + else: + # Restore database from file + self.load(restore) + if (model is not None) and (model != self.model): + raise ValueError( + f"Model '{model}' not equal to '{self.model}' in restored database {restore}" + ) + + def add_chunks(self, chunks: list[Chunk]): + """ + Add new chunks to the database. + + For each chunk: + 1. Compute its embedding using the configured embedding model. + 2. L2-normalise the embedding (for cosine similarity search). + 3. Append the embedding to the FAISS index. + 4. Append the chunk to `self.chunks`. + + The order of addition is preserved, so the FAISS vector at position i + corresponds to `self.chunks[i]`. + + Notes + ----- + - This database is append-only. Removing or reordering chunks will + break alignment between the FAISS index and `self.chunks`. + - All embeddings must have the same dimension as the existing index. + - If the index has not yet been created, it will be initialised + using the embedding dimension of the first batch. 
+ + Parameters + ---------- + chunks : list[Chunk] + Chunks to embed and add to the database. + + Raises + ------ + ValueError + If the embedding dimension does not match the existing index. + """ + if not chunks: + return + + texts = [chunk.text for chunk in chunks] + embeddings = self._calculate_embeddings(texts) + X = np.array(embeddings, dtype="float32") + faiss.normalize_L2(X) # for cosine similarity + + if self.index is None: + self.index = faiss.IndexFlatIP(X.shape[1]) + elif self.index.d != X.shape[1]: + raise ValueError(f"Embedding dim {X.shape[1]} != index dim {self.index.d}") + + # Add to index and chunks list in the same order + # so that indices remain synchronised. + self.index.add(X) + self.chunks.extend(chunks) + + def retrieve(self, query: str, k: int = 4): + """ + Retrieve the top-k most similar chunks for a query string. + + The query is embedded using the configured embedding model, + L2-normalised, and searched against the FAISS index using + inner product similarity (equivalent to cosine similarity). + + Parameters + ---------- + query : str + Natural-language search query. + k : int, default=4 + Number of top results to return. If k exceeds the number + of indexed chunks, it will be clamped. + + Returns + ------- + chunks : list[Chunk] + Retrieved chunks in descending similarity order. + scores : list[float] + Corresponding similarity scores (cosine similarity). + + Notes + ----- + - Returns empty lists if the index is empty. + - Scores are inner products of L2-normalised vectors + (i.e., cosine similarity in [-1, 1]). + """ + if self.index is None or len(self.chunks) == 0: + # Nothing added to database + return [], [] + k = min(k, len(self.chunks)) + + query_embedding = self._calculate_embeddings([query]) + q = np.array(query_embedding, dtype="float32") + faiss.normalize_L2(q) + + Dists, Inds = self.index.search(q, k) + # If the chunks list and FAISS index are kept in sync + # then we can use the returned index directly. 
+ # Filter out -1 indices from chunks and scores + chunks = [] + scores = [] + for i, s in zip(Inds[0], Dists[0]): + if i != -1: + chunks.append(self.chunks[i]) + scores.append(s) + return chunks, scores + + def _calculate_embeddings(self, texts: list[str]) -> list[list[float]]: + """ + Compute embeddings for a list of texts using the configured model. + + The returned embeddings are in the same order as the input texts. + + Parameters + ---------- + texts : list[str] + Text strings to embed. + + Returns + ------- + list[list[float]] + List of embedding vectors, one per input text. + + Notes + ----- + - All embeddings have identical dimensionality. + - This method does not normalise embeddings; normalisation is + performed by the caller before adding to or querying the index. + """ + response = self.client.embeddings.create(model=self.model, input=texts) + # Keep order of embeddings as inputs + data = sorted(response.data, key=lambda x: x.index) + return [item.embedding for item in data] + + def load(self, directory_path: Path | str): + """ + Load a previously saved database from disk. + + This restores: + - The FAISS index from 'faiss.index' + - Metadata (including embedding model) from 'manifest.json' + - Chunk objects from 'chunks.jsonl' + + Parameters + ---------- + directory_path : str or Path + Directory containing the saved database files. + + Raises + ------ + ValueError + If the directory does not exist or required files are missing. + If the number of chunks does not match the number of vectors + in the FAISS index. 
+ """ + directory_path = Path(directory_path) + if not directory_path.is_dir(): + raise ValueError(f"Expected directory, got: {directory_path}") + + self.index = faiss.read_index(str(directory_path / "faiss.index")) + + self.chunks = [] + with open(directory_path / "chunks.jsonl", "r", encoding="utf-8") as f: + for line in f: + d = json.loads(line) + self.chunks.append(Chunk(**d)) + + manifest_path = directory_path / "manifest.json" + with open(manifest_path, "r", encoding="utf-8") as f: + manifest = json.load(f) + self.model = manifest["embedding_model"] + # basic integrity check + if self.index.ntotal != len(self.chunks): + raise ValueError( + f"Index ntotal={self.index.ntotal} != chunks={len(self.chunks)}" + ) + + def save(self, directory_path: Path | str): + """ + Save the FAISS index, metadata, and chunks to disk. + + The following files are written to the specified directory: + - faiss.index : serialized FAISS index + - manifest.json : embedding model, dimension, and counts + - chunks.jsonl : one JSON-encoded Chunk per line + + Parameters + ---------- + directory_path : str or Path + Target directory. Created if it does not exist. + + Notes + ----- + - This method overwrites existing files in the directory. + - The saved database can be restored by passing the same directory + to the constructor via the `restore` parameter. 
+ """ + directory_path = Path(directory_path) + directory_path.mkdir(parents=True, exist_ok=True) + + faiss.write_index(self.index, str(directory_path / "faiss.index")) + + with open(directory_path / "chunks.jsonl", "w", encoding="utf-8") as f: + # Write chunks on separate lines + for ch in self.chunks: + f.write(json.dumps(asdict(ch), ensure_ascii=False) + "\n") + + manifest = { + "schema_version": 1, + "embedding_model": self.model, + "embedding_dim": int(self.index.d) if self.index is not None else None, + "count": len(self.chunks), + "metric": "cosine_ip_normalized", + } + with open(directory_path / "manifest.json", "w", encoding="utf-8") as f: + json.dump(manifest, f, ensure_ascii=False, indent=2) From f6659a7798fa4f822399d2b909ba0b515ca70608 Mon Sep 17 00:00:00 2001 From: Ben Dudson Date: Mon, 9 Mar 2026 13:09:10 -0700 Subject: [PATCH 8/8] agent: Add experience database and manual page Experience database provides a tool to search when finding solutions. --- doc/agent.rst | 206 ++++++++ doc/index.rst | 1 + hypnotoad/agent/hypnotoad_agent.py | 748 +++++++++++++++++++++++----- hypnotoad/agent/tools/experience.py | 74 +++ 4 files changed, 891 insertions(+), 138 deletions(-) create mode 100644 doc/agent.rst create mode 100644 hypnotoad/agent/tools/experience.py diff --git a/doc/agent.rst b/doc/agent.rst new file mode 100644 index 00000000..b56ca233 --- /dev/null +++ b/doc/agent.rst @@ -0,0 +1,206 @@ +HypnotoadAgent: LLM-Assisted Mesh Generation +============================================ + +The ``HypnotoadAgent`` provides an LLM-driven interface to the Hypnotoad +mesh generator. It combines: + +- Structured tool calls (validate, run, inspect) +- A searchable options reference +- A growing database of past experience +- Interactive or notebook-based workflows + +The agent is designed to help users explore the settings space efficiently, +diagnose mesh quality issues, and improve results over time. 
+ +Overview +-------- + +The agent wraps Hypnotoad’s normal workflow: + +1. Inspect equilibrium geometry +2. Choose mesh settings +3. Validate settings +4. Run mesh generation +5. Inspect mesh quality +6. Iterate until acceptable + +The LLM can call tools to perform these steps programmatically, rather than +guessing option names or values. + +Basic Usage +----------- + +Create an agent by providing a GEQDSK (or equivalent) equilibrium file: + +.. code-block:: python + + from hypnotoad.agent import HypnotoadAgent + + agent = HypnotoadAgent( + gridfile="example.geqdsk", + model="gpt-4o-mini", + embedding_model="text-embedding-3-large", + experience_db="experience_store" + ) + +Then interact with it: + +.. code-block:: python + + response = agent.chat( + "Generate a mesh for this equilibrium with high resolution near the X-point." + ) + print(response) + +In a Jupyter notebook, use: + +.. code-block:: python + + agent.chat_nb("Create a double-null mesh with refined SOL resolution") + +This displays tool calls and outputs in collapsible sections. + +Main Tools +---------- + +The agent exposes the following tools to the LLM: + +get_equilibrium_info +^^^^^^^^^^^^^^^^^^^^ + +Describes magnetic topology, X-point locations, and geometric scale. +Call this at the start of a session to inform resolution choices. + +validate_settings +^^^^^^^^^^^^^^^^^ + +Checks a settings dictionary against Hypnotoad’s options schema: + +- Unknown option names +- Type mismatches +- Violations of allowed ranges +- Constraint failures + +Always validate before running Hypnotoad. + +run_hypnotoad +^^^^^^^^^^^^^ + +Runs the mesh generator with a given settings dictionary. 
+ +Returns: + +- ``status`` (success or error) +- ``mesh_index`` (for later inspection) +- ``diagnostics`` (summary metrics and warnings) + +inspect_mesh +^^^^^^^^^^^^ + +Examines mesh quality at different detail levels: + +- ``summary`` — global pass/fail and warnings +- ``standard`` — per-region statistics and connectivity +- ``full`` — cell-level diagnostics + +Typical workflow: + +.. code-block:: python + + validate_settings(...) + run_hypnotoad(...) + inspect_mesh(detail="summary") + +search_hypnotoad_options +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Searches the settings reference using BM25 keyword search. + +Use this when: + +- You do not know the exact option name +- Validation reports an unknown key +- You want to understand default values or allowed ranges + +Do not guess option names. + +search_experience +^^^^^^^^^^^^^^^^^ + +Searches previously stored mesh-generation experience. + +Use this when: + +- You encounter a warning or error +- You are working with a similar topology +- You want to see what worked before + +This enables the agent to improve over time. + +Experience Database +------------------- + +The agent can store summaries of successful (and failed) runs in a +persistent FAISS-based vector database. + +After generating a mesh, call: + +.. code-block:: python + + agent.add_experience_report() + agent.save_experience() + +Each experience entry includes: + +- Topology and goal +- Overrides (differences from defaults) +- Key lessons (symptom → change → outcome) +- Diagnostics summary + +On future runs, the agent can retrieve similar experiences and reuse +successful parameter combinations. + +Session History +--------------- + +All meshes generated in a session are stored in memory: + +.. code-block:: python + + agent.list_meshes() + +You can inspect or plot the most recent mesh: + +.. code-block:: python + + agent.inspect_mesh(mesh_index=-1, detail="standard") + agent.plot_last_mesh() + +Best Practices +-------------- + +1. 
Always call ``get_equilibrium_info`` at the start. +2. Use ``search_hypnotoad_options`` rather than guessing option names. +3. Validate settings before running. +4. Inspect at ``summary`` level before requesting ``full`` detail. +5. Change only a few options per iteration. +6. Save experience after successful runs. + +Limitations +----------- + +- The agent does not automatically delete meshes from history. +- The experience database is append-only. +- Large conversations may exceed model context limits. +- Mesh quality metrics depend on Hypnotoad’s diagnostics. + +Future Extensions +----------------- + +Possible enhancements include: + +- Hybrid search (BM25 + vector) over options and experience +- Automatic option-diff computation for experience entries +- Objective-driven optimisation loops +- Integration with CI pipelines + diff --git a/doc/index.rst b/doc/index.rst index 84ee7e34..174b802d 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -21,6 +21,7 @@ Welcome to hypnotoad's documentation! tips-and-tricks nonorthogonal-tips utilities + agent provenance-tracking _apidoc/modules other-configurations diff --git a/hypnotoad/agent/hypnotoad_agent.py b/hypnotoad/agent/hypnotoad_agent.py index ea3c68ca..08de4ef1 100644 --- a/hypnotoad/agent/hypnotoad_agent.py +++ b/hypnotoad/agent/hypnotoad_agent.py @@ -2,6 +2,7 @@ import json import pprint from typing import Optional +from pathlib import Path from ..cases import tokamak from ..core.mesh import BoutMesh from . import tools @@ -32,6 +33,9 @@ - search_hypnotoad_options(query, k): Search the Hypnotoad settings options reference, returning the k most relevant results. Do not guess settings keys. +- search_experience(query, k): Search past experience, returning the k most + relevant results. + ## Workflow Follow these steps in order. Do not skip steps or change their sequence. @@ -82,9 +86,10 @@ exclude the excess. - The suggested psinorm_pf and psinorm_sol values. 3. 
Call search_hypnotoad_options to find options relevant to the error. -4. Construct corrected settings, then call validate_settings. +4. Before changing more than 2 options, call search_experience for similar cases. +5. Construct corrected settings, then call validate_settings. Fix all reported issues before proceeding. -5. Call run_hypnotoad with the corrected settings. +6. Call run_hypnotoad with the corrected settings. Repeat Step 2a up to 3 times. If the mesh still fails after 3 attempts, report the full error history to the user and ask for guidance. @@ -125,120 +130,182 @@ - If uncertain about any step, ask the user before proceeding. """ -# Anthropic API format -TOOLS = [ +TOOLS_OPENAI = [ { - "name": "validate_settings", - "description": "Validate a settings dict before running Hypnotoad.", - "input_schema": { - "type": "object", - "properties": {"settings": {"type": "object"}}, - "required": ["settings"], + "type": "function", + "function": { + "name": "get_equilibrium_info", + "description": ( + "Describe the magnetic equilibrium and geometry from the input grid file. " + "Call this at the start of a session (before choosing mesh settings) or when you need " + "to understand topology (single-null / double-null), X-point locations, and size/shape " + "metrics that inform resolution and spacing choices." + ), + "parameters": {"type": "object", "properties": {}, "required": []}, }, }, { - "name": "run_hypnotoad", - "description": "Run Hypnotoad mesh generator with a settings dict. Returns success/error and mesh metadata.", - "input_schema": { - "type": "object", - "properties": { - "settings": { - "type": "object", - "description": "Hypnotoad settings dictionary", - } + "type": "function", + "function": { + "name": "validate_settings", + "description": ( + "Validate a Hypnotoad settings dict against the OptionsFactory schema (types, allowed values, constraints). 
" + "Call this BEFORE run_hypnotoad when you have changed settings or are unsure about option names/values. " + "If validation reports unknown keys, use search_hypnotoad_options to find the correct option names." + ), + "parameters": { + "type": "object", + "properties": { + "settings": { + "type": "object", + "description": "Hypnotoad settings dictionary to validate. Keys must be exact option paths; values must match types/constraints.", + }, + }, + "required": ["settings"], }, - "required": ["settings"], }, }, { - "name": "inspect_mesh", - "description": """Inspect mesh quality after generation. Use detail levels -progressively: -- 'summary' (default): global pass/fail + warning list. Always call this first. -- 'standard': per-region statistics + connections. Call when summary has warnings. -- 'full': worst-cell locations, metric tensors, interface continuity details. - Call only to diagnose a specific problem identified at standard level.""", - "input_schema": { - "type": "object", - "properties": {"detail": {"type": "string"}}, - "required": [], + "type": "function", + "function": { + "name": "run_hypnotoad", + "description": ( + "Run the Hypnotoad mesh generator with a settings dict. " + "Returns success/error and a mesh_index for later inspection. " + "Best practice: validate_settings -> run_hypnotoad -> inspect_mesh(detail='summary') " + "and only increase detail if needed." + ), + "parameters": { + "type": "object", + "properties": { + "settings": { + "type": "object", + "description": "Hypnotoad settings dictionary. 
Use validate_settings first; do not guess option names.", + }, + }, + "required": ["settings"], + }, }, }, { - "name": "list_meshes", - "description": "List all meshes generated in this session with their " - "index, pass/fail status, and settings", - "input_schema": {"type": "object", "properties": {}, "required": []}, + "type": "function", + "function": { + "name": "inspect_mesh", + "description": ( + "Inspect mesh quality after generation.\n" + "Use detail levels progressively:\n" + "- 'summary' (default): global pass/fail + warning list. Always call this first.\n" + "- 'standard': per-region statistics + connections. Call when summary has warnings.\n" + "- 'full': worst-cell locations, metric tensors, interface continuity details.\n" + " Call only to diagnose a specific problem identified at standard level." + ), + "parameters": { + "type": "object", + "properties": { + "mesh_index": { + "type": "integer", + "description": ( + "Index of mesh to inspect, as returned by run_hypnotoad. " + "Use -1 to inspect the most recent mesh." + ), + "default": -1, + }, + "detail": { + "type": "string", + "description": "Inspection detail level.", + "enum": ["summary", "standard", "full"], + "default": "summary", + }, + }, + "required": [], + }, + }, }, { - "name": "search_hypnotoad_options", - "description": ( - "Search the Hypnotoad settings options reference. Returns the k most " - "relevant options matching the query, each with its name, default value, " - "type, allowed values, and description.\n\n" - "Use this tool when:\n" - "- You need to know the exact name of an option (e.g. 
'what option " - "controls poloidal spacing near the X-point?')\n" - "- You need to know the default, type, or allowed values for a specific " - "option before setting it\n" - "- validate_settings has returned an unknown_key error and you want to " - "find the correct option name\n" - "- You are constructing a settings dict and want to check what options " - "are available for a particular aspect of the mesh\n\n" - "Do not guess option names. Always use this tool if you are unsure." - ), - "input_schema": { - "type": "object", - "properties": { - "query": { - "type": "string", - "description": ( - "Natural language description of the option or behaviour you " - "are looking for. Can be a partial option name, a physical " - "concept, or a description of what you want to control. " - "Examples:\n" - "- 'X-point poloidal spacing'\n" - "- 'number of radial points in SOL'\n" - "- 'target plate resolution'\n" - "- 'nx_inter_sep'\n" - "- 'orthogonal mesh'" - ), + "type": "function", + "function": { + "name": "list_meshes", + "description": ( + "List all meshes generated in this session with their mesh_index, pass/fail status, warning count, and settings." + ), + "parameters": {"type": "object", "properties": {}, "required": []}, + }, + }, + { + "type": "function", + "function": { + "name": "search_hypnotoad_options", + "description": ( + "Search the Hypnotoad settings options reference (BM25 keyword search over option docs). " + "Returns the k most relevant options matching the query, each with its name, default value, type, allowed values, and description.\n\n" + "Use this tool when:\n" + "- You need the exact name/path of an option\n" + "- You need default/type/allowed values before setting it\n" + "- validate_settings reports unknown keys\n" + "- You are exploring how to control a specific aspect of the mesh\n\n" + "Do not guess option names. Always use this tool if you are unsure." 
+ ), + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": ( + "Natural language description of the option/behaviour. Can be a partial option name or concept. Examples:\n" + "- 'X-point poloidal spacing'\n" + "- 'number of radial points in SOL'\n" + "- 'target plate resolution'\n" + "- 'nx_intersep'\n" + "- 'orthogonal mesh'" + ), + }, + "k": { + "type": "integer", + "description": "Number of options to return. Default 4. Use up to 10 when exploring.", + "default": 4, + "minimum": 1, + "maximum": 10, + }, }, - "k": { - "type": "integer", - "description": ( - "Number of options to return. Default 4. Use a larger value " - "(up to 10) when exploring an unfamiliar area of the settings " - "space, or when the first results do not contain what you need." - ), - "default": 4, - "minimum": 1, - "maximum": 10, + "required": ["query"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "search_experience", + "description": ( + "Search the saved experience database of prior Hypnotoad runs (successful and failed). " + "Use this BEFORE making large settings changes, especially when you see a warning/error or when working with a similar topology.\n\n" + "Typical uses:\n" + "- 'connected double-null second X-point distortion'\n" + "- 'nx_intersep too low warnings'\n" + "- 'mesh smoothing interface continuity'\n" + "- paste a short error/warning message fragment to find prior fixes\n\n" + "Returns the top-k most relevant experience reports with key overrides and lessons." + ), + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Natural-language query, option names, topology keywords, or error/warning fragments.", + }, + "k": { + "type": "integer", + "description": "Number of experience records to return. Default 4. 
Use up to 10 when exploring.", + "default": 4, + "minimum": 1, + "maximum": 10, + }, }, + "required": ["query"], }, - "required": ["query"], }, }, ] - -def to_openai_tools(anthropic_tools: list[dict]) -> list[dict]: - """Convert Anthropic-format tool definitions to OpenAI format.""" - return [ - { - "type": "function", - "function": { - "name": t["name"], - "description": t["description"], - "parameters": t["input_schema"], - }, - } - for t in anthropic_tools - ] - - -TOOLS_OPENAI = to_openai_tools(TOOLS) - POSSIBLE_OPTIONS = ( tokamak.TokamakEquilibrium.user_options_factory.defaults | tokamak.TokamakEquilibrium.nonorthogonal_options_factory.defaults @@ -281,8 +348,74 @@ def default_handler(title, func, *args, **kwargs): class HypnotoadAgent: - def __init__(self, gridfile, base_url: str = None, api_key: str = None, model=None): + """ + LLM-driven controller for the Hypnotoad mesh generator. + + The agent exposes tools that the language model may call (validate_settings, + run_hypnotoad, inspect_mesh, list_meshes, search_hypnotoad_options, + search_experience, get_equilibrium_info). It maintains an in-memory + session (messages + mesh_history) and optional persistent experience + storage (ChunkFaissDatabase). + + Parameters + ---------- + gridfile : str or Path + Path to the equilibrium/grid file (GEQDSK or compatible format) used + to construct meshes. + + base_url : str, optional + Optional base URL for the OpenAI-compatible API. + + api_key : str, optional + API key for the OpenAI-compatible client. + + model : str, optional + Chat/completion model id used for agent reasoning and tools. + + embedding_model : str, optional + Embedding model id used to create experience embeddings. + experience_db : str or Path, optional + Directory to restore/save the persistent experience FAISS store. + + Notes + ----- + - The agent is primarily a thin orchestration layer; heavy lifting is + delegated to tools in `tools.*` and to the OpenAI client for LLM calls. 
+ - Mesh generation results are stored in `self.mesh_history` as entries + containing at least {'settings', 'mesh', 'diagnostics'}. + """ + + def __init__( + self, + gridfile, + base_url: str = None, + api_key: str = None, + model: str = None, + embedding_model: str = None, + experience_db: Path | str = None, + ): + """ + Initialize the HypnotoadAgent. + + Sets up the OpenAI client, model/tool bindings, BM25 options index, + optional FAISS-based experience database, and an empty mesh history. + + Parameters + ---------- + gridfile : str | Path + Path to the equilibrium/grid file for mesh generation. + base_url : str, optional + Base URL for the OpenAI-compatible API. + api_key : str, optional + API key for the OpenAI-compatible API. + model : str, optional + Model id for chat completions. + embedding_model : str, optional + Embedding model id for the experience database. + experience_db : str | Path, optional + Directory to restore the experience database from. + """ from openai import OpenAI from .tools.search import ChunkDatabase, extract_option_chunks @@ -290,13 +423,6 @@ def __init__(self, gridfile, base_url: str = None, api_key: str = None, model=No self.logger = logger.getChild(self.__class__.__name__) self.client = OpenAI(base_url=base_url, api_key=api_key) self.model = model - - # Check which models are available - available_models = [model.id for model in self.client.models.list()] - if model not in available_models: - raise ValueError( - f"Model {model} not available. Available models are {available_models}" - ) self.tools = TOOLS_OPENAI # Maintain chat history. 
This is sent to the LLM at each call @@ -317,7 +443,7 @@ def __init__(self, gridfile, base_url: str = None, api_key: str = None, model=No }, "run_hypnotoad": { "function": self.run_hypnotoad, - "signature": {"settings": dict}, + "signature": {"settings": dict, "notes": str}, }, "inspect_mesh": { "function": self._inspect_mesh, @@ -328,20 +454,87 @@ def __init__(self, gridfile, base_url: str = None, api_key: str = None, model=No "function": self.search_hypnotoad_options, "signature": {"query": str, "k": int}, }, + "search_experience": { + "function": self.search_experience, + "signature": {"query": str, "k": int}, + }, } # Index available options so that the LLM can query self.options_db = ChunkDatabase(extract_option_chunks(POSSIBLE_OPTIONS)) + # Database of past experience + self.experience_db = None + self.experience_db_path = experience_db + if embedding_model or experience_db: + self._init_experience_db( + embedding_model=embedding_model, experience_db=experience_db + ) + # Store generated meshes self.mesh_history = [] + def _init_experience_db( + self, + embedding_model: str = None, + experience_db: Path | str = None, + ): + """Initialise the experience database. + + embedding_model : str, optional + Embedding model id for the experience database. + experience_db : str | Path, optional + Directory to restore the experience database from. + """ + from .tools.search import ChunkFaissDatabase + + if experience_db: + experience_db = Path(experience_db) + if not experience_db.is_dir(): + self.logger.warning( + f"Experience DB '{experience_db}' does not exist. Will be created on save." 
+ ) + experience_db = None # Don't try to restore + + self.experience_db = ChunkFaissDatabase( + self.client, model=embedding_model, restore=experience_db + ) + def chat( self, user_input: str, max_iterations: int = 20, task_handler=default_handler ) -> str: """ - task_handler(title, func, *args, **kwargs) : function - Wrapper that should print the title and then run func(*args, **kwargs) + Drive an interactive agent loop with the LLM, handling tool calls. + + This appends the user's input to the internal message history, sends the + conversation to the LLM, and executes any tool calls returned by the LLM. + Tool executions are wrapped and dispatched through `task_handler` so the + caller can capture, display, or redirect output. + + Parameters + ---------- + user_input : str + Natural-language instruction or question for the agent. + max_iterations : int, default=20 + Maximum number of LLM iterations (tool-call cycles) to perform. + task_handler : callable + Signature: task_handler(title: str, func: Callable[[], Any]) -> Any. + Used to run tool calls; allows UI integration (e.g., capturing output). + + Returns + ------- + str + The assistant's final textual reply (may be empty string). + + Notes + ----- + - Tool call arguments are expected to be JSON strings and will be parsed. + - Tool results are appended to the conversation as 'tool' messages so the + LLM can continue reasoning with tool outputs. + - This method mutates `self.messages`. Consider cloning if you want an + ephemeral reasoning call that doesn't alter session history. + - The function protects against malformed tool arguments but tool errors + are returned as structured error objects to the LLM. 
""" self.logger.debug(f"User input: {user_input}") self.messages.append({"role": "user", "content": user_input}) @@ -363,19 +556,21 @@ def chat( { "role": "assistant", "content": msg.content, # may be None - "tool_calls": [ - { - "id": tc.id, - "type": "function", - "function": { - "name": tc.function.name, - "arguments": tc.function.arguments, # keep as string - }, - } - for tc in msg.tool_calls - ] - if msg.tool_calls - else None, + "tool_calls": ( + [ + { + "id": tc.id, + "type": "function", + "function": { + "name": tc.function.name, + "arguments": tc.function.arguments, # keep as string + }, + } + for tc in msg.tool_calls + ] + if msg.tool_calls + else None + ), } ) @@ -396,14 +591,19 @@ def chat( # Wrap the tool call in a function to pass to task_handler # This enables output to be captured and redirected in # the user interface. - def run_task(): + + # Use default arguments to avoid potential late-binding + # closure bug if task_handler defers tasks. + tc_name = tc.function.name + + def run_task(tc_name=tc_name, args=args): print( - f"Calling {tc.function.name}\nInputs: {pprint.pformat(args)}", + f"Calling {tc_name}\nInputs: {pprint.pformat(args)}", flush=True, ) try: - tool = self.tool_registry[tc.function.name] + tool = self.tool_registry[tc_name] # Normalise the arguments to match signature result = tool["function"]( **normalise_arguments(tool["signature"], args) @@ -430,7 +630,28 @@ def run_task(): return "Exceeded maximum iterations. See log for details." def chat_nb(self, user_input: str, max_iterations: int = 20): - """Wrapper that handles model output in a Jupyter notebook""" + """ + Notebook-friendly wrapper around `chat` that captures tool output in + collapsible UI widgets (ipywidgets). + + Parameters + ---------- + user_input : str + User instruction to pass to the agent. + max_iterations : int, default=20 + Maximum number of LLM iterations. 
+ + Returns + ------- + None + Prints the final assistant text and presents interactive UI elements + for tool execution logs. + + Notes + ----- + - This method requires Jupyter/IPython (ipywidgets). It is a convenience + wrapper and does not change agent semantics. + """ import ipywidgets as widgets from IPython.display import display @@ -460,8 +681,45 @@ def run_step(title, func, *args, **kwargs): ) print(result) - def run_hypnotoad(self, settings: dict = {}) -> dict: - """Run Hypnotoad with given settings dict""" + def run_hypnotoad( + self, settings: Optional[dict] = None, notes: Optional[str] = None + ) -> dict: + """ + Run the Hypnotoad mesh generator using the provided settings. + + This method: + - Loads the equilibrium from self.gridfile with provided settings, + - Constructs a BoutMesh, runs the standard processing (calculateRZ, + geometry, etc.), + - Computes diagnostics via tools.inspect_mesh(detail='summary'), + - Appends a dictionary to `self.mesh_history` with keys: + {'settings', 'mesh', 'diagnostics'}. + + Parameters + ---------- + settings : dict, optional + Hypnotoad settings dictionary. If None, defaults are used. + + Returns + ------- + dict + Structured result with at minimum: + - status: 'success' or 'error' + - mesh_index: integer index into mesh_history (when success) + - n_meshes: total number of saved meshes + - diagnostics: diagnostics dict (when success) + - message: error message (when failure) + - hint: optional next-step hint + + Notes + ----- + - Call validate_settings before run_hypnotoad when possible. + - Exceptions during reading or mesh generation are caught and returned + as structured errors (status='error'). 
+ """ + settings = settings or {} + if notes: + print(notes) try: # Read the grid file with open(self.gridfile, "rt") as fh: @@ -474,7 +732,9 @@ def run_hypnotoad(self, settings: dict = {}) -> dict: mesh.geometry() idx = len(self.mesh_history) diagnostics = tools.inspect_mesh(mesh, detail="summary") - self.mesh_history.append({"settings": settings, "mesh": mesh}) + self.mesh_history.append( + {"settings": settings, "mesh": mesh, "diagnostics": diagnostics} + ) return { "status": "success", "mesh_index": idx, # <-- LLM uses this for inspect_mesh @@ -492,8 +752,31 @@ def run_hypnotoad(self, settings: dict = {}) -> dict: def _inspect_mesh(self, mesh_index: int = -1, detail: str = "summary") -> dict: """ - Inspect a previously generated mesh by index. - mesh_index: index from run_hypnotoad result. -1 = most recent (default). + Inspect a stored mesh by index and return diagnostics. + + This is a thin wrapper around tools.inspect_mesh that selects the mesh + from `self.mesh_history`. + + Parameters + ---------- + mesh_index : int, default=-1 + Index of the mesh to inspect. -1 selects the most recent mesh. + detail : str, default='summary' + Level of inspection: 'summary', 'standard', or 'full'. + + Returns + ------- + dict + The same structure returned by tools.inspect_mesh, or an error object + with keys: + - status: 'error' + - message: error text + - hint: optional usage hint + + Raises + ------ + None + All exceptions are captured and returned as structured error dicts. """ if len(self.mesh_history) == 0: return { @@ -511,7 +794,27 @@ def _inspect_mesh(self, mesh_index: int = -1, detail: str = "summary") -> dict: return tools.inspect_mesh(mesh, detail=detail) def list_meshes(self) -> dict: - """Summarise all mesh attempts in this session.""" + """ + Return a summary of all meshes generated in this session. 
+ + The returned object contains: + - n_meshes: int + - meshes: list of dicts, each containing: + - mesh_index: int + - valid: bool (diagnostics.get('valid', False)) + - n_warnings: int (diagnostics.get('n_warnings', 0)) + - settings: dict (the settings used to produce the mesh) + + Returns + ------- + dict + Session-level mesh summary. + + Notes + ----- + - This is a lightweight listing intended for quick inspection by the LLM. + - For in-depth diagnostics call inspect_mesh on a specific mesh_index. + """ return { "n_meshes": len(self.mesh_history), "meshes": [ @@ -527,9 +830,24 @@ def list_meshes(self) -> dict: def get_equilibrium_info(self) -> dict: """ - Describe the magnetic equilibrium and geometry. Call this at the start - of a session or when you need to understand the physics before choosing - mesh settings. Returns metrics that inform resolution and spacing choices. + Describe the magnetic equilibrium and geometry associated with self.gridfile. + + This function returns physics-informed metrics that guide mesh choices, + such as topology (single-null/double-null), X-point locations, device + extents, and shape proxies. It is intended to be called at session start + or before choosing mesh settings. + + Returns + ------- + dict + Either a description dict (topology, key coordinates, scalar metrics), + or an error object: {'status': 'error', 'message': str}. + + Notes + ----- + - Implementation calls tools.describe_equilibrium(self.gridfile). + - The returned structure should be concise (a few scalars + short textual + indicators) so that it fits well into the model context. """ try: return tools.describe_equilibrium(self.gridfile) @@ -537,28 +855,182 @@ def get_equilibrium_info(self) -> dict: return {"status": "error", "message": str(e)} def search_hypnotoad_options(self, query: str, k: int = 4) -> list[dict]: - """ """ + """ + Search the options reference for matching Hypnotoad settings. 
+ + This wraps the BM25-based `self.options_db` lookup and returns + JSON-serializable option descriptors suitable for LLM consumption. + + Parameters + ---------- + query : str + Natural language or partial option name to search for. + k : int, default=4 + Number of results to return. + + Returns + ------- + list[dict] + List of option summaries. Each dict should contain at least: + - name/path (exact configuration key) + - default value + - type + - allowed values or constraints (if known) + - short description or example + + Notes + ----- + - The LLM should call this before guessing option names or setting unknown keys. + - This method returns structured dicts (not Chunk objects) to keep tool + results easy to parse by the LLM. + """ return self.options_db.retrieve(query, k) + def search_experience(self, query: str, k: int = 4): + if self.experience_db is None: + return [] + chunks, scores = self.experience_db.retrieve(query, k) + return [ + {"text": c.text, "score": s, "source": c.source, "section": c.section} + for c, s in zip(chunks, scores) + ] + @property def last_mesh(self) -> Optional[BoutMesh]: - """The last successfully generated mesh. Can be None.""" + """ + The most recent successfully generated BoutMesh, or None. + + Returns + ------- + BoutMesh or None + The mesh object for programmatic inspection/plotting. + """ if len(self.mesh_history) == 0: return None return self.mesh_history[-1]["mesh"] @property def last_settings(self) -> Optional[dict]: - """Return the settings used to create the most recent successful mesh. - Can be None.""" + """ + The settings dict used to generate the most recent successful mesh, + or None if no successful mesh exists. + + Returns + ------- + dict or None + The resolved settings dict (defaults applied) for the last mesh. 
+ """ if len(self.mesh_history) == 0: return None return self.mesh_history[-1]["settings"] def plot_last_mesh(self, ax=None): - """Plots the most recent successfully generated mesh""" + """ + Plot the most recent successfully generated mesh. + + Parameters + ---------- + ax : matplotlib.axes.Axes, optional + Optional axis to draw into. If None, the mesh's default plotting + behavior will create or return an axis. + + Returns + ------- + matplotlib.axes.Axes or None + The axis containing the plotted mesh, or None if no mesh exists. + + Notes + ----- + - This convenience method delegates to the BoutMesh plotting helpers: + mesh.plotPotential() and mesh.plotGridCellEdges(). + """ mesh = self.last_mesh if mesh is None: return ax = mesh.plotPotential(axis=ax) return mesh.plotGridCellEdges(ax=ax) + + def add_experience_report(self, embedding_model: Optional[str] = None): + """ + Summarize the most recent run and add an 'experience' Chunk to the + experience database. + + Behavior: + - Constructs a compact summary (using an ephemeral LLM call) that + includes: topology, goal, overrides (diff from defaults), + 3-6 lessons (symptom→change→outcome), and diagnostics summary. + - Creates a Chunk(section='experience', chunk_type='experience') + with the summary text. + - Computes embedding(s) and adds them to the experience DB via + self.experience_db.add_chunks([chunk]). + + Returns + ------- + None + + Raises + ------ + ValueError + If no experience DB is configured (self.experience_db is None) + and no embedding_model is provided. + + Notes + ----- + - Should use a one-shot LLM call (not append to self.messages) to avoid + corrupting the ongoing conversational history. + - The helper should compute `overrides` as the diff between the last + settings and OptionsFactory defaults for compactness and reproducibility. 
+ """ + from .tools import experience + from .tools.search import Chunk + + if self.experience_db is None: + if embedding_model is None: + raise ValueError( + "No experience DB configured and no embedding_model provided." + ) + from .tools.search import ChunkFaissDatabase + + self.experience_db = ChunkFaissDatabase(self.client, model=embedding_model) + + # Generate a summary including key lessons learned + summary = self.chat(experience.SUMMARY_PROMPT) + + ch = Chunk( + text=summary, + section="experience", + source="experience", + chunk_type="experience", + ) + # Add chunk to the database + self.experience_db.add_chunks([ch]) + + def save_experience(self, path: Path | str = None): + """ + Persist the experience database to disk. + + Parameters + ---------- + path : str or Path, optional + Destination directory. If None, uses the path provided at + initialization (self.experience_db_path). If that is also None, + a ValueError is raised. + + Returns + ------- + None + + Raises + ------ + ValueError + If no destination path is provided and no experience DB path was + configured during initialization. + """ + if self.experience_db is None: + return + if path is None: + # Use the path given to init (may be None) + path = self.experience_db_path + if path is None: + raise ValueError("No path given to save_experience()") + self.experience_db.save(path) diff --git a/hypnotoad/agent/tools/experience.py b/hypnotoad/agent/tools/experience.py new file mode 100644 index 00000000..65eee83d --- /dev/null +++ b/hypnotoad/agent/tools/experience.py @@ -0,0 +1,74 @@ +""" +Data and routines for summarizing mesh generation experience. +Intended to build a database that improves future tasks. +""" + +SUMMARY_PROMPT = """ +You are writing an “Experience Chunk” for a RAG knowledge base used by an LLM agent that operates the hypnotoad mesh generator. 
+ +Goal: Produce a compact, highly searchable, technically accurate summary of this run that will help future agents solve similar mesh-generation tasks. The chunk will be embedded for semantic search and also indexed for keyword/BM25 search. + +Write ONLY the Experience Chunk text. Do not include JSON, code blocks, or extra commentary. + +INPUTS YOU WILL RECEIVE (conceptually): +- run_status: "success" or "fail" +- equilibrium_summary: topology, number of X-points, any notable geometry/topology notes +- goal_summary: what the agent was trying to achieve (resolution goals, speed vs quality tradeoffs, etc.) +- defaults: the default options (from OptionsFactory) +- effective_options: the full options used for this run (after defaults + overrides) +- overrides: a dict of options that differ from defaults (already computed) +- diagnostics: key metrics and checks (mesh sizes, quality metrics, runtime, warnings, errors) +- artifacts: paths/URLs to config files, logs, and mesh output (if any) + +STYLE AND CONTENT RULES +- Be concise but information-dense. Prefer short lines and bullets. +- Include option names exactly as they appear in the configuration (preserve nesting/paths). +- Focus on what changed from defaults, what symptoms were observed, and why the changes helped. +- Avoid speculation. If you don’t know why something helped, say “reason unclear”. +- Include keywords that improve retrieval: topology terms, common warning/error phrases, and key option names. +- If run_status is "fail", emphasize the error signature and last attempted overrides, and suggest the most plausible next changes (max 3) grounded in the observed failure. + +OUTPUT FORMAT (follow exactly) +Line 1: [SUCCESS] or [FAIL] | topology=<...> | goal= | eq= | version= + +Section: Situation +- 2–5 bullets describing the equilibrium/topology and the objective. 
+ +Section: Key overrides (diff from defaults) +- Group overrides by subsystem if possible (e.g., “geometry”, “spacing”, “x-point handling”, “smoothing”, “solver/integration”). +- List 5–20 overrides max. Each line: + - : (default: ) — <1 short clause describing intent> +- If there are more than 20 overrides, include the 20 most consequential and add one line: + - (N more overrides omitted) + +Section: Observations and lessons +- 3–8 bullets, each must follow this pattern: + - Symptom: + Change: + Outcome: + Why: (or “reason unclear”) +- If run_status is "fail", replace “Outcome” with “Result” and focus on: + - error message / warning text (quote short fragments, <= 15 words) + - where it occurred (stage: parsing, equilibrium, region detection, mesh generation, smoothing, output) + - the most plausible next changes (max 3) + +Section: Results (or Failure details) +- If success: include the most important diagnostics: + - runtime, mesh dimensions, region count, min cell size (if available), quality checks summary + - warnings (if any) as a short list +- If fail: + - error_signature: + - last_good_state: else “none” + - what_to_try_next: 1–3 bullets (must be concrete option edits or checks) + +Section: Artifacts +- config: +- log: +- output: +- notes: + +IMPORTANT +- Do not include any sensitive or irrelevant information. +- Do not include raw stack traces; only short error fragments. +- Ensure the chunk is self-contained: a future reader should understand what worked/failed and what to try next. +"""