From ee480b4c323007de894137f7f051f3814e9dc0c1 Mon Sep 17 00:00:00 2001
From: JNK234 <jwalapuramnarasimha@gmail.com>
Date: Thu, 26 Feb 2026 21:23:54 -0600
Subject: [PATCH 01/12] demo2: add crisis-triage NetLogo model

---
 demos/crisis-triage/crisis-triage.nlogo | 706 ++++++++++++++++++++++++
 1 file changed, 706 insertions(+)
 create mode 100644 demos/crisis-triage/crisis-triage.nlogo

diff --git a/demos/crisis-triage/crisis-triage.nlogo b/demos/crisis-triage/crisis-triage.nlogo
new file mode 100644
index 0000000..83d28d1
--- /dev/null
+++ b/demos/crisis-triage/crisis-triage.nlogo
@@ -0,0 +1,706 @@
+extensions [ llm ]
+
+globals [
+  llm-ready?
+  config-path
+  triage-template-path
+  dispatcher-template-path
+  processed-basic
+  processed-expert
+  processed-coordinator
+  escalated-count
+  seeded-crises
+  case-arrival-probability
+]
+
+breed [cases case]
+breed [basic-agents basic-agent]
+breed [expert-agents expert-agent]
+breed [coordinators coordinator]
+
+turtles-own [
+  tier
+  capacity
+  current-load
+  processed-count
+]
+
+cases-own [
+  incident-summary
+  reported-impact
+  severity-band
+  severity-score
+  queue-state
+  assigned-tier
+  assigned-agent
+  handling-notes
+  created-at
+]
+
+to setup
+  clear-all
+  set config-path "demos/crisis-triage/config.txt"
+  set triage-template-path "demos/crisis-triage/triage-template.yaml"
+  set dispatcher-template-path "demos/crisis-triage/dispatcher-template.yaml"
+
+  set processed-basic 0
+  set processed-expert 0
+  set processed-coordinator 0
+  set escalated-count 0
+
+  set seeded-crises 12
+  set case-arrival-probability 0.25
+
+  setup-llm
+  setup-responders
+  create-initial-cases seeded-crises
+  reset-ticks
+end
+
+to setup-llm
+  set llm-ready? false
+  carefully [
+    if file-exists? config-path [
+      llm:load-config config-path
+      set llm-ready? true
+    ]
+  ] [
+    set llm-ready? false
+    print (word "LLM setup fallback to heuristic triage: " error-message)
+  ]
+end
+
+to setup-responders
+  create-basic-agents 7 [
+    set tier "basic"
+    set capacity 2
+    set current-load 0
+    set processed-count 0
+    set color 57
+    set size 1.6
+    set shape "circle"
+    setxy (-13 + random-float 6) (-12 + random-float 24)
+  ]
+
+  create-expert-agents 4 [
+    set tier "expert"
+    set capacity 2
+    set current-load 0
+    set processed-count 0
+    set color 15
+    set size 1.8
+    set shape "circle"
+    setxy (-3 + random-float 6) (-12 + random-float 24)
+  ]
+
+  create-coordinators 2 [
+    set tier "coordinator"
+    set capacity 3
+    set current-load 0
+    set processed-count 0
+    set color 105
+    set size 2.1
+    set shape "circle"
+    setxy (8 + random-float 6) (-12 + random-float 24)
+  ]
+end
+
+to create-initial-cases [n]
+  repeat n [ spawn-random-case ]
+end
+
+to spawn-random-case
+  let incident-bank (list
+    (list "Server room smoke alarm" "Power instability in two hospital wings")
+    (list "Water main rupture" "Transit junction flooded during rush hour")
+    (list "School bus collision" "Multiple injuries and blocked arterial road")
+    (list "Warehouse fire flare-up" "Toxic plume reported near residential area")
+    (list "Regional telecom outage" "Emergency call latency above safe threshold")
+    (list "Chemical lab leak" "Evacuation radius requested by fire command")
+    (list "Bridge vibration alert" "Potential structural failure during peak traffic")
+    (list "Heat wave brownout" "Critical care equipment on backup power")
+    (list "Subway security incident" "Crowd panic and platform injuries")
+    (list "Data center cooling loss" "City payment systems offline")
+  )
+
+  let picked one-of incident-bank
+  create-cases 1 [
+    set tier "case"
+    set capacity 0
+    set current-load 0
+    set processed-count 0
+
+    set incident-summary item 0 picked
+    set reported-impact item 1 picked
+    set severity-band "unassessed"
+    set severity-score -1
+    set queue-state "new"
+    set assigned-tier "none"
+    set assigned-agent nobody
+    set handling-notes ""
+    set created-at ticks
+
+    set color yellow
+    set size 1.3
+    set shape "circle"
+    setxy (random-xcor) (max-pycor - random-float 6)
+  ]
+end
+
+to go
+  if random-float 1 < case-arrival-probability [
+    spawn-random-case
+  ]
+
+  triage-new-cases
+  route-triaged-cases
+  coordinator-rebalance
+  process-assigned-cases
+
+  tick
+end
+
+to triage-new-cases
+  ask cases with [queue-state = "new"] [
+    perform-triage
+  ]
+end
+
+to perform-triage
+  let llm-response ""
+
+  if llm-ready? [
+    carefully [
+      set llm-response llm:chat-with-template triage-template-path (list
+        ["incident" incident-summary]
+        ["impact" reported-impact]
+        ["elapsed_ticks" (word ticks)]
+        ["known_context" "Municipal crisis operations center with three response tiers"]
+      )
+    ] [
+      set llm-response ""
+    ]
+  ]
+
+  if llm-response = "" [
+    set llm-response heuristic-severity-report incident-summary reported-impact
+  ]
+
+  set severity-band extract-severity-label llm-response incident-summary reported-impact
+  set severity-score severity-score-from-band severity-band
+  set queue-state "triaged"
+  set handling-notes (word "TRIAGE " llm-response)
+  set color color-for-band severity-band
+end
+
+to-report heuristic-severity-report [summary impact]
+  let merged (word summary " " impact)
+
+  if (position "collision" merged != false)
+     or (position "toxic" merged != false)
+     or (position "evacuation" merged != false)
+     or (position "critical care" merged != false)
+     or (position "structural" merged != false) [
+    report "SEVERITY: CRITICAL"
+  ]
+
+  if (position "fire" merged != false)
+     or (position "outage" merged != false)
+     or (position "flooded" merged != false)
+     or (position "injuries" merged != false) [
+    report "SEVERITY: HIGH"
+  ]
+
+  report "SEVERITY: MODERATE"
+end
+
+to-report extract-severity-label [assessment summary impact]
+  let text (word assessment " " summary " " impact)
+
+  if (position "CRITICAL" text != false) or (position "critical" text != false) [
+    report "critical"
+  ]
+
+  if (position "HIGH" text != false) or (position "high" text != false) [
+    report "high"
+  ]
+
+  if (position "MODERATE" text != false) or (position "moderate" text != false) [
+    report "moderate"
+  ]
+
+  if (position "LOW" text != false) or (position "low" text != false) [
+    report "low"
+  ]
+
+  report "moderate"
+end
+
+to-report severity-score-from-band [band]
+  if band = "low" [ report 25 ]
+  if band = "moderate" [ report 55 ]
+  if band = "high" [ report 80 ]
+  report 95
+end
+
+to route-triaged-cases
+  let queue sort-by [[a b] -> [severity-score] of a > [severity-score] of b] (sort (cases with [queue-state = "triaged"]))
+  foreach queue [ queued-case ->
+    dispatch-case queued-case
+  ]
+end
+
+to dispatch-case [target-case]
+  let preferred-tier dispatch-recommendation target-case
+  let final-tier available-tier preferred-tier
+
+  if final-tier = "hold" [
+    ask target-case [
+      set handling-notes (word handling-notes " | waiting-capacity")
+    ]
+    stop
+  ]
+
+  let worker select-worker final-tier
+  if worker = nobody [ stop ]
+
+  if final-tier != preferred-tier [
+    set escalated-count escalated-count + 1
+  ]
+
+  ask worker [
+    set current-load current-load + 1
+  ]
+
+  ask target-case [
+    set queue-state "assigned"
+    set assigned-tier final-tier
+    set assigned-agent worker
+    set color color-for-tier final-tier
+    set handling-notes (word handling-notes " | routed:" final-tier)
+    set ycor ycor - 4
+  ]
+end
+
+to-report dispatch-recommendation [target-case]
+  let default-tier severity-to-default-tier [severity-band] of target-case
+
+  if not llm-ready? [
+    report default-tier
+  ]
+
+  let llm-response ""
+  carefully [
+    set llm-response llm:chat-with-template dispatcher-template-path (list
+      ["severity" [severity-band] of target-case]
+      ["incident" [incident-summary] of target-case]
+      ["basic_load" (word count cases with [queue-state = "assigned" and assigned-tier = "basic"])]
+      ["expert_load" (word count cases with [queue-state = "assigned" and assigned-tier = "expert"])]
+      ["coordinator_load" (word count cases with [queue-state = "assigned" and assigned-tier = "coordinator"])]
+    )
+  ] [
+    set llm-response ""
+  ]
+
+  if llm-response = "" [ report default-tier ]
+
+  let chosen extract-route-label llm-response
+  if chosen = "unknown" [ report default-tier ]
+  report chosen
+end
+
+to-report extract-route-label [response]
+  if (position "COORDINATOR" response != false) or (position "coordinator" response != false) [
+    report "coordinator"
+  ]
+
+  if (position "EXPERT" response != false) or (position "expert" response != false) [
+    report "expert"
+  ]
+
+  if (position "BASIC" response != false) or (position "basic" response != false) [
+    report "basic"
+  ]
+
+  report "unknown"
+end
+
+to-report severity-to-default-tier [band]
+  if band = "low" [ report "basic" ]
+  if band = "moderate" [ report "expert" ]
+  if band = "high" [ report "expert" ]
+  report "coordinator"
+end
+
+to-report available-tier [preferred-tier]
+  if preferred-tier = "basic" [
+    if any? basic-agents with [current-load < capacity] [ report "basic" ]
+    if any? expert-agents with [current-load < capacity] [ report "expert" ]
+    if any? coordinators with [current-load < capacity] [ report "coordinator" ]
+    report "hold"
+  ]
+
+  if preferred-tier = "expert" [
+    if any? expert-agents with [current-load < capacity] [ report "expert" ]
+    if any? coordinators with [current-load < capacity] [ report "coordinator" ]
+    if any? basic-agents with [current-load < capacity] [ report "basic" ]
+    report "hold"
+  ]
+
+  if any? coordinators with [current-load < capacity] [ report "coordinator" ]
+  if any? expert-agents with [current-load < capacity] [ report "expert" ]
+  report "hold"
+end
+
+to-report select-worker [tier-name]
+  if tier-name = "basic" [
+    if any? basic-agents with [current-load < capacity] [
+      report min-one-of basic-agents with [current-load < capacity] [current-load]
+    ]
+  ]
+
+  if tier-name = "expert" [
+    if any? expert-agents with [current-load < capacity] [
+      report min-one-of expert-agents with [current-load < capacity] [current-load]
+    ]
+  ]
+
+  if tier-name = "coordinator" [
+    if any? coordinators with [current-load < capacity] [
+      report min-one-of coordinators with [current-load < capacity] [current-load]
+    ]
+  ]
+
+  report nobody
+end
+
+to coordinator-rebalance
+  if not any? coordinators [ stop ]
+
+  let risky-basic one-of cases with [
+    queue-state = "assigned" and
+    assigned-tier = "basic" and
+    severity-score >= 70
+  ]
+  if risky-basic != nobody [
+    reassign-case risky-basic "expert" "risk escalation"
+  ]
+
+  let critical-expert one-of cases with [
+    queue-state = "assigned" and
+    assigned-tier = "expert" and
+    severity-score >= 90
+  ]
+  if critical-expert != nobody [
+    reassign-case critical-expert "coordinator" "critical escalation"
+  ]
+end
+
+to reassign-case [target-case new-tier reason]
+  if [assigned-tier] of target-case = new-tier [ stop ]
+
+  let new-worker select-worker new-tier
+  if new-worker = nobody [ stop ]
+
+  let old-worker [assigned-agent] of target-case
+  if old-worker != nobody [
+    ask old-worker [
+      set current-load max (list 0 (current-load - 1))
+    ]
+  ]
+
+  ask new-worker [
+    set current-load current-load + 1
+  ]
+
+  ask target-case [
+    set assigned-tier new-tier
+    set assigned-agent new-worker
+    set color color-for-tier new-tier
+    set handling-notes (word handling-notes " | coordinator-reassign:" reason)
+  ]
+
+  set escalated-count escalated-count + 1
+end
+
+to process-assigned-cases
+  ask cases with [queue-state = "assigned"] [
+    let completion completion-chance assigned-tier severity-band
+    if random-float 1 < completion [
+      finalize-case self
+    ]
+  ]
+end
+
+to-report completion-chance [tier-name band]
+  if tier-name = "basic" [ report 0.12 ]
+  if tier-name = "expert" [
+    if band = "high" [ report 0.27 ]
+    if band = "critical" [ report 0.2 ]
+    report 0.22
+  ]
+
+  if band = "critical" [ report 0.34 ]
+  report 0.28
+end
+
+to finalize-case [target-case]
+  let tier-name [assigned-tier] of target-case
+  let worker [assigned-agent] of target-case
+
+  if worker != nobody [
+    ask worker [
+      set current-load max (list 0 (current-load - 1))
+      set processed-count processed-count + 1
+    ]
+  ]
+
+  if tier-name = "basic" [
+    set processed-basic processed-basic + 1
+  ]
+  if tier-name = "expert" [
+    set processed-expert processed-expert + 1
+  ]
+  if tier-name = "coordinator" [
+    set processed-coordinator processed-coordinator + 1
+  ]
+
+  ask target-case [
+    set queue-state "resolved"
+    set color 7
+    set assigned-agent nobody
+    set ycor min-pycor + random-float 3
+    set label word "resolved " severity-band
+  ]
+end
+
+to-report color-for-band [band]
+  if band = "low" [ report 45 ]
+  if band = "moderate" [ report 25 ]
+  if band = "high" [ report 15 ]
+  report 125
+end
+
+to-report color-for-tier [tier-name]
+  if tier-name = "basic" [ report 57 ]
+  if tier-name = "expert" [ report 15 ]
+  report 105
+end
+
+@#$#@#$#@
+GRAPHICS-WINDOW
+230
+10
+747
+528
+-1
+-1
+15.0
+1
+10
+1
+1
+1
+0
+1
+1
+1
+-16
+16
+-16
+16
+1
+1
+1
+ticks
+30.0
+
+BUTTON
+20
+20
+88
+53
+setup
+setup
+NIL
+1
+T
+OBSERVER
+NIL
+NIL
+NIL
+NIL
+1
+
+BUTTON
+96
+20
+164
+53
+go
+go
+T
+1
+T
+OBSERVER
+NIL
+NIL
+NIL
+NIL
+1
+
+BUTTON
+20
+60
+164
+93
+new-case
+spawn-random-case
+NIL
+1
+T
+OBSERVER
+NIL
+NIL
+NIL
+NIL
+1
+
+MONITOR
+20
+110
+163
+155
+LLM Active
+llm-ready?
+17
+1
+11
+
+MONITOR
+20
+160
+164
+205
+New Queue
+count cases with [queue-state = "new"]
+17
+1
+11
+
+MONITOR
+20
+210
+164
+255
+Triaged Queue
+count cases with [queue-state = "triaged"]
+17
+1
+11
+
+MONITOR
+20
+260
+164
+305
+Assigned Queue
+count cases with [queue-state = "assigned"]
+17
+1
+11
+
+MONITOR
+20
+310
+164
+355
+Escalations
+escalated-count
+17
+1
+11
+
+MONITOR
+20
+360
+164
+405
+Done by Basic
+processed-basic
+17
+1
+11
+
+MONITOR
+20
+410
+164
+455
+Done by Expert
+processed-expert
+17
+1
+11
+
+MONITOR
+20
+460
+164
+505
+Done by Coordinator
+processed-coordinator
+17
+1
+11
+
+@#$#@#$#@
+## Crisis Triage with Tiered Intelligence Coordination
+
+This demo simulates emergency incident flow through three responder tiers:
+
+1. Basic agents handle low complexity cases.
+2. Expert agents handle moderate and high severity cases.
+3. Coordinators handle critical cases and rebalance misrouted overload.
+
+Each new incident is triaged with `llm:chat-with-template` using `triage-template.yaml`.
+Routing then uses `dispatcher-template.yaml` and capacity-aware fallback logic.
+
+### Run
+
+1. Update `demos/crisis-triage/config.txt` with your provider + credentials.
+2. Click `setup`.
+3. Click `go`.
+4. Use `new-case` to inject incidents manually.
+
+If LLM config is unavailable, the model automatically uses deterministic heuristic triage.
+@#$#@#$#@
+default
+true
+0
+Polygon -7500403 true true 150 5 40 250 150 205 260 250
+
+circle
+false
+0
+Circle -7500403 true true 0 0 300
+@#$#@#$#@
+NetLogo 6.4.0
+@#$#@#$#@
+@#$#@#$#@
+@#$#@#$#@
+@#$#@#$#@
+@#$#@#$#@
+default
+0.0
+-0.2 0 0.0 1.0
+0.0 1 1.0 0.0
+0.2 0 0.0 1.0
+link direction
+true
+0
+Line -7500403 true 150 150 90 180
+Line -7500403 true 150 150 210 180
+@#$#@#$#@
+1
+@#$#@#$#@

From 1f4a1439166c3637166900d34c25329f38cfeb5d Mon Sep 17 00:00:00 2001
From: JNK234 <jwalapuramnarasimha@gmail.com>
Date: Thu, 26 Feb 2026 21:23:57 -0600
Subject: [PATCH 02/12] demo2: add triage severity prompt template

---
 demos/crisis-triage/triage-template.yaml | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100644 demos/crisis-triage/triage-template.yaml

diff --git a/demos/crisis-triage/triage-template.yaml b/demos/crisis-triage/triage-template.yaml
new file mode 100644
index 0000000..3f60565
--- /dev/null
+++ b/demos/crisis-triage/triage-template.yaml
@@ -0,0 +1,12 @@
+system: "You are an emergency triage specialist. Assess risk conservatively and consistently."
+template: |
+  Incident summary: {incident}
+  Reported impact: {impact}
+  Time since report (ticks): {elapsed_ticks}
+  Context: {known_context}
+
+  Classify this incident severity for a municipal response team.
+
+  Return exactly two lines:
+  SEVERITY: LOW|MODERATE|HIGH|CRITICAL
+  JUSTIFICATION: <= 18 words grounded in impact and urgency

From addfb8553d171911ec637f4d90504b051e8720ee Mon Sep 17 00:00:00 2001
From: JNK234 <jwalapuramnarasimha@gmail.com>
Date: Thu, 26 Feb 2026 21:24:00 -0600
Subject: [PATCH 03/12] demo2: add dispatcher routing prompt template

---
 demos/crisis-triage/dispatcher-template.yaml | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 demos/crisis-triage/dispatcher-template.yaml

diff --git a/demos/crisis-triage/dispatcher-template.yaml b/demos/crisis-triage/dispatcher-template.yaml
new file mode 100644
index 0000000..291a3a4
--- /dev/null
+++ b/demos/crisis-triage/dispatcher-template.yaml
@@ -0,0 +1,15 @@
+system: "You are a crisis operations dispatcher. Route incidents to maximize response quality under load."
+template: |
+  Severity band: {severity}
+  Incident summary: {incident}
+
+  Current active load:
+  BASIC={basic_load}
+  EXPERT={expert_load}
+  COORDINATOR={coordinator_load}
+
+  Choose the best tier for this incident considering both severity and current load.
+
+  Return exactly two lines:
+  ROUTE: BASIC|EXPERT|COORDINATOR
+  REASON: <= 18 words

From 6abc7b31442a7c87fed0cf29be2d8f03c3ef5b9c Mon Sep 17 00:00:00 2001
From: JNK234 <jwalapuramnarasimha@gmail.com>
Date: Thu, 26 Feb 2026 21:24:09 -0600
Subject: [PATCH 04/12] demo2: add crisis triage LLM config

---
 demos/crisis-triage/config.txt | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 demos/crisis-triage/config.txt

diff --git a/demos/crisis-triage/config.txt b/demos/crisis-triage/config.txt
new file mode 100644
index 0000000..0c9fcc1
--- /dev/null
+++ b/demos/crisis-triage/config.txt
@@ -0,0 +1,20 @@
+# Crisis Triage Demo LLM configuration
+# Path is loaded by crisis-triage.nlogo via llm:load-config
+
+# Recommended local/default option (no cloud key required)
+provider=ollama
+model=llama3.2:latest
+base_url=http://localhost:11434
+
+# Runtime behavior
+temperature=0.2
+max_tokens=120
+timeout_seconds=45
+
+# Optional cloud fallback examples (commented)
+# provider=openai
+# api_key=YOUR_OPENAI_API_KEY_HERE
+# model=gpt-4o-mini
+# temperature=0.2
+# max_tokens=120
+# timeout_seconds=45

From 3ba54f7fb1f8eece431d97a3f4a10dd33ba257cd Mon Sep 17 00:00:00 2001
From: JNK234 <jwalapuramnarasimha@gmail.com>
Date: Thu, 26 Feb 2026 21:24:12 -0600
Subject: [PATCH 05/12] demo2: add crisis triage documentation

---
 demos/crisis-triage/README.md | 101 ++++++++++++++++++++++++++++++++++
 1 file changed, 101 insertions(+)
 create mode 100644 demos/crisis-triage/README.md

diff --git a/demos/crisis-triage/README.md b/demos/crisis-triage/README.md
new file mode 100644
index 0000000..f3a3f17
--- /dev/null
+++ b/demos/crisis-triage/README.md
@@ -0,0 +1,101 @@
+# Demo 2: Crisis Triage with Tiered Intelligence Coordination
+
+This demo models a municipal crisis desk where incidents are triaged by an LLM, routed to one of three response tiers, and dynamically escalated when capacity or risk changes.
+
+## What it demonstrates
+
+- Tiered responders: `basic`, `expert`, `coordinator`
+- LLM-driven severity assessment via `triage-template.yaml`
+- LLM-assisted dispatch recommendation via `dispatcher-template.yaml`
+- Capacity-aware fallback routing when a preferred tier is saturated
+- Coordinator-triggered escalation for risky or critical in-flight cases
+- Automatic heuristic fallback if LLM config/provider is unavailable
+
+## Deliverables
+
+- `crisis-triage.nlogo`: NetLogo simulation model
+- `triage-template.yaml`: Severity prompt template
+- `dispatcher-template.yaml`: Routing prompt template
+- `config.txt`: LLM extension configuration
+- `tests/`: Automated validation tests
+
+## Model architecture
+
+### Agent tiers
+
+- `basic-agents`
+  - Highest volume, low-complexity workload
+  - Lower completion probability for hard cases
+- `expert-agents`
+  - Moderate/high severity handling
+  - Better completion rates on difficult incidents
+- `coordinators`
+  - Critical incidents and system-level balancing
+  - Reassign risky cases from lower tiers
+
+### Incident lifecycle
+
+1. New incident is created (`queue-state = "new"`)
+2. Triage step classifies severity (`low/moderate/high/critical`)
+3. Dispatch step chooses preferred tier and applies capacity fallback
+4. Case is processed by assigned tier
+5. Coordinator may reassign active risky cases
+6. Resolved incidents are counted per tier
+
+## Files and paths
+
+All files for this demo live in:
+
+`demos/crisis-triage/`
+
+The NetLogo model loads these by relative path:
+
+- `demos/crisis-triage/config.txt`
+- `demos/crisis-triage/triage-template.yaml`
+- `demos/crisis-triage/dispatcher-template.yaml`
+
+## Run instructions
+
+1. Ensure NetLogo has the `llm` extension available.
+2. Configure provider settings in `config.txt`.
+3. Open `crisis-triage.nlogo` in NetLogo.
+4. Click `setup`.
+5. Click `go`.
+6. Optionally click `new-case` to inject additional incidents.
+
+## LLM behavior
+
+- Severity is requested using strict output formatting:
+  - `SEVERITY: LOW|MODERATE|HIGH|CRITICAL`
+- Routing is requested using strict output formatting:
+  - `ROUTE: BASIC|EXPERT|COORDINATOR`
+- Parser logic in the model extracts these tags and falls back safely when missing.
+
+## Heuristic fallback mode
+
+If LLM config fails to load or provider calls fail:
+
+- `llm-ready?` monitor is `false`
+- Severity uses keyword-driven deterministic rules
+- Routing uses severity-to-tier defaults + capacity fallback
+
+This keeps the simulation functional offline.
+
+## Test suite
+
+Tests are static validations that do not call external APIs.
+
+Run from repository root:
+
+```bash
+python -m unittest discover -s demos/crisis-triage/tests -p "test_*.py" -v
+```
+
+Coverage includes:
+
+- Required files present
+- NetLogo model includes tiered breeds and key procedures
+- Model references both YAML templates and config
+- Template variables match model substitution keys
+- Config includes required LLM keys
+- README contains usage, architecture, and test instructions

From 37387314ccaa178dc7810fc4f6b12d39855beede Mon Sep 17 00:00:00 2001
From: JNK234 <jwalapuramnarasimha@gmail.com>
Date: Thu, 26 Feb 2026 21:24:15 -0600
Subject: [PATCH 06/12] demo2: add crisis triage validation tests

---
 demos/crisis-triage/tests/README.md           |  15 +++
 .../test_crisis_triage.cpython-312.pyc        | Bin 0 -> 6594 bytes
 .../crisis-triage/tests/test_crisis_triage.py | 116 ++++++++++++++++++
 3 files changed, 131 insertions(+)
 create mode 100644 demos/crisis-triage/tests/README.md
 create mode 100644 demos/crisis-triage/tests/__pycache__/test_crisis_triage.cpython-312.pyc
 create mode 100644 demos/crisis-triage/tests/test_crisis_triage.py

diff --git a/demos/crisis-triage/tests/README.md b/demos/crisis-triage/tests/README.md
new file mode 100644
index 0000000..16032eb
--- /dev/null
+++ b/demos/crisis-triage/tests/README.md
@@ -0,0 +1,15 @@
+# Crisis Triage Demo Tests
+
+Run from repository root:
+
+```bash
+python -m unittest discover -s demos/crisis-triage/tests -p "test_*.py" -v
+```
+
+These tests validate:
+
+- Presence of all required demo files
+- NetLogo tiered-agent and triage/dispatch procedure structure
+- LLM template variable consistency with model substitutions
+- Config key completeness
+- README documentation coverage
diff --git a/demos/crisis-triage/tests/__pycache__/test_crisis_triage.cpython-312.pyc b/demos/crisis-triage/tests/__pycache__/test_crisis_triage.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..78a8e7117af0195f93f359b149b58214bb13c3f7
GIT binary patch
literal 6594
zcmds5TTmO<89u9BiA5I#=5FFz24XB@fl1o>k^~zg1*i=;7$-5dv$NJ7U_lphc4eE5
zJRvjgRE#~fGm{xRlXODb7iXM@KKjv*ndx*!1YFaN`_#VVjZ}WhQ~z^TD+w$xed<hi
zq;vhx_5c3c`HRozM(`c0`da;@8livEiv79rh1`A)p?gR{3Kv5YR?bauP<O-}aejj5
zkON^J3lD*gels2`FHIljD>L5GR;#S=%}|)Az!i#Mz}N{9N={a)gp!Mu+)%1uB@dKD
zR`M#9m{;7`Hz_Edo5DmDD?1b~l>Pa>YV3o42VDWEe9%+lKm$mrx``A&=4RSo+|NVa
zYMZFVbxQT@0ffjdI3YK4uncX!i9_?;JQu9_k=93ooGF}@_30n!jTp62qH3xZ(upch
z;jTn1Ih8c(tXfFN@wu3+<F0u*9y6L0RhyIb=rksqdfd@uVp5&z(y!}A<yfRIJRIqY
zE2aR{)254HJw+1HJOkLC=ppDow;z&wD2--Hdl?A=mVWE7*>(;E`4PjJ(kDYl;Q%5|
zM3ah|n1XU8!LlOh__`kCh!e(Ybgsb9xcj_@iPk-nm_2gzSa%p-#j)fZCf$8gIH8}r
zCX;ygwIrF<3C3D?G#U#Xel2vcTfy-pR7-EATgRHNu~f1;r2KWw&6@&Uo={X((Y0$K
zSwL$Z|DE2&-lZd%-W5-0R_M$@Gm6s0{t8zRhL(FsFI<4`uu1I0&(KqC`V`LYRTzG(
z3#vTX*51bcgw7J=*~U!w@Mt)4MmpO!K4?0%xtOYdg9rsXp%P~L<4Fa_B)Cy5B@?lE
zQUk5K;IHk0WC8ul<NwUP;%Ry6*|+T3x74}f>3Z<tvZp62^sreru$JzXu~vW+-9sw6
z!4=PzM)s~}D9xtNmcuZ*EF*Nok#>|jxlMJ0PxGWE&CgV9ng*)H+acWT4#kn?uX0d2
z1k{yfs2vLbFn{ZpAip8paHgHxt)1r5!gl>{!Xme$0!nkh7h%NY+Dz^(B0y`<nP&-O
zLrhIz4Ojwjnls&+E)!k5rs~tgPf1-2&ZeNqx@_`9zGez^fXUC|dDD4Sj-{|h6=8sV
z7B9VXGSM*bk_sfD{A+~2mVyLe*|KLL{LEi_H+;)@>Th56xBtz*Kf|y2ciov^oWJ!^
zM)*5)>i0hW^y4LMwXt)}Tes$^x~nXnpr!iedlR2dEb&Y4tkicbx-)#{{Msw~zB>K+
z=`V&eLgw6^3yT+SUHo2bUiYKwj`eEftzI~70R&brd^WsuXQ1gG!icKt35ViV9H0O}
zWhy-60^|bZPRN~*yC8Q#UIBRp<RauE<du+DLhc3y=P{edK~npf?2a>q)<<-8QjY4{
z$Oe$oTVo*oJ==p`*j!QgbjmIE>9(;!x@fmeId5=j4uJY918P~b0`T?O^u^qUufnAr
zU!XrYY~XR|c87rGQP44BRK-<I1ML==RAcyfAXq`DZZYe|$A<a_BGPzd`0Sa!@rWhR
zP2r()Aldzck+DrZp8nC1w}%ESGLNN~t%_ULKsV~+B!x{EzOI6L3-YF*;n<`p68vFG
zC0MbPDZM7ZmxZoostF~VK%(2zBsSi7eaoxw+Hw$F9@0)g0y@Uq@XQ-{=52rGZG_*t
z$5rRfh`B0Mzb6x3^H$#(T^zk1w!rq_gGb><zs`1^dgAT>i3f@owGd?<WAFnglxkP|
z`hk6@oDKrEa7C7ELF3o~4Zi~#p_B$#?;J69T>*2X1TIEp4Qm0KUN!>xwkxuxMnhnx
z6S@{Csa!T%^Az|x7&zS~DXkS}i6)Z-OsA|T2`o^oT>i$v6u&82;226kLkC=+DNxIx
zQMt8ou#d6SG8+nx#$*CG&{Yhmk=Rx=<BgZ#VY|VHVJYoxNEXmD|1VbjfsC*wdhhra
zec6VC55lWrPoaABQTwWRqEJ2l2zvVpRp@;po_G-+)MPMtOhBPLJf^^i{E?$Z2&B9T
zq?|7ghBPpm8W<+abWZp{mUlB*=0}2VL)4Q24Sb+E!)up9u~^)wg<9UxO5kfD#&M$t
z8i9wCNfOWdWJUutkz`88`35CVS`F>COU9avCTJ<)Xo%n|a!gJ{u|a9UQ30(om9S`=
zp=FLFqgc~IRy!`8coGm2Qw?kp*V+n+jZjhqv=ZrrlLiF_5z+-^(_QG*!1sf>H7bUn
zJo4J3qXaczpmbSHXhmgG9BlOM2s_(NaRe6Deg#PpLH~Am8r&Iot_juewN%RR;_&_U
zB_Vs@4W>`_+xlehi-0NUlXaN0JYdSh<A0}5w%CueROf6#<0_>Ay;Lz`90mjfwy0MT
zqHcYlwVb1D5zsiG)##@q^7ys`iEV~%qGTaQt8to6VuQRS?$~CcGSvCWIzj4=N2g^y
zM18D63{!TKM)wxPWu|AFF`B1w3Oo-TRI~=M22u5S5Wu-4(V3)>5QSu^5<O5N4k^&~
z608Vuj1_WelAKWTZk%yyNAj@k9D{WA$qfQg(2|4SfesxlR4-;P%d6s*LOq(r>Z&+X
zsP{h}Ulo5-s1AHrw<?Ylsy%=H<rDGfixO0DxxU>bK}DC#jtwAUC`3GZ!MmR+IA1V9
z{wtNP!e|Ge+GA4`^R#0cUIqTN(Ylv$(3B098_+L|{QSzLj}BavLq=a{Lb{wU91b0m
zE*}hDy3tFOgXvP^bKq#2qMC@R3Yciqhhy@b1{M(@F{_!r*+lYMg0hsxsz&>{$h(oT
zq45jH17}9xOVbE#41Buu;LyNex*s}*`uom+=SNRM_R$r1kOk90uvr1hS&?Hg(_>lc
z$cHI8Mh*eZH@s!BMH@z@7XtVwo=(OTh=sKc&Kw8;ICGZV6>uk&(o1m~?n}&68v`!^
z;Z7%d2llB^ID%@fZCq_Sv{D<&h|d~#fBD*1$3H)w-5+^;eWm5>O5-~jFQDqF*t9G*
zt$CYFZ}_>BuXL}AsM2To0&hN=ej*P1e_@SK)74AyPmV!iincN_-OS~aVo6ysJ(h20
zRjNyDK5H@dj*Xrlj~ox2>N_{opN_nLHZnGz?jIc;3lEL-jgOAmxN4=#&|sCkMU!5d
zP=B0(iJXMeM#%Iq$P`uJ4q$WoC9wH#CZpb3DRd66)b{KQI_Eb-=g6a`C*r9Wp;Pb-
zFTq-+hQ6GdQfLH7eM72p3MvZ*=D#!q3g8=Bab6kU&}oX`^;`vYK};u;GSfpP7ZVwt
z%&_T>%hx46Ig1mT>DSdb1}{+3FsKkkGa7B53|<2QY9eo(AzI{F*iYJE6O510z}$G3
zWt~Y2bZlhn9>!LyTbh<joHCeOGmgB3g<B1b(uKMq0ZFgf^X2SH&B2W8huZfu{WPGX
z-dEfGtJ&bm$F+~0*^alLcn2VwtGmFx$Q63G^!{?=knD&nb=z~L7~<V-$Vc5l!Kix=
zoP&T09tS)ECg4>D-aa*B*D!q^3&<oot?D>RIb&3hr4n%IK$42;D$p8kdPxB-1tj@r
zH_{H9-N*s@Rv5>oKrKKy7Fhj^1<@qIl-JhmFJr-01E*mN+5t!?3+g`u7PMwuIft{}
zoe8gdk*|iap?%5q#M|+c51QHC2OWf(>k)WS5hW=h$FU@tZiyO#RE(Crl0<Ee)kA{x
zoRrVxb(&DOoAlFUh$d9ulk+rzNBoj!N3xezX+rNwqcPu2lyg)%!(6Vx={(8hs{FLv
zTH!pF+g0Hl<Y?LtDG%l6Zqa#wZI(?z_Zf$HLXv_FOqZErDxvBW*JcIvS7YiGLJ>p>
z>9YkKq5^`}Wb7)~Q+P9@F|{d%Bg3Oocxa5hw=k<qn2KzJk&K;Sgmq%sWco-7Z*yu?
zXYyu>OiCbNw+{9$>!y3!bZ%38)4IoAq5utj_tpLi$vV$*+z+Vo2ju?;YWm3q&u4eV
zTfNq!>x!rCsi$+<)A^up+0(TkuKDV-O`&C9Xu-YKxHsE+bh+_p7S*k_v}O07Ty8m;
zMNMl}4fkJJu6lLBLrX2oRqYF&br;7S%INC|vip3FX6vGZd+mO04nel=c5~Htp&PO#
zVd>n1wx##KuFBEs^WA4S?!c1xG|;mg=y{H4ZT$lW61*AXd%k&1s96~N-S8*Fza3rY
Jk+7R_=09j|XEp!;

literal 0
HcmV?d00001

diff --git a/demos/crisis-triage/tests/test_crisis_triage.py b/demos/crisis-triage/tests/test_crisis_triage.py
new file mode 100644
index 0000000..ff22fc7
--- /dev/null
+++ b/demos/crisis-triage/tests/test_crisis_triage.py
@@ -0,0 +1,116 @@
+import re
+import unittest
+from pathlib import Path
+
+
+DEMO_DIR = Path(__file__).resolve().parents[1]
+MODEL_PATH = DEMO_DIR / "crisis-triage.nlogo"
+TRIAGE_TEMPLATE_PATH = DEMO_DIR / "triage-template.yaml"
+DISPATCHER_TEMPLATE_PATH = DEMO_DIR / "dispatcher-template.yaml"
+CONFIG_PATH = DEMO_DIR / "config.txt"
+README_PATH = DEMO_DIR / "README.md"
+
+
+def read(path: Path) -> str:
+    return path.read_text(encoding="utf-8")
+
+
+def model_code_only() -> str:
+    # NetLogo source code appears before the first section delimiter.
+    return read(MODEL_PATH).split("@#$#@#$#@")[0]
+
+
+def parse_config(path: Path) -> dict[str, str]:
+    data: dict[str, str] = {}
+    for raw in read(path).splitlines():
+        line = raw.strip()
+        if not line or line.startswith("#"):
+            continue
+        if "=" not in line:
+            continue
+        key, value = line.split("=", 1)
+        data[key.strip()] = value.strip()
+    return data
+
+
+class TestCrisisTriageArtifacts(unittest.TestCase):
+    def test_required_files_exist(self) -> None:
+        required = [
+            MODEL_PATH,
+            TRIAGE_TEMPLATE_PATH,
+            DISPATCHER_TEMPLATE_PATH,
+            CONFIG_PATH,
+            README_PATH,
+        ]
+        for path in required:
+            self.assertTrue(path.exists(), f"missing file: {path}")
+
+    def test_model_declares_tiered_breeds(self) -> None:
+        code = model_code_only()
+        self.assertIn("breed [cases case]", code)
+        self.assertIn("breed [basic-agents basic-agent]", code)
+        self.assertIn("breed [expert-agents expert-agent]", code)
+        self.assertIn("breed [coordinators coordinator]", code)
+
+    def test_model_contains_required_procedures(self) -> None:
+        code = model_code_only()
+        procedures = [
+            "to setup",
+            "to setup-llm",
+            "to triage-new-cases",
+            "to perform-triage",
+            "to route-triaged-cases",
+            "to dispatch-case",
+            "to coordinator-rebalance",
+            "to reassign-case",
+            "to process-assigned-cases",
+            "to finalize-case",
+        ]
+        for proc in procedures:
+            self.assertIn(proc, code, f"missing procedure: {proc}")
+
+    def test_model_uses_llm_templates_and_config(self) -> None:
+        code = model_code_only()
+        self.assertIn('set config-path "demos/crisis-triage/config.txt"', code)
+        self.assertIn('set triage-template-path "demos/crisis-triage/triage-template.yaml"', code)
+        self.assertIn('set dispatcher-template-path "demos/crisis-triage/dispatcher-template.yaml"', code)
+        self.assertIn("llm:chat-with-template triage-template-path", code)
+        self.assertIn("llm:chat-with-template dispatcher-template-path", code)
+        self.assertIn("heuristic-severity-report", code)
+
+    def test_triage_template_placeholders_match_model(self) -> None:
+        template = read(TRIAGE_TEMPLATE_PATH)
+        placeholders = set(re.findall(r"\{([a-zA-Z_][a-zA-Z0-9_]*)\}", template))
+        self.assertEqual(
+            placeholders,
+            {"incident", "impact", "elapsed_ticks", "known_context"},
+        )
+        self.assertIn("SEVERITY: LOW|MODERATE|HIGH|CRITICAL", template)
+
+    def test_dispatcher_template_placeholders_match_model(self) -> None:
+        template = read(DISPATCHER_TEMPLATE_PATH)
+        placeholders = set(re.findall(r"\{([a-zA-Z_][a-zA-Z0-9_]*)\}", template))
+        self.assertEqual(
+            placeholders,
+            {"severity", "incident", "basic_load", "expert_load", "coordinator_load"},
+        )
+        self.assertIn("ROUTE: BASIC|EXPERT|COORDINATOR", template)
+
+    def test_config_has_required_keys(self) -> None:
+        config = parse_config(CONFIG_PATH)
+        for key in ["provider", "model", "temperature", "max_tokens", "timeout_seconds"]:
+            self.assertIn(key, config, f"missing key in config: {key}")
+
+    def test_readme_has_core_sections(self) -> None:
+        readme = read(README_PATH)
+        for text in [
+            "What it demonstrates",
+            "Model architecture",
+            "Run instructions",
+            "Test suite",
+        ]:
+            self.assertIn(text, readme)
+
+
+if __name__ == "__main__":
+    unittest.main()

From 3a7879eb2b5b190e42ee14ae5b33f368870b8d6e Mon Sep 17 00:00:00 2001
From: JNK234 <jwalapuramnarasimha@gmail.com>
Date: Wed, 4 Mar 2026 23:57:55 -0600
Subject: [PATCH 07/12] demo2: harden crisis-triage file resolution and hold
 notes

---
 demos/crisis-triage/crisis-triage.nlogo | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/demos/crisis-triage/crisis-triage.nlogo b/demos/crisis-triage/crisis-triage.nlogo
index 83d28d1..c587a75 100644
--- a/demos/crisis-triage/crisis-triage.nlogo
+++ b/demos/crisis-triage/crisis-triage.nlogo
@@ -42,6 +42,9 @@ to setup
   set config-path "demos/crisis-triage/config.txt"
   set triage-template-path "demos/crisis-triage/triage-template.yaml"
   set dispatcher-template-path "demos/crisis-triage/dispatcher-template.yaml"
+  set config-path resolve-existing-path config-path "config.txt"
+  set triage-template-path resolve-existing-path triage-template-path "triage-template.yaml"
+  set dispatcher-template-path resolve-existing-path dispatcher-template-path "dispatcher-template.yaml"
 
   set processed-basic 0
   set processed-expert 0
@@ -256,7 +259,9 @@ to dispatch-case [target-case]
 
   if final-tier = "hold" [
     ask target-case [
-      set handling-notes (word handling-notes " | waiting-capacity")
+      if position "waiting-capacity" handling-notes = false [
+        set handling-notes (word handling-notes " | waiting-capacity")
+      ]
     ]
     stop
   ]
@@ -487,6 +492,12 @@ to-report color-for-tier [tier-name]
   report 105
 end
 
+to-report resolve-existing-path [primary fallback]
+  if file-exists? primary [ report primary ]
+  if file-exists? fallback [ report fallback ]
+  report primary
+end
+
 @#$#@#$#@
 GRAPHICS-WINDOW
 230

From a2141adf607bd5b9597baf238c3550dfd43a5dd4 Mon Sep 17 00:00:00 2001
From: JNK234 <jwalapuramnarasimha@gmail.com>
Date: Thu, 5 Mar 2026 17:33:09 -0600
Subject: [PATCH 08/12] Upgrade crisis triage demo to NetLogo 7 .nlogox

---
 demos/crisis-triage/README.md                 |  21 +-
 demos/crisis-triage/config.txt                |  16 +-
 ...isis-triage.nlogo => crisis-triage.nlogox} | 281 +++++-------------
 demos/crisis-triage/tests/README.md           |   2 +-
 .../crisis-triage/tests/test_crisis_triage.py |   9 +-
 5 files changed, 108 insertions(+), 221 deletions(-)
 rename demos/crisis-triage/{crisis-triage.nlogo => crisis-triage.nlogox} (74%)

diff --git a/demos/crisis-triage/README.md b/demos/crisis-triage/README.md
index f3a3f17..4b42e58 100644
--- a/demos/crisis-triage/README.md
+++ b/demos/crisis-triage/README.md
@@ -2,6 +2,8 @@
 
 This demo models a municipal crisis desk where incidents are triaged by an LLM, routed to one of three response tiers, and dynamically escalated when capacity or risk changes.
 
+Target runtime: NetLogo 7.0.3 (`.nlogox` model format).
+
 ## What it demonstrates
 
 - Tiered responders: `basic`, `expert`, `coordinator`
@@ -13,7 +15,7 @@ This demo models a municipal crisis desk where incidents are triaged by an LLM,
 
 ## Deliverables
 
-- `crisis-triage.nlogo`: NetLogo simulation model
+- `crisis-triage.nlogox`: NetLogo 7 simulation model (canonical)
 - `triage-template.yaml`: Severity prompt template
 - `dispatcher-template.yaml`: Routing prompt template
 - `config.txt`: LLM extension configuration
@@ -56,13 +58,18 @@ The NetLogo model loads these by relative path:
 
 ## Run instructions
 
-1. Ensure NetLogo has the `llm` extension available.
-2. Configure provider settings in `config.txt`.
-3. Open `crisis-triage.nlogo` in NetLogo.
+1. Ensure NetLogo 7.0.3 has the `llm` extension available.
+2. Configure provider settings in `config.txt` (default is local Ollama).
+3. Open `crisis-triage.nlogox` in NetLogo.
 4. Click `setup`.
 5. Click `go`.
 6. Optionally click `new-case` to inject additional incidents.
 
+## NetLogo 7 validation guidance
+
+- Primary validation should be GUI-based in NetLogo 7.0.3 (`setup`, then run `go` for multiple ticks).
+- Headless checks can be useful for smoke testing, but GUI validation is recommended as the canonical check due known NetLogo 7 headless/BehaviorSpace limitations.
+
 ## LLM behavior
 
 - Severity is requested using strict output formatting:
@@ -81,6 +88,12 @@ If LLM config fails to load or provider calls fail:
 
 This keeps the simulation functional offline.
 
+## Provider configuration notes
+
+- Default `config.txt` is safe and local-first (`provider=ollama`) with no secrets.
+- Optional cloud examples are commented in `config.txt` for OpenAI, Claude, and Gemini.
+- Never commit real API keys into demo configs.
+
 ## Test suite
 
 Tests are static validations that do not call external APIs.
diff --git a/demos/crisis-triage/config.txt b/demos/crisis-triage/config.txt
index 0c9fcc1..e463e9d 100644
--- a/demos/crisis-triage/config.txt
+++ b/demos/crisis-triage/config.txt
@@ -1,5 +1,5 @@
 # Crisis Triage Demo LLM configuration
-# Path is loaded by crisis-triage.nlogo via llm:load-config
+# Path is loaded by crisis-triage.nlogox via llm:load-config
 
 # Recommended local/default option (no cloud key required)
 provider=ollama
@@ -18,3 +18,17 @@ timeout_seconds=45
 # temperature=0.2
 # max_tokens=120
 # timeout_seconds=45
+
+# provider=claude
+# api_key=YOUR_ANTHROPIC_API_KEY_HERE
+# model=claude-3-5-haiku-latest
+# temperature=0.2
+# max_tokens=120
+# timeout_seconds=45
+
+# provider=gemini
+# api_key=YOUR_GEMINI_API_KEY_HERE
+# model=gemini-2.0-flash
+# temperature=0.2
+# max_tokens=120
+# timeout_seconds=45
diff --git a/demos/crisis-triage/crisis-triage.nlogo b/demos/crisis-triage/crisis-triage.nlogox
similarity index 74%
rename from demos/crisis-triage/crisis-triage.nlogo
rename to demos/crisis-triage/crisis-triage.nlogox
index c587a75..8f47061 100644
--- a/demos/crisis-triage/crisis-triage.nlogo
+++ b/demos/crisis-triage/crisis-triage.nlogox
@@ -1,4 +1,6 @@
-extensions [ llm ]
+<?xml version="1.0" encoding="utf-8"?>
+<model version="NetLogo 7.0.3" snapToGrid="true">
+  <code><![CDATA[extensions [ llm ]
 
 globals [
   llm-ready?
@@ -56,8 +58,8 @@ to setup
 
   setup-llm
   setup-responders
-  create-initial-cases seeded-crises
   reset-ticks
+  create-initial-cases seeded-crises
 end
 
 to setup-llm
@@ -175,10 +177,10 @@ to perform-triage
   if llm-ready? [
     carefully [
       set llm-response llm:chat-with-template triage-template-path (list
-        ["incident" incident-summary]
-        ["impact" reported-impact]
-        ["elapsed_ticks" (word ticks)]
-        ["known_context" "Municipal crisis operations center with three response tiers"]
+        (list "incident" incident-summary)
+        (list "impact" reported-impact)
+        (list "elapsed_ticks" (word ticks))
+        (list "known_context" "Municipal crisis operations center with three response tiers")
       )
     ] [
       set llm-response ""
@@ -297,11 +299,11 @@ to-report dispatch-recommendation [target-case]
   let llm-response ""
   carefully [
     set llm-response llm:chat-with-template dispatcher-template-path (list
-      ["severity" [severity-band] of target-case]
-      ["incident" [incident-summary] of target-case]
-      ["basic_load" (word count cases with [queue-state = "assigned" and assigned-tier = "basic"])]
-      ["expert_load" (word count cases with [queue-state = "assigned" and assigned-tier = "expert"])]
-      ["coordinator_load" (word count cases with [queue-state = "assigned" and assigned-tier = "coordinator"])]
+      (list "severity" [severity-band] of target-case)
+      (list "incident" [incident-summary] of target-case)
+      (list "basic_load" (word count cases with [queue-state = "assigned" and assigned-tier = "basic"]))
+      (list "expert_load" (word count cases with [queue-state = "assigned" and assigned-tier = "expert"]))
+      (list "coordinator_load" (word count cases with [queue-state = "assigned" and assigned-tier = "coordinator"]))
     )
   ] [
     set llm-response ""
@@ -497,176 +499,22 @@ to-report resolve-existing-path [primary fallback]
   if file-exists? fallback [ report fallback ]
   report primary
 end
-
-@#$#@#$#@
-GRAPHICS-WINDOW
-230
-10
-747
-528
--1
--1
-15.0
-1
-10
-1
-1
-1
-0
-1
-1
-1
--16
-16
--16
-16
-1
-1
-1
-ticks
-30.0
-
-BUTTON
-20
-20
-88
-53
-setup
-setup
-NIL
-1
-T
-OBSERVER
-NIL
-NIL
-NIL
-NIL
-1
-
-BUTTON
-96
-20
-164
-53
-go
-go
-T
-1
-T
-OBSERVER
-NIL
-NIL
-NIL
-NIL
-1
-
-BUTTON
-20
-60
-164
-93
-new-case
-spawn-random-case
-NIL
-1
-T
-OBSERVER
-NIL
-NIL
-NIL
-NIL
-1
-
-MONITOR
-20
-110
-163
-155
-LLM Active
-llm-ready?
-17
-1
-11
-
-MONITOR
-20
-160
-164
-205
-New Queue
-count cases with [queue-state = "new"]
-17
-1
-11
-
-MONITOR
-20
-210
-164
-255
-Triaged Queue
-count cases with [queue-state = "triaged"]
-17
-1
-11
-
-MONITOR
-20
-260
-164
-305
-Assigned Queue
-count cases with [queue-state = "assigned"]
-17
-1
-11
-
-MONITOR
-20
-310
-164
-355
-Escalations
-escalated-count
-17
-1
-11
-
-MONITOR
-20
-360
-164
-405
-Done by Basic
-processed-basic
-17
-1
-11
-
-MONITOR
-20
-410
-164
-455
-Done by Expert
-processed-expert
-17
-1
-11
-
-MONITOR
-20
-460
-164
-505
-Done by Coordinator
-processed-coordinator
-17
-1
-11
-
-@#$#@#$#@
-## Crisis Triage with Tiered Intelligence Coordination
+]]></code>
+  <widgets>
+    <view x="230" wrappingAllowedX="true" y="10" frameRate="30.0" minPycor="-16" height="518" showTickCounter="true" patchSize="15.0" fontSize="10" wrappingAllowedY="true" width="517" tickCounterLabel="ticks" maxPycor="16" updateMode="1" maxPxcor="16" minPxcor="-16"></view>
+    <button x="20" y="20" height="33" disableUntilTicks="false" forever="false" kind="Observer" display="setup" width="68" sizeVersion="0">setup</button>
+    <button x="96" y="20" height="33" disableUntilTicks="false" forever="true" kind="Observer" display="go" width="68" sizeVersion="0">go</button>
+    <button x="20" y="60" height="33" disableUntilTicks="false" forever="false" kind="Observer" display="new-case" width="144" sizeVersion="0">spawn-random-case</button>
+    <monitor x="20" precision="17" y="110" height="45" fontSize="11" display="LLM Active" width="143" sizeVersion="0">llm-ready?</monitor>
+    <monitor x="20" precision="17" y="160" height="45" fontSize="11" display="New Queue" width="144" sizeVersion="0">count cases with [queue-state = "new"]</monitor>
+    <monitor x="20" precision="17" y="210" height="45" fontSize="11" display="Triaged Queue" width="144" sizeVersion="0">count cases with [queue-state = "triaged"]</monitor>
+    <monitor x="20" precision="17" y="260" height="45" fontSize="11" display="Assigned Queue" width="144" sizeVersion="0">count cases with [queue-state = "assigned"]</monitor>
+    <monitor x="20" precision="17" y="310" height="45" fontSize="11" display="Escalations" width="144" sizeVersion="0">escalated-count</monitor>
+    <monitor x="20" precision="17" y="360" height="45" fontSize="11" display="Done by Basic" width="144" sizeVersion="0">processed-basic</monitor>
+    <monitor x="20" precision="17" y="410" height="45" fontSize="11" display="Done by Expert" width="144" sizeVersion="0">processed-expert</monitor>
+    <monitor x="20" precision="17" y="460" height="45" fontSize="11" display="Done by Coordinator" width="144" sizeVersion="0">processed-coordinator</monitor>
+  </widgets>
+  <info>## Crisis Triage with Tiered Intelligence Coordination
 
 This demo simulates emergency incident flow through three responder tiers:
 
@@ -684,34 +532,43 @@ Routing then uses `dispatcher-template.yaml` and capacity-aware fallback logic.
 3. Click `go`.
 4. Use `new-case` to inject incidents manually.
 
-If LLM config is unavailable, the model automatically uses deterministic heuristic triage.
-@#$#@#$#@
-default
-true
-0
-Polygon -7500403 true true 150 5 40 250 150 205 260 250
-
-circle
-false
-0
-Circle -7500403 true true 0 0 300
-@#$#@#$#@
-NetLogo 6.4.0
-@#$#@#$#@
-@#$#@#$#@
-@#$#@#$#@
-@#$#@#$#@
-@#$#@#$#@
-default
-0.0
--0.2 0 0.0 1.0
-0.0 1 1.0 0.0
-0.2 0 0.0 1.0
-link direction
-true
-0
-Line -7500403 true 150 150 90 180
-Line -7500403 true 150 150 210 180
-@#$#@#$#@
-1
-@#$#@#$#@
+If LLM config is unavailable, the model automatically uses deterministic heuristic triage.</info>
+  <turtleShapes>
+    <shape name="default" rotatable="true" editableColorIndex="0">
+      <polygon color="-1920102913" filled="true" marked="true">
+        <point x="150" y="5"></point>
+        <point x="40" y="250"></point>
+        <point x="150" y="205"></point>
+        <point x="260" y="250"></point>
+      </polygon>
+    </shape>
+    <shape name="circle" rotatable="false" editableColorIndex="0">
+      <circle x="0" y="0" marked="true" color="-1920102913" diameter="300" filled="true"></circle>
+    </shape>
+  </turtleShapes>
+  <linkShapes>
+    <shape name="default" curviness="0.0">
+      <lines>
+        <line x="-0.2" visible="false">
+          <dash value="0.0"></dash>
+          <dash value="1.0"></dash>
+        </line>
+        <line x="0.0" visible="true">
+          <dash value="1.0"></dash>
+          <dash value="0.0"></dash>
+        </line>
+        <line x="0.2" visible="false">
+          <dash value="0.0"></dash>
+          <dash value="1.0"></dash>
+        </line>
+      </lines>
+      <indicator>
+        <shape name="link direction" rotatable="true" editableColorIndex="0">
+          <line endX="90" startY="150" marked="true" color="-1920102913" endY="180" startX="150"></line>
+          <line endX="210" startY="150" marked="true" color="-1920102913" endY="180" startX="150"></line>
+        </shape>
+      </indicator>
+    </shape>
+  </linkShapes>
+  <previewCommands>setup repeat 75 [ go ]</previewCommands>
+</model>
diff --git a/demos/crisis-triage/tests/README.md b/demos/crisis-triage/tests/README.md
index 16032eb..fc5ec4b 100644
--- a/demos/crisis-triage/tests/README.md
+++ b/demos/crisis-triage/tests/README.md
@@ -9,7 +9,7 @@ python -m unittest discover -s demos/crisis-triage/tests -p "test_*.py" -v
 These tests validate:
 
 - Presence of all required demo files
-- NetLogo tiered-agent and triage/dispatch procedure structure
+- NetLogo 7 `.nlogox` tiered-agent and triage/dispatch procedure structure
 - LLM template variable consistency with model substitutions
 - Config key completeness
 - README documentation coverage
diff --git a/demos/crisis-triage/tests/test_crisis_triage.py b/demos/crisis-triage/tests/test_crisis_triage.py
index ff22fc7..842d455 100644
--- a/demos/crisis-triage/tests/test_crisis_triage.py
+++ b/demos/crisis-triage/tests/test_crisis_triage.py
@@ -4,7 +4,7 @@
 
 
 DEMO_DIR = Path(__file__).resolve().parents[1]
-MODEL_PATH = DEMO_DIR / "crisis-triage.nlogo"
+MODEL_PATH = DEMO_DIR / "crisis-triage.nlogox"
 TRIAGE_TEMPLATE_PATH = DEMO_DIR / "triage-template.yaml"
 DISPATCHER_TEMPLATE_PATH = DEMO_DIR / "dispatcher-template.yaml"
 CONFIG_PATH = DEMO_DIR / "config.txt"
@@ -16,8 +16,11 @@ def read(path: Path) -> str:
 
 
 def model_code_only() -> str:
-    # NetLogo source code appears before the first section delimiter.
-    return read(MODEL_PATH).split("@#$#@#$#@")[0]
+    xml = read(MODEL_PATH)
+    match = re.search(r"<code><!\[CDATA\[(.*?)\]\]></code>", xml, re.DOTALL)
+    if not match:
+        raise AssertionError("unable to parse <code><![CDATA[...]]></code> from model")
+    return match.group(1)
 
 
 def parse_config(path: Path) -> dict[str, str]:

From 5cba12fdb7bb679eea2081f72dc1a43d7f6aae25 Mon Sep 17 00:00:00 2001
From: JNK234 <jwalapuramnarasimha@gmail.com>
Date: Thu, 5 Mar 2026 20:09:40 -0600
Subject: [PATCH 09/12] demo2: strengthen crisis-triage tests with XML parsing
 and regression checks

Replace regex-based .nlogox parsing with xml.etree.ElementTree for
proper XML validation. Add three new test classes (19 tests):

- TestModelXmlParsing: validates CDATA sections, widgets structure,
  button/monitor counts, turtle shapes via real XML parsing
- TestModelStructure: asserts NetLogo 7.0.3 version, required top-level
  sections (code, widgets, info, turtleShapes, linkShapes, previewCommands)
- TestBehaviorRegression: ensures list syntax for chat-with-template,
  no deprecated primitives, balanced to/end blocks, globals and owns

All 8 original tests preserved and passing (27 total).
---
 .../crisis-triage/tests/test_crisis_triage.py | 167 +++++++++++++++++-
 1 file changed, 162 insertions(+), 5 deletions(-)

diff --git a/demos/crisis-triage/tests/test_crisis_triage.py b/demos/crisis-triage/tests/test_crisis_triage.py
index 842d455..55b43ae 100644
--- a/demos/crisis-triage/tests/test_crisis_triage.py
+++ b/demos/crisis-triage/tests/test_crisis_triage.py
@@ -1,5 +1,6 @@
 import re
 import unittest
+import xml.etree.ElementTree as ET
 from pathlib import Path
 
 
@@ -15,12 +16,18 @@ def read(path: Path) -> str:
     return path.read_text(encoding="utf-8")
 
 
+def parse_model() -> ET.Element:
+    """Parse the .nlogox model file as XML and return the root element."""
+    return ET.parse(MODEL_PATH).getroot()
+
+
 def model_code_only() -> str:
-    xml = read(MODEL_PATH)
-    match = re.search(r"<code><!\[CDATA\[(.*?)\]\]></code>", xml, re.DOTALL)
-    if not match:
-        raise AssertionError("unable to parse <code><![CDATA[...]]></code> from model")
-    return match.group(1)
+    """Extract the NetLogo code from the <code> CDATA section using XML parsing."""
+    root = parse_model()
+    code_elem = root.find("code")
+    if code_elem is None or code_elem.text is None:
+        raise AssertionError("unable to extract <code> content from model XML")
+    return code_elem.text
 
 
 def parse_config(path: Path) -> dict[str, str]:
@@ -115,5 +122,155 @@ def test_readme_has_core_sections(self) -> None:
             self.assertIn(text, readme)
 
 
+class TestModelXmlParsing(unittest.TestCase):
+    """Validate the .nlogox file using proper XML parsing instead of regex."""
+
+    def setUp(self) -> None:
+        self.root = parse_model()
+
+    def test_model_parses_as_valid_xml(self) -> None:
+        self.assertEqual(self.root.tag, "model")
+
+    def test_code_element_contains_cdata_content(self) -> None:
+        code_elem = self.root.find("code")
+        self.assertIsNotNone(code_elem, "missing <code> element")
+        self.assertIsNotNone(code_elem.text, "<code> element has no text content")
+        self.assertIn("extensions [ llm ]", code_elem.text)
+
+    def test_raw_file_preserves_cdata_wrapping(self) -> None:
+        raw = read(MODEL_PATH)
+        self.assertIn("<code><![CDATA[", raw)
+        self.assertIn("]]></code>", raw)
+
+    def test_widgets_section_has_expected_children(self) -> None:
+        widgets = self.root.find("widgets")
+        self.assertIsNotNone(widgets, "missing <widgets> section")
+        child_tags = [child.tag for child in widgets]
+        self.assertIn("view", child_tags)
+        self.assertIn("button", child_tags)
+        self.assertIn("monitor", child_tags)
+
+    def test_widgets_button_count(self) -> None:
+        widgets = self.root.find("widgets")
+        buttons = widgets.findall("button")
+        self.assertEqual(len(buttons), 3, "expected 3 buttons: setup, go, new-case")
+
+    def test_widgets_monitor_count(self) -> None:
+        widgets = self.root.find("widgets")
+        monitors = widgets.findall("monitor")
+        self.assertGreaterEqual(len(monitors), 7, "expected at least 7 monitors")
+
+    def test_turtle_shapes_defined(self) -> None:
+        shapes = self.root.find("turtleShapes")
+        self.assertIsNotNone(shapes, "missing <turtleShapes> section")
+        shape_names = [s.get("name") for s in shapes.findall("shape")]
+        self.assertIn("default", shape_names)
+        self.assertIn("circle", shape_names)
+
+
+class TestModelStructure(unittest.TestCase):
+    """Structural assertions on the NetLogo 7.x .nlogox format."""
+
+    def setUp(self) -> None:
+        self.root = parse_model()
+
+    def test_netlogo_version_is_7_0_3(self) -> None:
+        version = self.root.get("version")
+        self.assertEqual(version, "NetLogo 7.0.3")
+
+    def test_required_top_level_sections_exist(self) -> None:
+        required_sections = [
+            "code", "widgets", "info", "turtleShapes", "linkShapes",
+            "previewCommands",
+        ]
+        present = {child.tag for child in self.root}
+        for section in required_sections:
+            self.assertIn(section, present, f"missing top-level section: {section}")
+
+    def test_info_section_not_empty(self) -> None:
+        info = self.root.find("info")
+        self.assertIsNotNone(info, "missing <info> section")
+        self.assertTrue(
+            info.text and len(info.text.strip()) > 0,
+            "<info> section is empty",
+        )
+
+    def test_preview_commands_present(self) -> None:
+        preview = self.root.find("previewCommands")
+        self.assertIsNotNone(preview)
+        self.assertIn("setup", preview.text)
+
+    def test_link_shapes_has_default(self) -> None:
+        link_shapes = self.root.find("linkShapes")
+        self.assertIsNotNone(link_shapes, "missing <linkShapes>")
+        names = [s.get("name") for s in link_shapes.findall("shape")]
+        self.assertIn("default", names)
+
+
+class TestBehaviorRegression(unittest.TestCase):
+    """Catch regressions in model syntax and LLM extension usage patterns."""
+
+    def setUp(self) -> None:
+        self.code = model_code_only()
+
+    def test_extensions_declaration_present(self) -> None:
+        self.assertIn("extensions [ llm ]", self.code)
+
+    def test_chat_with_template_uses_list_syntax(self) -> None:
+        """Ensure llm:chat-with-template uses (list ...) not [...] for variables."""
+        lines = self.code.splitlines()
+        for line in lines:
+            stripped = line.strip()
+            if "llm:chat-with-template" not in stripped:
+                continue
+            # The template call should be followed by (list on the same or next
+            # logical line.  It must NOT use bracket syntax like [["key" val]].
+            self.assertNotRegex(
+                stripped,
+                r'llm:chat-with-template\s+\S+\s+\[\[',
+                f"bracket syntax found instead of (list ...): {stripped}",
+            )
+
+    def test_no_inline_provider_setup_in_procedures(self) -> None:
+        """Model should use llm:load-config, not manual set-provider/set-api-key."""
+        for deprecated in ["llm:set-provider", "llm:set-api-key", "llm:set-model"]:
+            self.assertNotIn(
+                deprecated,
+                self.code,
+                f"deprecated inline primitive found: {deprecated}",
+            )
+
+    def test_all_procedure_blocks_are_closed(self) -> None:
+        """Every 'to' or 'to-report' must have a matching 'end'."""
+        opens = len(re.findall(r"^to(?:-report)?\s", self.code, re.MULTILINE))
+        closes = len(re.findall(r"^end\s*$", self.code, re.MULTILINE))
+        self.assertEqual(
+            opens,
+            closes,
+            f"mismatched procedure blocks: {opens} opens vs {closes} ends",
+        )
+
+    def test_no_deprecated_primitives(self) -> None:
+        """Guard against usage of removed or renamed LLM extension primitives."""
+        deprecated = [
+            "llm:ask",
+            "llm:send",
+            "llm:query",
+            "llm:prompt",
+        ]
+        for prim in deprecated:
+            self.assertNotIn(prim, self.code, f"deprecated primitive: {prim}")
+
+    def test_globals_declared(self) -> None:
+        self.assertIn("globals [", self.code)
+        for g in ["llm-ready?", "config-path", "triage-template-path",
+                   "dispatcher-template-path"]:
+            self.assertIn(g, self.code, f"missing global: {g}")
+
+    def test_breed_owns_blocks_present(self) -> None:
+        self.assertIn("turtles-own [", self.code)
+        self.assertIn("cases-own [", self.code)
+
+
 if __name__ == "__main__":
     unittest.main()

From d388349edaa6104df3ec7aa0814a9f0f9a62fc92 Mon Sep 17 00:00:00 2001
From: JNK234 <jwalapuramnarasimha@gmail.com>
Date: Thu, 12 Mar 2026 21:35:31 -0500
Subject: [PATCH 10/12] feat: crisis-triage demo with 3 targeted improvements

Complete rewrite of the crisis-triage demo with 30 incidents (10 misleading,
10 clear, 10 borderline), 3 dispatcher personas (Veteran, Rookie, Analyst),
episode-based memory management, and A/B heuristic comparison.

Three targeted improvements from live testing:
- Routing prompt: explicit severity-to-tier mapping rules replacing vague
  descriptions, expected to raise route accuracy from ~28% to ~50%+
- Visual pipeline: incidents flow through y-axis zones (spawn near dispatchers,
  stage after triage, move to responders when routed, graveyard when resolved)
- Per-persona accuracy monitors: Veteran/Rookie/Analyst individual accuracy
  displayed in dedicated UI monitors

29 static validation tests pass.
---
 demos/crisis-triage/README.md                 |  154 +-
 demos/crisis-triage/config.txt                |   10 +-
 demos/crisis-triage/crisis-triage.nlogox      | 1349 ++++++++++++-----
 demos/crisis-triage/dispatcher-template.yaml  |   28 +-
 demos/crisis-triage/tests/README.md           |   15 +-
 .../crisis-triage/tests/test_crisis_triage.py |  115 +-
 demos/crisis-triage/triage-template.yaml      |   23 +-
 7 files changed, 1158 insertions(+), 536 deletions(-)

diff --git a/demos/crisis-triage/README.md b/demos/crisis-triage/README.md
index 4b42e58..d7c5509 100644
--- a/demos/crisis-triage/README.md
+++ b/demos/crisis-triage/README.md
@@ -1,114 +1,100 @@
-# Demo 2: Crisis Triage with Tiered Intelligence Coordination
+# Demo 2: Crisis Triage with Ambiguous Incidents
 
-This demo models a municipal crisis desk where incidents are triaged by an LLM, routed to one of three response tiers, and dynamically escalated when capacity or risk changes.
+A municipal emergency operations center where LLM-powered dispatchers assess ambiguous crisis reports — demonstrating that keyword matching fails when incidents are deliberately misleading, but LLMs reading full impact descriptions can succeed.
 
 Target runtime: NetLogo 7.0.3 (`.nlogox` model format).
 
-## What it demonstrates
+## The Story
 
-- Tiered responders: `basic`, `expert`, `coordinator`
-- LLM-driven severity assessment via `triage-template.yaml`
-- LLM-assisted dispatch recommendation via `dispatcher-template.yaml`
-- Capacity-aware fallback routing when a preferred tier is saturated
-- Coordinator-triggered escalation for risky or critical in-flight cases
-- Automatic heuristic fallback if LLM config/provider is unavailable
+Three dispatchers — Veteran, Rookie, and Analyst — receive a stream of crisis incidents. Each must assess severity and route to the right response tier. The incident bank includes **misleading cases** where surface keywords don't match reality:
 
-## Deliverables
+- "Toxic chemical spill at school" → actually spilled vinegar (LOW severity)
+- "Minor water leak in basement" → threatening a neonatal ICU (CRITICAL severity)
+- "Dog loose on highway" → causing a multi-vehicle pileup (HIGH severity)
 
-- `crisis-triage.nlogox`: NetLogo 7 simulation model (canonical)
-- `triage-template.yaml`: Severity prompt template
-- `dispatcher-template.yaml`: Routing prompt template
-- `config.txt`: LLM extension configuration
-- `tests/`: Automated validation tests
+A naive keyword heuristic over-triggers on "toxic", "fire", "collapse" and fails on these cases. The LLM reads the full impact description and can assess correctly.
 
-## Model architecture
+## Quick Start
 
-### Agent tiers
+1. Edit `config.txt` with your provider credentials (default: local Ollama).
+2. Open `crisis-triage.nlogox` in NetLogo 7.0.3.
+3. Click **setup** → dispatchers appear with persona labels, responders by tier.
+4. Click **go** → incidents spawn, flow through the pipeline, monitors update.
+5. Watch the output log for `[TRIAGE]`, `[ROUTE]`, and `[REFLECT]` messages.
 
-- `basic-agents`
-  - Highest volume, low-complexity workload
-  - Lower completion probability for hard cases
-- `expert-agents`
-  - Moderate/high severity handling
-  - Better completion rates on difficult incidents
-- `coordinators`
-  - Critical incidents and system-level balancing
-  - Reassign risky cases from lower tiers
+## How to Use
 
-### Incident lifecycle
+### Controls
 
-1. New incident is created (`queue-state = "new"`)
-2. Triage step classifies severity (`low/moderate/high/critical`)
-3. Dispatch step chooses preferred tier and applies capacity fallback
-4. Case is processed by assigned tier
-5. Coordinator may reassign active risky cases
-6. Resolved incidents are counted per tier
+| Control | Type | Purpose |
+|---------|------|---------|
+| `use-llm?` | Switch | Toggle between LLM dispatchers and naive heuristic |
+| `memory-mode` | Chooser | persistent / per-episode / none |
+| `reflection-interval` | Slider | Ticks between dispatcher self-reflection (0 = off) |
+| `incident-rate` | Slider | Probability (%) of new incident per tick |
+| `episode-length` | Slider | Ticks per episode boundary (0 = no episodes) |
+| `add incident` | Button | Manually inject a random incident |
+| `force reflect` | Button | Trigger immediate reflection for all dispatchers |
 
-## Files and paths
+### What to Observe
 
-All files for this demo live in:
+- **Misleading%** — The key metric. Accuracy on misleading incidents where keywords don't match reality.
+- **Triage Acc%** / **Route Acc%** — Overall accuracy vs ground truth.
+- **Accuracy Over Time** plot — Watch how accuracy evolves, especially with memory.
+- **Per-persona differences** — Veteran, Rookie, and Analyst may perform differently.
+- **Reflection log** — Dispatchers reason about their own performance.
 
-`demos/crisis-triage/`
+## The A/B Experiment
 
-The NetLogo model loads these by relative path:
+1. Run with `use-llm?` ON for 50+ ticks. Note the Misleading% metric.
+2. Click setup again. Toggle `use-llm?` OFF. Run for 50+ ticks.
+3. Compare:
+   - **Heuristic**: ~30% on misleading cases (keywords mislead it).
+   - **LLM**: Expected ~70%+ on misleading cases (reads actual impact).
+4. Compare memory modes: Run with "persistent" vs "none" over multiple episodes.
 
-- `demos/crisis-triage/config.txt`
-- `demos/crisis-triage/triage-template.yaml`
-- `demos/crisis-triage/dispatcher-template.yaml`
+## LLM Primitives Exercised (8)
 
-## Run instructions
+| Primitive | Where | Paper Concept |
+|-----------|-------|---------------|
+| `llm:load-config` | `setup-llm` | Config management |
+| `llm:set-history` | `setup-dispatchers` — persona injection | Personalization (Ch.2) |
+| `llm:chat-with-template` | `triage-my-incidents` — severity assessment | Environment/Interface (Ch.1) |
+| `llm:choose` | `route-my-incidents` — bounded tier selection | Bounded Rationality |
+| `llm:history` | `dispatcher-reflect` — check history length | Memory (Ch.3) |
+| `llm:chat` | `dispatcher-reflect` — freeform reflection | Reflection (Ch.3) |
+| `llm:clear-history` | `handle-episode-boundary` — configurable reset | Memory ablation |
+| `llm:active` | Monitor widget — show provider/model | Provider awareness |
 
-1. Ensure NetLogo 7.0.3 has the `llm` extension available.
-2. Configure provider settings in `config.txt` (default is local Ollama).
-3. Open `crisis-triage.nlogox` in NetLogo.
-4. Click `setup`.
-5. Click `go`.
-6. Optionally click `new-case` to inject additional incidents.
+## Design Rationale
 
-## NetLogo 7 validation guidance
+**Why dispatchers use LLM, not responders**: Triage and routing are judgment calls where reading context matters. Case processing is mechanical — it doesn't benefit from language understanding.
 
-- Primary validation should be GUI-based in NetLogo 7.0.3 (`setup`, then run `go` for multiple ticks).
-- Headless checks can be useful for smoke testing, but GUI validation is recommended as the canonical check due known NetLogo 7 headless/BehaviorSpace limitations.
+**Why no thinking/reasoning models**: With 3 dispatchers making 2+ LLM calls per tick, thinking models would add minutes of latency per tick. The triage task is classification, not multi-step reasoning. Standard `llm:chat-with-template` and `llm:choose` are the right tools.
 
-## LLM behavior
+**Why `llm:choose` for routing**: Guarantees the output is one of the valid tier names, avoiding parsing failures from freeform text.
 
-- Severity is requested using strict output formatting:
-  - `SEVERITY: LOW|MODERATE|HIGH|CRITICAL`
-- Routing is requested using strict output formatting:
-  - `ROUTE: BASIC|EXPERT|COORDINATOR`
-- Parser logic in the model extracts these tags and falls back safely when missing.
+**Why misleading incidents**: They make the LLM genuinely necessary. Without them, keyword matching achieves similar accuracy and the LLM adds cost without value.
 
-## Heuristic fallback mode
+## Paper Connection
 
-If LLM config fails to load or provider calls fail:
+This demo implements concepts from the Gao et al. (2312.11970) LLM-ABM survey:
 
-- `llm-ready?` monitor is `false`
-- Severity uses keyword-driven deterministic rules
-- Routing uses severity-to-tier defaults + capacity fallback
+- **Personalization** (Ch.2): Dispatcher personas via `llm:set-history` produce different decisions from the same model.
+- **Bounded Rationality**: `llm:choose` constrains decisions to valid options.
+- **Memory** (Ch.3): Configurable memory modes show how history retention affects performance.
+- **Reflection** (Ch.3): Dispatchers reason about their own accuracy and identify patterns.
+- **Environment/Interface** (Ch.1): Templates structure how agents perceive incidents.
 
-This keeps the simulation functional offline.
+## Files
 
-## Provider configuration notes
+| File | Purpose |
+|------|---------|
+| `crisis-triage.nlogox` | NetLogo 7 simulation model |
+| `triage-template.yaml` | Severity assessment prompt with anti-keyword-bias guidance |
+| `dispatcher-template.yaml` | Documentation stub (routing uses `llm:choose`) |
+| `config.txt` | LLM provider configuration |
 
-- Default `config.txt` is safe and local-first (`provider=ollama`) with no secrets.
-- Optional cloud examples are commented in `config.txt` for OpenAI, Claude, and Gemini.
-- Never commit real API keys into demo configs.
+## Provider Configuration
 
-## Test suite
-
-Tests are static validations that do not call external APIs.
-
-Run from repository root:
-
-```bash
-python -m unittest discover -s demos/crisis-triage/tests -p "test_*.py" -v
-```
-
-Coverage includes:
-
-- Required files present
-- NetLogo model includes tiered breeds and key procedures
-- Model references both YAML templates and config
-- Template variables match model substitution keys
-- Config includes required LLM keys
-- README contains usage, architecture, and test instructions
+Default is local Ollama (no API key needed). See commented examples in `config.txt` for OpenAI, Claude, and Gemini. Never commit real API keys.
diff --git a/demos/crisis-triage/config.txt b/demos/crisis-triage/config.txt
index e463e9d..aed3eab 100644
--- a/demos/crisis-triage/config.txt
+++ b/demos/crisis-triage/config.txt
@@ -3,12 +3,12 @@
 
 # Recommended local/default option (no cloud key required)
 provider=ollama
-model=llama3.2:latest
+model=llama3.2:3b
 base_url=http://localhost:11434
 
 # Runtime behavior
 temperature=0.2
-max_tokens=120
+max_tokens=200
 timeout_seconds=45
 
 # Optional cloud fallback examples (commented)
@@ -16,19 +16,19 @@ timeout_seconds=45
 # api_key=YOUR_OPENAI_API_KEY_HERE
 # model=gpt-4o-mini
 # temperature=0.2
-# max_tokens=120
+# max_tokens=200
 # timeout_seconds=45
 
 # provider=claude
 # api_key=YOUR_ANTHROPIC_API_KEY_HERE
 # model=claude-3-5-haiku-latest
 # temperature=0.2
-# max_tokens=120
+# max_tokens=200
 # timeout_seconds=45
 
 # provider=gemini
 # api_key=YOUR_GEMINI_API_KEY_HERE
 # model=gemini-2.0-flash
 # temperature=0.2
-# max_tokens=120
+# max_tokens=200
 # timeout_seconds=45
diff --git a/demos/crisis-triage/crisis-triage.nlogox b/demos/crisis-triage/crisis-triage.nlogox
index 8f47061..873b00e 100644
--- a/demos/crisis-triage/crisis-triage.nlogox
+++ b/demos/crisis-triage/crisis-triage.nlogox
@@ -1,538 +1,1120 @@
 <?xml version="1.0" encoding="utf-8"?>
 <model version="NetLogo 7.0.3" snapToGrid="true">
-  <code><![CDATA[extensions [ llm ]
+  <code><![CDATA[;; ABOUTME: Crisis triage simulation where LLM dispatchers assess ambiguous incidents,
+;; ABOUTME: demonstrating personas, memory, bounded choice, and reflection vs naive heuristics.
+
+extensions [ llm ]
+
+;; ---------------------------------------------------------------------------
+;; Globals
+;; ---------------------------------------------------------------------------
 
 globals [
   llm-ready?
   config-path
   triage-template-path
-  dispatcher-template-path
-  processed-basic
-  processed-expert
-  processed-coordinator
-  escalated-count
-  seeded-crises
-  case-arrival-probability
+
+  ;; Incident bank: list of [summary impact ground-truth-severity ground-truth-tier category]
+  incident-bank
+
+  ;; Metrics
+  total-triaged
+  correct-triage
+  total-routed
+  correct-route
+  total-late
+  total-escalated
+  total-resolved
+  total-response-ticks
+  misleading-triaged
+  misleading-correct
+
+  ;; Episode tracking
+  current-episode
+  episode-tick-counter
 ]
 
-breed [cases case]
-breed [basic-agents basic-agent]
-breed [expert-agents expert-agent]
-breed [coordinators coordinator]
+;; Interface globals (from widgets):
+;;   use-llm?          — switch: A/B toggle between LLM and heuristic
+;;   memory-mode       — chooser: "persistent" / "per-episode" / "none"
+;;   reflection-interval — slider: ticks between reflection calls
+;;   incident-rate     — slider: probability of new incident per tick (0-100)
+;;   episode-length    — slider: ticks per episode (0 = no episodes)
+
+;; ---------------------------------------------------------------------------
+;; Breeds
+;; ---------------------------------------------------------------------------
+
+breed [ dispatchers dispatcher ]
+breed [ incidents incident ]
+breed [ responders responder ]
+
+;; ---------------------------------------------------------------------------
+;; Agent variables
+;; ---------------------------------------------------------------------------
+
+dispatchers-own [
+  persona-name
+  persona-prompt
+  my-triaged
+  my-correct-triage
+  my-routed
+  my-correct-route
+]
+
+incidents-own [
+  summary
+  impact
+  ground-truth-severity   ;; "LOW" "MODERATE" "HIGH" "CRITICAL"
+  ground-truth-tier       ;; "BASIC" "EXPERT" "COORDINATOR"
+  incident-category       ;; "misleading" "clear" "borderline"
+  assessed-severity       ;; what the dispatcher said
+  assessed-tier           ;; what the dispatcher routed to
+  queue-state             ;; "new" "triaged" "routed" "active" "resolved" "late"
+  deadline                ;; tick by which it should be resolved
+  triage-correct?
+  route-correct?
+  created-at
+  assigned-responder
+]
 
-turtles-own [
-  tier
+responders-own [
+  tier                    ;; "BASIC" "EXPERT" "COORDINATOR"
   capacity
   current-load
-  processed-count
+  resolved-count
 ]
 
-cases-own [
-  incident-summary
-  reported-impact
-  severity-band
-  severity-score
-  queue-state
-  assigned-tier
-  assigned-agent
-  handling-notes
-  created-at
-]
+;; ===========================================================================
+;; SETUP
+;; ===========================================================================
 
 to setup
   clear-all
+
   set config-path "demos/crisis-triage/config.txt"
   set triage-template-path "demos/crisis-triage/triage-template.yaml"
-  set dispatcher-template-path "demos/crisis-triage/dispatcher-template.yaml"
-  set config-path resolve-existing-path config-path "config.txt"
-  set triage-template-path resolve-existing-path triage-template-path "triage-template.yaml"
-  set dispatcher-template-path resolve-existing-path dispatcher-template-path "dispatcher-template.yaml"
-
-  set processed-basic 0
-  set processed-expert 0
-  set processed-coordinator 0
-  set escalated-count 0
-
-  set seeded-crises 12
-  set case-arrival-probability 0.25
-
+  set config-path resolve-path config-path "config.txt"
+  set triage-template-path resolve-path triage-template-path "triage-template.yaml"
+
+  set total-triaged 0
+  set correct-triage 0
+  set total-routed 0
+  set correct-route 0
+  set total-late 0
+  set total-escalated 0
+  set total-resolved 0
+  set total-response-ticks 0
+  set misleading-triaged 0
+  set misleading-correct 0
+  set current-episode 1
+  set episode-tick-counter 0
+
+  build-incident-bank
   setup-llm
+  setup-dispatchers
   setup-responders
+
   reset-ticks
-  create-initial-cases seeded-crises
 end
 
+to-report resolve-path [ primary fallback ]
+  if file-exists? primary [ report primary ]
+  if file-exists? fallback [ report fallback ]
+  report primary
+end
+
+;; ---------------------------------------------------------------------------
+;; Setup LLM
+;; ---------------------------------------------------------------------------
+
 to setup-llm
   set llm-ready? false
   carefully [
     if file-exists? config-path [
       llm:load-config config-path
       set llm-ready? true
+      output-print (word "[SETUP] LLM config loaded from: " config-path)
+    ]
+    if not llm-ready? [
+      output-print "[SETUP] Config not found — heuristic mode only"
     ]
   ] [
     set llm-ready? false
-    print (word "LLM setup fallback to heuristic triage: " error-message)
+    output-print (word "[SETUP] LLM load failed: " error-message)
   ]
 end
 
+;; ---------------------------------------------------------------------------
+;; Setup Dispatchers (3 personas)
+;; ---------------------------------------------------------------------------
+
+to setup-dispatchers
+  let personas (list
+    (list "Veteran"  "You are a 20-year veteran dispatcher. You've seen every kind of crisis and tend to be calm and measured. You look past alarming keywords to assess actual impact. You rarely escalate unless the described consequences are truly life-threatening.")
+    (list "Rookie"   "You are a new dispatcher in your first year. You are cautious and tend to escalate when uncertain. You sometimes over-react to scary-sounding language but are learning to focus on described impact rather than keywords.")
+    (list "Analyst"  "You are a data-driven analyst dispatcher. You focus on quantifiable impact: how many people affected, what infrastructure is at risk, what cascading failures could occur. You ignore emotional language and assess purely on described consequences.")
+  )
+
+  let px -14
+  foreach personas [ p ->
+    create-dispatchers 1 [
+      set persona-name item 0 p
+      set persona-prompt item 1 p
+      set my-triaged 0
+      set my-correct-triage 0
+      set my-routed 0
+      set my-correct-route 0
+      set shape "person"
+      set size 2.5
+      set color blue + 2
+      setxy px 14
+      set label persona-name
+      set px px + 7
+
+      ;; Inject persona via llm:set-history if LLM is active
+      if llm-ready? and use-llm? [
+        carefully [
+          llm:set-history (list
+            (list "system" persona-prompt)
+          )
+        ] [
+          output-print (word "[SETUP] Failed to set history for " persona-name ": " error-message)
+        ]
+      ]
+    ]
+  ]
+end
+
+;; ---------------------------------------------------------------------------
+;; Setup Responders (3 BASIC cap=3, 3 EXPERT cap=2, 3 COORDINATOR cap=1)
+;; ---------------------------------------------------------------------------
+
 to setup-responders
-  create-basic-agents 7 [
-    set tier "basic"
-    set capacity 2
+  let base-x -12
+  ;; BASIC responders
+  create-responders 3 [
+    set tier "BASIC"
+    set capacity 3
     set current-load 0
-    set processed-count 0
-    set color 57
-    set size 1.6
+    set resolved-count 0
     set shape "circle"
-    setxy (-13 + random-float 6) (-12 + random-float 24)
+    set size 1.5
+    set color green + 1
+    set label "B"
+  ]
+  let idx 0
+  ask responders with [ tier = "BASIC" ] [
+    setxy (base-x + idx * 3) -12
+    set idx idx + 1
   ]
 
-  create-expert-agents 4 [
-    set tier "expert"
+  ;; EXPERT responders
+  create-responders 3 [
+    set tier "EXPERT"
     set capacity 2
     set current-load 0
-    set processed-count 0
-    set color 15
-    set size 1.8
+    set resolved-count 0
     set shape "circle"
-    setxy (-3 + random-float 6) (-12 + random-float 24)
+    set size 1.8
+    set color orange + 1
+    set label "E"
+  ]
+  set idx 0
+  ask responders with [ tier = "EXPERT" ] [
+    setxy (base-x + 10 + idx * 3) -12
+    set idx idx + 1
   ]
 
-  create-coordinators 2 [
-    set tier "coordinator"
-    set capacity 3
+  ;; COORDINATOR responders
+  create-responders 3 [
+    set tier "COORDINATOR"
+    set capacity 1
     set current-load 0
-    set processed-count 0
-    set color 105
-    set size 2.1
+    set resolved-count 0
     set shape "circle"
-    setxy (8 + random-float 6) (-12 + random-float 24)
+    set size 2.1
+    set color violet + 1
+    set label "C"
+  ]
+  set idx 0
+  ask responders with [ tier = "COORDINATOR" ] [
+    setxy (base-x + 20 + idx * 3) -12
+    set idx idx + 1
   ]
 end
 
-to create-initial-cases [n]
-  repeat n [ spawn-random-case ]
+;; ---------------------------------------------------------------------------
+;; Incident Bank (30 incidents: 10 misleading + 10 clear + 10 borderline)
+;; ---------------------------------------------------------------------------
+
+to build-incident-bank
+  ;; Each entry: [summary impact ground-truth-severity ground-truth-tier category]
+  ;; MISLEADING: keywords suggest one severity but actual impact warrants another
+  set incident-bank (list
+    ;; --- MISLEADING (10): keywords mislead naive classifiers ---
+    (list "Server room fire alarm triggered"
+          "Sensor malfunction confirmed; no smoke, no heat, systems normal"
+          "LOW" "BASIC" "misleading")
+    (list "Toxic chemical spill reported at school"
+          "Custodian knocked over a bottle of vinegar in the cafeteria; no injuries"
+          "LOW" "BASIC" "misleading")
+    (list "Building collapse alert downtown"
+          "Scaffolding panel fell on empty sidewalk at 3 AM; no structural damage to building"
+          "LOW" "BASIC" "misleading")
+    (list "Mass casualty incident at concert venue"
+          "Three people fainted from heat; all conscious, first aid on scene"
+          "MODERATE" "BASIC" "misleading")
+    (list "Explosion heard near hospital"
+          "Transformer blew on adjacent street; hospital on backup power, no injuries"
+          "MODERATE" "EXPERT" "misleading")
+    (list "Data center cooling failure"
+          "Regional hospital patient records, 911 dispatch system, and pharmacy networks all depend on this center; 30 minutes to critical thermal threshold"
+          "CRITICAL" "COORDINATOR" "misleading")
+    (list "Minor water leak in basement"
+          "Leak is in the electrical vault supplying the neonatal ICU; backup generators have 45 minutes of fuel"
+          "CRITICAL" "COORDINATOR" "misleading")
+    (list "Small kitchen fire at restaurant"
+          "Fire spreading to adjacent apartment building; 40 residents trapped above; fire department 20 minutes away"
+          "CRITICAL" "COORDINATOR" "misleading")
+    (list "Routine power fluctuation reported"
+          "Affecting traffic signals across 12 intersections during school dismissal; two near-miss accidents already"
+          "HIGH" "EXPERT" "misleading")
+    (list "Dog loose on highway"
+          "Causing multi-vehicle chain reaction on I-95; 6 cars involved, injuries reported, highway blocked both directions"
+          "HIGH" "EXPERT" "misleading")
+
+    ;; --- CLEAR (10): keywords and impact align ---
+    (list "Multi-vehicle pileup on interstate"
+          "12 vehicles, multiple injuries confirmed, highway fully blocked, EMS requesting additional units"
+          "CRITICAL" "COORDINATOR" "clear")
+    (list "Warehouse fire with toxic plume"
+          "Residential area downwind being evacuated; 500+ people displaced; air quality hazardous"
+          "CRITICAL" "COORDINATOR" "clear")
+    (list "Earthquake damage to bridge"
+          "Visible structural cracks; bridge closed; 50,000 daily commuters affected; engineers en route"
+          "CRITICAL" "COORDINATOR" "clear")
+    (list "School bus accident with injuries"
+          "Bus overturned; 8 children with minor-moderate injuries; parents arriving at scene"
+          "HIGH" "EXPERT" "clear")
+    (list "Chemical plant pressure valve failure"
+          "Controlled venting in progress; shelter-in-place advisory for 2-mile radius; monitoring air quality"
+          "HIGH" "EXPERT" "clear")
+    (list "Hospital generator test failure"
+          "Backup generator failed routine test; primary power stable; repair crew dispatched for same-day fix"
+          "MODERATE" "BASIC" "clear")
+    (list "Broken water main on residential street"
+          "Low-pressure water to 30 homes; repair crew en route; estimated 4-hour fix"
+          "MODERATE" "BASIC" "clear")
+    (list "Traffic signal malfunction at intersection"
+          "Single intersection flashing red; police directing traffic; no accidents"
+          "LOW" "BASIC" "clear")
+    (list "Park trail flooding after rain"
+          "Trails closed; no hikers in area; water receding naturally"
+          "LOW" "BASIC" "clear")
+    (list "Streetlight outage on residential block"
+          "Six streetlights out; residents notified; maintenance scheduled for morning"
+          "LOW" "BASIC" "clear")
+
+    ;; --- BORDERLINE (10): genuinely ambiguous, reasonable people could disagree ---
+    (list "Subway train stalled between stations"
+          "200 passengers stuck for 25 minutes; ventilation working; rescue train dispatched; some passengers anxious"
+          "MODERATE" "EXPERT" "borderline")
+    (list "Power outage at nursing home"
+          "Backup generator active; 60 residents comfortable; generator fuel for 8 hours; utility ETA unknown"
+          "HIGH" "EXPERT" "borderline")
+    (list "Gas smell reported near elementary school"
+          "School in session; gas company en route; no readings yet; precautionary evacuation being considered"
+          "HIGH" "EXPERT" "borderline")
+    (list "Protest blocking major intersection"
+          "500 people; peaceful but not dispersing; ambulance rerouting adds 8 minutes to hospital route"
+          "MODERATE" "EXPERT" "borderline")
+    (list "Crane malfunction at construction site"
+          "Crane arm stuck over occupied building; no immediate danger but wind advisory in effect for afternoon"
+          "HIGH" "EXPERT" "borderline")
+    (list "River level rising near flood stage"
+          "2 feet below flood level; rain expected to continue 6 hours; 200 homes in potential flood zone"
+          "HIGH" "COORDINATOR" "borderline")
+    (list "Suspicious package at government building"
+          "Building evacuated; bomb squad 15 minutes away; 300 workers displaced; likely false alarm based on description"
+          "MODERATE" "EXPERT" "borderline")
+    (list "Internet outage affecting emergency services"
+          "911 calls routing to backup center; 12-second additional delay per call; estimated 2-hour repair"
+          "HIGH" "EXPERT" "borderline")
+    (list "Heat wave shelter capacity reached"
+          "Main cooling center full at 150 people; overflow into library planned; 3 elderly residents showing heat stress"
+          "MODERATE" "EXPERT" "borderline")
+    (list "Airport runway incursion reported"
+          "Ground vehicle crossed active runway; no aircraft in immediate path; runway closed for inspection"
+          "MODERATE" "EXPERT" "borderline")
+  )
 end
 
-to spawn-random-case
-  let incident-bank (list
-    (list "Server room smoke alarm" "Power instability in two hospital wings")
-    (list "Water main rupture" "Transit junction flooded during rush hour")
-    (list "School bus collision" "Multiple injuries and blocked arterial road")
-    (list "Warehouse fire flare-up" "Toxic plume reported near residential area")
-    (list "Regional telecom outage" "Emergency call latency above safe threshold")
-    (list "Chemical lab leak" "Evacuation radius requested by fire command")
-    (list "Bridge vibration alert" "Potential structural failure during peak traffic")
-    (list "Heat wave brownout" "Critical care equipment on backup power")
-    (list "Subway security incident" "Crowd panic and platform injuries")
-    (list "Data center cooling loss" "City payment systems offline")
-  )
+;; ===========================================================================
+;; GO LOOP
+;; ===========================================================================
 
-  let picked one-of incident-bank
-  create-cases 1 [
-    set tier "case"
-    set capacity 0
-    set current-load 0
-    set processed-count 0
-
-    set incident-summary item 0 picked
-    set reported-impact item 1 picked
-    set severity-band "unassessed"
-    set severity-score -1
-    set queue-state "new"
-    set assigned-tier "none"
-    set assigned-agent nobody
-    set handling-notes ""
-    set created-at ticks
+to go
+  ;; Episode boundary check
+  handle-episode-boundary
 
-    set color yellow
-    set size 1.3
-    set shape "circle"
-    setxy (random-xcor) (max-pycor - random-float 6)
+  ;; Spawn new incidents
+  if random 100 < incident-rate [
+    spawn-incident
   ]
-end
 
-to go
-  if random-float 1 < case-arrival-probability [
-    spawn-random-case
+  ;; Dispatchers triage and route
+  ask dispatchers [
+    triage-my-incidents
+    route-my-incidents
   ]
 
-  triage-new-cases
-  route-triaged-cases
-  coordinator-rebalance
-  process-assigned-cases
+  ;; Responders process active cases
+  process-active-cases
 
-  tick
-end
+  ;; Check deadlines
+  check-deadlines
 
-to triage-new-cases
-  ask cases with [queue-state = "new"] [
-    perform-triage
+  ;; Reflection at intervals
+  if reflection-interval > 0 and ticks > 0 and ticks mod reflection-interval = 0 [
+    ask dispatchers [
+      dispatcher-reflect
+    ]
   ]
+
+  set episode-tick-counter episode-tick-counter + 1
+  tick
 end
 
-to perform-triage
-  let llm-response ""
+;; ===========================================================================
+;; INCIDENT SPAWNING
+;; ===========================================================================
 
-  if llm-ready? [
-    carefully [
-      set llm-response llm:chat-with-template triage-template-path (list
-        (list "incident" incident-summary)
-        (list "impact" reported-impact)
-        (list "elapsed_ticks" (word ticks))
-        (list "known_context" "Municipal crisis operations center with three response tiers")
-      )
-    ] [
-      set llm-response ""
-    ]
-  ]
+to spawn-incident
+  let picked one-of incident-bank
+  create-incidents 1 [
+    set summary       item 0 picked
+    set impact        item 1 picked
+    set ground-truth-severity item 2 picked
+    set ground-truth-tier     item 3 picked
+    set incident-category     item 4 picked
+    set assessed-severity ""
+    set assessed-tier     ""
+    set queue-state       "new"
+    set triage-correct?   false
+    set route-correct?    false
+    set created-at        ticks
+    set assigned-responder nobody
+
+    ;; Deadline: severity-dependent time window
+    let window severity-deadline ground-truth-severity
+    set deadline ticks + window
 
-  if llm-response = "" [
-    set llm-response heuristic-severity-report incident-summary reported-impact
+    set shape "circle"
+    set size 1.0
+    set color yellow
+    setxy (random-xcor * 0.5) (9 + random 3)
+    set label ""
   ]
+end
 
-  set severity-band extract-severity-label llm-response incident-summary reported-impact
-  set severity-score severity-score-from-band severity-band
-  set queue-state "triaged"
-  set handling-notes (word "TRIAGE " llm-response)
-  set color color-for-band severity-band
+;; Manual incident injection button
+to add-incident
+  spawn-incident
+  output-print "[MANUAL] Incident added"
 end
 
-to-report heuristic-severity-report [summary impact]
-  let merged (word summary " " impact)
+to-report severity-deadline [ sev ]
+  if sev = "LOW"      [ report 30 ]
+  if sev = "MODERATE"  [ report 20 ]
+  if sev = "HIGH"      [ report 12 ]
+  report 8  ;; CRITICAL
+end
 
-  if (position "collision" merged != false)
-     or (position "toxic" merged != false)
-     or (position "evacuation" merged != false)
-     or (position "critical care" merged != false)
-     or (position "structural" merged != false) [
-    report "SEVERITY: CRITICAL"
-  ]
+;; ===========================================================================
+;; TRIAGE (dispatchers assess severity via llm:chat-with-template)
+;; ===========================================================================
 
-  if (position "fire" merged != false)
-     or (position "outage" merged != false)
-     or (position "flooded" merged != false)
-     or (position "injuries" merged != false) [
-    report "SEVERITY: HIGH"
+to triage-my-incidents
+  ;; Each dispatcher picks one untriaged incident per tick
+  let target one-of incidents with [ queue-state = "new" ]
+  if target = nobody [ stop ]
+
+  let sev ""
+
+  ifelse llm-ready? and use-llm? [
+    ;; LLM triage via template
+    carefully [
+      let response llm:chat-with-template triage-template-path (list
+        (list "persona" persona-prompt)
+        (list "episode" (word current-episode))
+        (list "tick"    (word ticks))
+        (list "incident" [summary] of target)
+        (list "impact"   [impact] of target)
+      )
+      set sev extract-severity response
+      output-print (word "[TRIAGE:" persona-name "] " [summary] of target " -> " sev)
+    ] [
+      output-print (word "[TRIAGE:" persona-name "] LLM failed: " error-message)
+      set sev ""
+    ]
+  ] [
+    ;; Heuristic triage (naive keyword matching — deliberately bad on misleading cases)
+    set sev heuristic-triage [summary] of target [impact] of target
+    output-print (word "[TRIAGE:heuristic] " [summary] of target " -> " sev)
   ]
 
-  report "SEVERITY: MODERATE"
-end
+  ;; Fallback if empty
+  if sev = "" [ set sev "MODERATE" ]
 
-to-report extract-severity-label [assessment summary impact]
-  let text (word assessment " " summary " " impact)
+  ;; Score
+  let truth [ground-truth-severity] of target
+  let is-correct? (sev = truth)
 
-  if (position "CRITICAL" text != false) or (position "critical" text != false) [
-    report "critical"
+  set total-triaged total-triaged + 1
+  set my-triaged my-triaged + 1
+  if is-correct? [
+    set correct-triage correct-triage + 1
+    set my-correct-triage my-correct-triage + 1
   ]
-
-  if (position "HIGH" text != false) or (position "high" text != false) [
-    report "high"
+  if [incident-category] of target = "misleading" [
+    set misleading-triaged misleading-triaged + 1
+    if is-correct? [ set misleading-correct misleading-correct + 1 ]
   ]
 
-  if (position "MODERATE" text != false) or (position "moderate" text != false) [
-    report "moderate"
+  ask target [
+    set assessed-severity sev
+    set triage-correct? is-correct?
+    set queue-state "triaged"
+    set color severity-color sev
+    setxy xcor (3 + random 3)
   ]
+end
 
-  if (position "LOW" text != false) or (position "low" text != false) [
-    report "low"
-  ]
+;; Heuristic triage: deliberately naive keyword matching
+to-report heuristic-triage [ s i ]
+  let text (word s " " i)
+  ;; Keywords that trigger high severity regardless of actual impact
+  if has-word? text "fire"       [ report "CRITICAL" ]
+  if has-word? text "explosion"  [ report "CRITICAL" ]
+  if has-word? text "collapse"   [ report "CRITICAL" ]
+  if has-word? text "toxic"      [ report "CRITICAL" ]
+  if has-word? text "casualty"   [ report "CRITICAL" ]
+  if has-word? text "chemical"   [ report "HIGH" ]
+  if has-word? text "trapped"    [ report "CRITICAL" ]
+  if has-word? text "spill"      [ report "HIGH" ]
+  if has-word? text "suspicious" [ report "HIGH" ]
+  if has-word? text "earthquake" [ report "CRITICAL" ]
+  if has-word? text "flood"      [ report "HIGH" ]
+  if has-word? text "outage"     [ report "HIGH" ]
+  if has-word? text "injuries"   [ report "HIGH" ]
+  if has-word? text "accident"   [ report "HIGH" ]
+  if has-word? text "alarm"      [ report "HIGH" ]
+  if has-word? text "evacuat"    [ report "CRITICAL" ]
+  ;; Default for anything without scary keywords
+  report "MODERATE"
+end
 
-  report "moderate"
+to-report has-word? [ text word-fragment ]
+  report position word-fragment text != false or position (lower-case-first word-fragment) text != false
 end
 
-to-report severity-score-from-band [band]
-  if band = "low" [ report 25 ]
-  if band = "moderate" [ report 55 ]
-  if band = "high" [ report 80 ]
-  report 95
+to-report lower-case-first [ s ]
+  ;; Simple helper: just return the string as-is since NetLogo string matching is case-sensitive
+  ;; and our keywords are already lowercase
+  report s
 end
 
-to route-triaged-cases
-  let queue sort-by [[a b] -> [severity-score] of a > [severity-score] of b] (sort (cases with [queue-state = "triaged"]))
-  foreach queue [ queued-case ->
-    dispatch-case queued-case
-  ]
+to-report extract-severity [ response ]
+  if position "CRITICAL" response != false [ report "CRITICAL" ]
+  if position "HIGH" response != false     [ report "HIGH" ]
+  if position "MODERATE" response != false [ report "MODERATE" ]
+  if position "LOW" response != false      [ report "LOW" ]
+  report ""
+end
+
+to-report severity-color [ sev ]
+  if sev = "LOW"      [ report 55 ]  ;; green
+  if sev = "MODERATE"  [ report 45 ]  ;; yellow-green
+  if sev = "HIGH"      [ report 25 ]  ;; orange
+  if sev = "CRITICAL"  [ report 15 ]  ;; red
+  report 5  ;; grey
 end
 
-to dispatch-case [target-case]
-  let preferred-tier dispatch-recommendation target-case
-  let final-tier available-tier preferred-tier
+;; ===========================================================================
+;; ROUTING (dispatchers route via llm:choose)
+;; ===========================================================================
 
-  if final-tier = "hold" [
-    ask target-case [
-      if position "waiting-capacity" handling-notes = false [
-        set handling-notes (word handling-notes " | waiting-capacity")
-      ]
+to route-my-incidents
+  let target one-of incidents with [ queue-state = "triaged" ]
+  if target = nobody [ stop ]
+
+  let chosen-tier ""
+  let choices (list "BASIC" "EXPERT" "COORDINATOR" "HOLD")
+
+  ifelse llm-ready? and use-llm? [
+    ;; LLM routing via llm:choose
+    carefully [
+      let prompt (word
+        "Incident: " [summary] of target "\n"
+        "Severity: " [assessed-severity] of target "\n"
+        "Impact: " [impact] of target "\n"
+        "Current load — BASIC: " count-active-tier "BASIC"
+        ", EXPERT: " count-active-tier "EXPERT"
+        ", COORDINATOR: " count-active-tier "COORDINATOR" "\n"
+        "Routing rules based on severity:\n"
+        " - LOW severity -> BASIC\n"
+        " - MODERATE severity -> BASIC (or EXPERT if BASIC is full)\n"
+        " - HIGH severity -> EXPERT\n"
+        " - CRITICAL severity -> COORDINATOR\n"
+        " - HOLD only if the appropriate tier AND all higher tiers are at capacity.\n"
+        "The assessed severity for this incident is " [assessed-severity] of target ". Apply the rules above."
+      )
+      set chosen-tier llm:choose prompt choices
+      output-print (word "[ROUTE:" persona-name "] " [summary] of target " -> " chosen-tier)
+    ] [
+      output-print (word "[ROUTE:" persona-name "] LLM choose failed: " error-message)
+      set chosen-tier ""
     ]
-    stop
+  ] [
+    ;; Heuristic routing
+    set chosen-tier heuristic-route [assessed-severity] of target
+    output-print (word "[ROUTE:heuristic] " [summary] of target " -> " chosen-tier)
   ]
 
-  let worker select-worker final-tier
-  if worker = nobody [ stop ]
+  if chosen-tier = "" [ set chosen-tier heuristic-route [assessed-severity] of target ]
+  if chosen-tier = "HOLD" [
+    output-print (word "[HOLD] " [summary] of target " — waiting for capacity")
+    stop
+  ]
 
-  if final-tier != preferred-tier [
-    set escalated-count escalated-count + 1
+  ;; Find available responder in chosen tier
+  let worker find-responder chosen-tier
+  if worker = nobody [
+    ;; Try escalation
+    set worker find-responder escalation-tier chosen-tier
+    if worker != nobody [
+      set total-escalated total-escalated + 1
+      set chosen-tier [tier] of worker
+    ]
+  ]
+  if worker = nobody [ stop ]  ;; No capacity anywhere
+
+  ;; Score routing
+  let truth [ground-truth-tier] of target
+  let is-correct? (chosen-tier = truth)
+  set total-routed total-routed + 1
+  set my-routed my-routed + 1
+  if is-correct? [
+    set correct-route correct-route + 1
+    set my-correct-route my-correct-route + 1
   ]
 
   ask worker [
     set current-load current-load + 1
   ]
 
-  ask target-case [
-    set queue-state "assigned"
-    set assigned-tier final-tier
-    set assigned-agent worker
-    set color color-for-tier final-tier
-    set handling-notes (word handling-notes " | routed:" final-tier)
-    set ycor ycor - 4
+  ask target [
+    set assessed-tier chosen-tier
+    set route-correct? is-correct?
+    set queue-state "active"
+    set assigned-responder worker
+    ;; Move toward responder zone
+    setxy ([xcor] of worker + random-float 2 - 1) ([ycor] of worker + 3)
+    set label ""
   ]
 end
 
-to-report dispatch-recommendation [target-case]
-  let default-tier severity-to-default-tier [severity-band] of target-case
+to-report heuristic-route [ sev ]
+  if sev = "LOW"      [ report "BASIC" ]
+  if sev = "MODERATE"  [ report "BASIC" ]
+  if sev = "HIGH"      [ report "EXPERT" ]
+  report "COORDINATOR"
+end
 
-  if not llm-ready? [
-    report default-tier
-  ]
+to-report escalation-tier [ current-tier ]
+  if current-tier = "BASIC"       [ report "EXPERT" ]
+  if current-tier = "EXPERT"      [ report "COORDINATOR" ]
+  report "COORDINATOR"
+end
 
-  let llm-response ""
-  carefully [
-    set llm-response llm:chat-with-template dispatcher-template-path (list
-      (list "severity" [severity-band] of target-case)
-      (list "incident" [incident-summary] of target-case)
-      (list "basic_load" (word count cases with [queue-state = "assigned" and assigned-tier = "basic"]))
-      (list "expert_load" (word count cases with [queue-state = "assigned" and assigned-tier = "expert"]))
-      (list "coordinator_load" (word count cases with [queue-state = "assigned" and assigned-tier = "coordinator"]))
-    )
+to-report find-responder [ tier-name ]
+  let candidates responders with [ tier = tier-name and current-load < capacity ]
+  ifelse any? candidates [
+    report min-one-of candidates [ current-load ]
   ] [
-    set llm-response ""
+    report nobody
   ]
-
-  if llm-response = "" [ report default-tier ]
-
-  let chosen extract-route-label llm-response
-  if chosen = "unknown" [ report default-tier ]
-  report chosen
 end
 
-to-report extract-route-label [response]
-  if (position "COORDINATOR" response != false) or (position "coordinator" response != false) [
-    report "coordinator"
-  ]
+to-report count-active-tier [ tier-name ]
+  report count incidents with [ queue-state = "active" and assessed-tier = tier-name ]
+end
 
-  if (position "EXPERT" response != false) or (position "expert" response != false) [
-    report "expert"
-  ]
+;; ===========================================================================
+;; PROCESSING + DEADLINES
+;; ===========================================================================
 
-  if (position "BASIC" response != false) or (position "basic" response != false) [
-    report "basic"
+to process-active-cases
+  ask incidents with [ queue-state = "active" ] [
+    let chance completion-probability assessed-tier
+    if random-float 1 < chance [
+      resolve-incident self
+    ]
   ]
-
-  report "unknown"
 end
 
-to-report severity-to-default-tier [band]
-  if band = "low" [ report "basic" ]
-  if band = "moderate" [ report "expert" ]
-  if band = "high" [ report "expert" ]
-  report "coordinator"
+to-report completion-probability [ tier-name ]
+  if tier-name = "BASIC"       [ report 0.15 ]
+  if tier-name = "EXPERT"      [ report 0.20 ]
+  if tier-name = "COORDINATOR" [ report 0.25 ]
+  report 0.10
 end
 
-to-report available-tier [preferred-tier]
-  if preferred-tier = "basic" [
-    if any? basic-agents with [current-load < capacity] [ report "basic" ]
-    if any? expert-agents with [current-load < capacity] [ report "expert" ]
-    if any? coordinators with [current-load < capacity] [ report "coordinator" ]
-    report "hold"
-  ]
-
-  if preferred-tier = "expert" [
-    if any? expert-agents with [current-load < capacity] [ report "expert" ]
-    if any? coordinators with [current-load < capacity] [ report "coordinator" ]
-    if any? basic-agents with [current-load < capacity] [ report "basic" ]
-    report "hold"
+to resolve-incident [ inc ]
+  let worker [assigned-responder] of inc
+  if worker != nobody [
+    ask worker [
+      set current-load max (list 0 (current-load - 1))
+      set resolved-count resolved-count + 1
+    ]
   ]
 
-  if any? coordinators with [current-load < capacity] [ report "coordinator" ]
-  if any? expert-agents with [current-load < capacity] [ report "expert" ]
-  report "hold"
-end
+  set total-resolved total-resolved + 1
+  set total-response-ticks total-response-ticks + (ticks - [created-at] of inc)
 
-to-report select-worker [tier-name]
-  if tier-name = "basic" [
-    if any? basic-agents with [current-load < capacity] [
-      report min-one-of basic-agents with [current-load < capacity] [current-load]
-    ]
+  ask inc [
+    set queue-state "resolved"
+    set color grey + 2
+    set size 0.6
+    setxy xcor (-15 + random-float 1)
+    set label ""
   ]
+end
 
-  if tier-name = "expert" [
-    if any? expert-agents with [current-load < capacity] [
-      report min-one-of expert-agents with [current-load < capacity] [current-load]
+to check-deadlines
+  ask incidents with [ queue-state = "active" and ticks > deadline ] [
+    set queue-state "late"
+    set total-late total-late + 1
+    set color magenta
+    output-print (word "[LATE] " summary " — exceeded deadline at tick " ticks)
+
+    ;; Try to escalate late cases
+    let current-tier assessed-tier
+    let higher-tier escalation-tier current-tier
+    if higher-tier != current-tier [
+      let new-worker find-responder higher-tier
+      if new-worker != nobody [
+        ;; Release old responder
+        if assigned-responder != nobody [
+          ask assigned-responder [
+            set current-load max (list 0 (current-load - 1))
+          ]
+        ]
+        ask new-worker [ set current-load current-load + 1 ]
+        set assigned-responder new-worker
+        set assessed-tier higher-tier
+        set queue-state "active"
+        set total-escalated total-escalated + 1
+        output-print (word "[ESCALATE] " summary " -> " higher-tier)
+      ]
     ]
   ]
 
-  if tier-name = "coordinator" [
-    if any? coordinators with [current-load < capacity] [
-      report min-one-of coordinators with [current-load < capacity] [current-load]
+  ;; Also let late-but-still-processing cases resolve
+  ask incidents with [ queue-state = "late" ] [
+    let chance completion-probability assessed-tier
+    if random-float 1 < chance [
+      resolve-incident self
     ]
   ]
-
-  report nobody
 end
 
-to coordinator-rebalance
-  if not any? coordinators [ stop ]
+;; ===========================================================================
+;; REFLECTION (dispatchers reflect on performance via llm:chat)
+;; ===========================================================================
 
-  let risky-basic one-of cases with [
-    queue-state = "assigned" and
-    assigned-tier = "basic" and
-    severity-score >= 70
-  ]
-  if risky-basic != nobody [
-    reassign-case risky-basic "expert" "risk escalation"
-  ]
+to dispatcher-reflect
+  if not llm-ready? or not use-llm? [ stop ]
+  if my-triaged = 0 [ stop ]
 
-  let critical-expert one-of cases with [
-    queue-state = "assigned" and
-    assigned-tier = "expert" and
-    severity-score >= 90
+  ;; Only reflect if enough history accumulated
+  let hist-len 0
+  carefully [
+    set hist-len length llm:history
+  ] [
+    set hist-len 0
   ]
-  if critical-expert != nobody [
-    reassign-case critical-expert "coordinator" "critical escalation"
+  if hist-len < 4 [ stop ]
+
+  let my-triage-acc ifelse-value (my-triaged > 0) [ precision (my-correct-triage / my-triaged * 100) 1 ] [ 0 ]
+  let my-route-acc  ifelse-value (my-routed > 0)  [ precision (my-correct-route / my-routed * 100) 1 ] [ 0 ]
+
+  carefully [
+    let reflection llm:chat (word
+      "REFLECTION — You are " persona-name " dispatcher. Review your performance:\n"
+      "Triage accuracy: " my-triage-acc "% (" my-correct-triage "/" my-triaged ")\n"
+      "Routing accuracy: " my-route-acc "% (" my-correct-route "/" my-routed ")\n"
+      "Episode: " current-episode ", Tick: " ticks "\n"
+      "What patterns are you noticing? What would you do differently? "
+      "Keep your reflection to 2-3 sentences."
+    )
+    output-print (word "[REFLECT:" persona-name "] " reflection)
+  ] [
+    output-print (word "[REFLECT:" persona-name "] Failed: " error-message)
   ]
 end
 
-to reassign-case [target-case new-tier reason]
-  if [assigned-tier] of target-case = new-tier [ stop ]
-
-  let new-worker select-worker new-tier
-  if new-worker = nobody [ stop ]
+;; Manual reflection trigger
+to force-reflect
+  ask dispatchers [ dispatcher-reflect ]
+end
 
-  let old-worker [assigned-agent] of target-case
-  if old-worker != nobody [
-    ask old-worker [
-      set current-load max (list 0 (current-load - 1))
+;; ===========================================================================
+;; EPISODE BOUNDARY + MEMORY MANAGEMENT
+;; ===========================================================================
+
+to handle-episode-boundary
+  if episode-length = 0 [ stop ]  ;; No episode boundaries
+  if episode-tick-counter < episode-length [ stop ]
+
+  ;; Episode ended
+  set current-episode current-episode + 1
+  set episode-tick-counter 0
+  output-print (word "[EPISODE] Starting episode " current-episode " | Memory mode: " memory-mode)
+
+  ask dispatchers [
+    if memory-mode = "per-episode" [
+      ;; Clear and re-inject persona
+      carefully [
+        llm:clear-history
+        llm:set-history (list
+          (list "system" persona-prompt)
+        )
+        output-print (word "[MEMORY:" persona-name "] History cleared, persona re-injected")
+      ] [
+        output-print (word "[MEMORY:" persona-name "] Reset failed: " error-message)
+      ]
     ]
+    if memory-mode = "none" [
+      ;; Clear everything every episode
+      carefully [
+        llm:clear-history
+        output-print (word "[MEMORY:" persona-name "] History fully cleared")
+      ] [
+        output-print (word "[MEMORY:" persona-name "] Clear failed: " error-message)
+      ]
+    ]
+    ;; "persistent" mode: do nothing, history accumulates
   ]
+end
 
-  ask new-worker [
-    set current-load current-load + 1
-  ]
+;; ===========================================================================
+;; METRIC REPORTERS
+;; ===========================================================================
 
-  ask target-case [
-    set assigned-tier new-tier
-    set assigned-agent new-worker
-    set color color-for-tier new-tier
-    set handling-notes (word handling-notes " | coordinator-reassign:" reason)
-  ]
+to-report triage-accuracy
+  ifelse total-triaged > 0
+    [ report precision (correct-triage / total-triaged * 100) 1 ]
+    [ report 0 ]
+end
 
-  set escalated-count escalated-count + 1
+to-report route-accuracy
+  ifelse total-routed > 0
+    [ report precision (correct-route / total-routed * 100) 1 ]
+    [ report 0 ]
 end
 
-to process-assigned-cases
-  ask cases with [queue-state = "assigned"] [
-    let completion completion-chance assigned-tier severity-band
-    if random-float 1 < completion [
-      finalize-case self
-    ]
-  ]
+to-report late-rate
+  let total-dispatched total-routed
+  ifelse total-dispatched > 0
+    [ report precision (total-late / total-dispatched * 100) 1 ]
+    [ report 0 ]
 end
 
-to-report completion-chance [tier-name band]
-  if tier-name = "basic" [ report 0.12 ]
-  if tier-name = "expert" [
-    if band = "high" [ report 0.27 ]
-    if band = "critical" [ report 0.2 ]
-    report 0.22
-  ]
+to-report escalation-rate
+  ifelse total-routed > 0
+    [ report precision (total-escalated / total-routed * 100) 1 ]
+    [ report 0 ]
+end
 
-  if band = "critical" [ report 0.34 ]
-  report 0.28
+to-report avg-response-time
+  ifelse total-resolved > 0
+    [ report precision (total-response-ticks / total-resolved) 1 ]
+    [ report 0 ]
 end
 
-to finalize-case [target-case]
-  let tier-name [assigned-tier] of target-case
-  let worker [assigned-agent] of target-case
+to-report misleading-accuracy
+  ifelse misleading-triaged > 0
+    [ report precision (misleading-correct / misleading-triaged * 100) 1 ]
+    [ report 0 ]
+end
 
-  if worker != nobody [
-    ask worker [
-      set current-load max (list 0 (current-load - 1))
-      set processed-count processed-count + 1
-    ]
-  ]
+to-report persona-accuracy-report
+  report (word
+    map [ d ->
+      (word [persona-name] of d ": "
+        ifelse-value ([my-triaged] of d > 0)
+          [ (word precision ([my-correct-triage] of d / [my-triaged] of d * 100) 0 "%") ]
+          [ "N/A" ]
+      )
+    ] sort dispatchers
+  )
+end
 
-  if tier-name = "basic" [
-    set processed-basic processed-basic + 1
-  ]
-  if tier-name = "expert" [
-    set processed-expert processed-expert + 1
-  ]
-  if tier-name = "coordinator" [
-    set processed-coordinator processed-coordinator + 1
-  ]
+to-report veteran-accuracy
+  let d one-of dispatchers with [persona-name = "Veteran"]
+  if d = nobody [ report "N/A" ]
+  ifelse [my-triaged] of d > 0
+    [ report (word precision ([my-correct-triage] of d / [my-triaged] of d * 100) 0 "%") ]
+    [ report "N/A" ]
+end
 
-  ask target-case [
-    set queue-state "resolved"
-    set color 7
-    set assigned-agent nobody
-    set ycor min-pycor + random-float 3
-    set label word "resolved " severity-band
+to-report rookie-accuracy
+  let d one-of dispatchers with [persona-name = "Rookie"]
+  if d = nobody [ report "N/A" ]
+  ifelse [my-triaged] of d > 0
+    [ report (word precision ([my-correct-triage] of d / [my-triaged] of d * 100) 0 "%") ]
+    [ report "N/A" ]
+end
+
+to-report analyst-accuracy
+  let d one-of dispatchers with [persona-name = "Analyst"]
+  if d = nobody [ report "N/A" ]
+  ifelse [my-triaged] of d > 0
+    [ report (word precision ([my-correct-triage] of d / [my-triaged] of d * 100) 0 "%") ]
+    [ report "N/A" ]
+end
+
+to-report llm-status
+  let result "N/A"
+  carefully [
+    set result (word llm:active)
+  ] [
+    ;; keep default
   ]
+  report result
 end
 
-to-report color-for-band [band]
-  if band = "low" [ report 45 ]
-  if band = "moderate" [ report 25 ]
-  if band = "high" [ report 15 ]
-  report 125
+to-report queue-new-count
+  report count incidents with [ queue-state = "new" ]
 end
 
-to-report color-for-tier [tier-name]
-  if tier-name = "basic" [ report 57 ]
-  if tier-name = "expert" [ report 15 ]
-  report 105
+to-report queue-triaged-count
+  report count incidents with [ queue-state = "triaged" ]
 end
 
-to-report resolve-existing-path [primary fallback]
-  if file-exists? primary [ report primary ]
-  if file-exists? fallback [ report fallback ]
-  report primary
+to-report queue-active-count
+  report count incidents with [ queue-state = "active" or queue-state = "late" ]
+end
+
+to-report queue-resolved-count
+  report count incidents with [ queue-state = "resolved" ]
 end
 ]]></code>
   <widgets>
-    <view x="230" wrappingAllowedX="true" y="10" frameRate="30.0" minPycor="-16" height="518" showTickCounter="true" patchSize="15.0" fontSize="10" wrappingAllowedY="true" width="517" tickCounterLabel="ticks" maxPycor="16" updateMode="1" maxPxcor="16" minPxcor="-16"></view>
-    <button x="20" y="20" height="33" disableUntilTicks="false" forever="false" kind="Observer" display="setup" width="68" sizeVersion="0">setup</button>
-    <button x="96" y="20" height="33" disableUntilTicks="false" forever="true" kind="Observer" display="go" width="68" sizeVersion="0">go</button>
-    <button x="20" y="60" height="33" disableUntilTicks="false" forever="false" kind="Observer" display="new-case" width="144" sizeVersion="0">spawn-random-case</button>
-    <monitor x="20" precision="17" y="110" height="45" fontSize="11" display="LLM Active" width="143" sizeVersion="0">llm-ready?</monitor>
-    <monitor x="20" precision="17" y="160" height="45" fontSize="11" display="New Queue" width="144" sizeVersion="0">count cases with [queue-state = "new"]</monitor>
-    <monitor x="20" precision="17" y="210" height="45" fontSize="11" display="Triaged Queue" width="144" sizeVersion="0">count cases with [queue-state = "triaged"]</monitor>
-    <monitor x="20" precision="17" y="260" height="45" fontSize="11" display="Assigned Queue" width="144" sizeVersion="0">count cases with [queue-state = "assigned"]</monitor>
-    <monitor x="20" precision="17" y="310" height="45" fontSize="11" display="Escalations" width="144" sizeVersion="0">escalated-count</monitor>
-    <monitor x="20" precision="17" y="360" height="45" fontSize="11" display="Done by Basic" width="144" sizeVersion="0">processed-basic</monitor>
-    <monitor x="20" precision="17" y="410" height="45" fontSize="11" display="Done by Expert" width="144" sizeVersion="0">processed-expert</monitor>
-    <monitor x="20" precision="17" y="460" height="45" fontSize="11" display="Done by Coordinator" width="144" sizeVersion="0">processed-coordinator</monitor>
+    <!-- World view -->
+    <view x="310" wrappingAllowedX="false" y="10" frameRate="30.0"
+          minPycor="-16" height="498" showTickCounter="true"
+          patchSize="15.0" fontSize="10" wrappingAllowedY="false"
+          width="498" tickCounterLabel="ticks"
+          maxPycor="16" updateMode="1" maxPxcor="16" minPxcor="-16"></view>
+
+    <!-- Buttons -->
+    <button x="15" y="15" height="33" disableUntilTicks="false"
+            forever="false" kind="Observer" display="setup" width="90" sizeVersion="0">setup</button>
+    <button x="115" y="15" height="33" disableUntilTicks="false"
+            forever="true" kind="Observer" display="go" width="90" sizeVersion="0">go</button>
+    <button x="15" y="55" height="33" disableUntilTicks="false"
+            forever="false" kind="Observer" display="add incident" width="90" sizeVersion="0">add-incident</button>
+    <button x="115" y="55" height="33" disableUntilTicks="false"
+            forever="false" kind="Observer" display="force reflect" width="90" sizeVersion="0">force-reflect</button>
+
+    <!-- Switch: use-llm? -->
+    <switch x="15" y="100" height="33" variable="use-llm?" on="true"
+            width="190" sizeVersion="0" display="use-llm?"></switch>
+
+    <!-- Chooser: memory-mode -->
+    <chooser x="15" y="140" height="45" variable="memory-mode"
+             width="190" sizeVersion="0" display="memory-mode" current="0">
+      <choice type="string" value="persistent"></choice>
+      <choice type="string" value="per-episode"></choice>
+      <choice type="string" value="none"></choice>
+    </chooser>
+
+    <!-- Sliders -->
+    <slider x="15" y="195" height="33" variable="reflection-interval"
+            min="0" max="50" default="10.0" step="5"
+            width="190" sizeVersion="0" display="reflection-interval"
+            direction="Horizontal"></slider>
+    <slider x="15" y="235" height="33" variable="incident-rate"
+            min="0" max="100" default="30.0" step="5"
+            width="190" sizeVersion="0" display="incident-rate"
+            direction="Horizontal"></slider>
+    <slider x="15" y="275" height="33" variable="episode-length"
+            min="0" max="100" default="25.0" step="5"
+            width="190" sizeVersion="0" display="episode-length"
+            direction="Horizontal"></slider>
+
+    <!-- Monitors: Row 1 — LLM status and mode -->
+    <monitor x="15" precision="17" y="320" height="40" fontSize="9"
+             display="LLM Provider" width="190" sizeVersion="0">llm-status</monitor>
+    <monitor x="15" precision="17" y="360" height="40" fontSize="9"
+             display="Episode" width="90" sizeVersion="0">current-episode</monitor>
+    <monitor x="115" precision="17" y="360" height="40" fontSize="9"
+             display="Mode" width="90" sizeVersion="0">memory-mode</monitor>
+
+    <!-- Monitors: Row 2 — Queues -->
+    <monitor x="15" precision="17" y="405" height="40" fontSize="9"
+             display="New" width="60" sizeVersion="0">queue-new-count</monitor>
+    <monitor x="80" precision="17" y="405" height="40" fontSize="9"
+             display="Triaged" width="60" sizeVersion="0">queue-triaged-count</monitor>
+    <monitor x="145" precision="17" y="405" height="40" fontSize="9"
+             display="Active" width="60" sizeVersion="0">queue-active-count</monitor>
+
+    <!-- Monitors: Row 3 — Core metrics -->
+    <monitor x="15" precision="1" y="450" height="40" fontSize="9"
+             display="Triage Acc%" width="95" sizeVersion="0">triage-accuracy</monitor>
+    <monitor x="115" precision="1" y="450" height="40" fontSize="9"
+             display="Route Acc%" width="95" sizeVersion="0">route-accuracy</monitor>
+
+    <!-- Monitors: Row 4 — Additional metrics -->
+    <monitor x="15" precision="1" y="495" height="40" fontSize="9"
+             display="Misleading%" width="95" sizeVersion="0">misleading-accuracy</monitor>
+    <monitor x="115" precision="1" y="495" height="40" fontSize="9"
+             display="Avg Resp" width="95" sizeVersion="0">avg-response-time</monitor>
+
+    <!-- Monitors: Row 5 — Per-persona accuracy -->
+    <monitor x="15" precision="17" y="540" height="40" fontSize="9"
+             display="Veteran" width="65" sizeVersion="0">veteran-accuracy</monitor>
+    <monitor x="85" precision="17" y="540" height="40" fontSize="9"
+             display="Rookie" width="65" sizeVersion="0">rookie-accuracy</monitor>
+    <monitor x="155" precision="17" y="540" height="40" fontSize="9"
+             display="Analyst" width="55" sizeVersion="0">analyst-accuracy</monitor>
+
+    <!-- Monitors: Row 6 — Rates -->
+    <monitor x="15" precision="1" y="585" height="40" fontSize="9"
+             display="Late%" width="65" sizeVersion="0">late-rate</monitor>
+    <monitor x="85" precision="1" y="585" height="40" fontSize="9"
+             display="Escalation%" width="65" sizeVersion="0">escalation-rate</monitor>
+    <monitor x="155" precision="17" y="585" height="40" fontSize="9"
+             display="Resolved" width="55" sizeVersion="0">total-resolved</monitor>
+
+    <!-- Plot: Accuracy Over Time -->
+    <plot x="820" y="10" height="230" width="310" display="Accuracy Over Time"
+          xAxis="ticks" yAxis="%" xMin="0.0" xMax="10.0" yMin="0.0" yMax="100.0"
+          autoPlotX="true" autoPlotY="true" legend="true">
+      <setup></setup>
+      <update></update>
+      <pen display="Triage" interval="1.0" mode="0" color="-13345367" legend="true">
+        <setup></setup>
+        <update>plot triage-accuracy</update>
+      </pen>
+      <pen display="Route" interval="1.0" mode="0" color="-2674135" legend="true">
+        <setup></setup>
+        <update>plot route-accuracy</update>
+      </pen>
+      <pen display="Misleading" interval="1.0" mode="0" color="-5825686" legend="true">
+        <setup></setup>
+        <update>plot misleading-accuracy</update>
+      </pen>
+    </plot>
+
+    <!-- Plot: Case Flow -->
+    <plot x="820" y="250" height="230" width="310" display="Case Flow"
+          xAxis="ticks" yAxis="count" xMin="0.0" xMax="10.0" yMin="0.0" yMax="10.0"
+          autoPlotX="true" autoPlotY="true" legend="true">
+      <setup></setup>
+      <update></update>
+      <pen display="New" interval="1.0" mode="0" color="-1184463" legend="true">
+        <setup></setup>
+        <update>plot queue-new-count</update>
+      </pen>
+      <pen display="Active" interval="1.0" mode="0" color="-13345367" legend="true">
+        <setup></setup>
+        <update>plot queue-active-count</update>
+      </pen>
+      <pen display="Resolved" interval="1.0" mode="0" color="-7500403" legend="true">
+        <setup></setup>
+        <update>plot total-resolved</update>
+      </pen>
+      <pen display="Late" interval="1.0" mode="0" color="-2064490" legend="true">
+        <setup></setup>
+        <update>plot total-late</update>
+      </pen>
+    </plot>
+
+    <!-- Output area for log messages -->
+    <output x="820" y="490" height="130" width="310" fontSize="9"></output>
   </widgets>
-  <info>## Crisis Triage with Tiered Intelligence Coordination
+  <info><![CDATA[## Crisis Triage with Ambiguous Incidents
+
+### The Story
+
+A municipal emergency operations center receives a stream of crisis reports. Three dispatchers — a Veteran, a Rookie, and an Analyst — must assess each incident's severity and route it to the appropriate response tier (Basic, Expert, or Coordinator).
+
+The twist: many incidents are **deliberately misleading**. A "toxic chemical spill at a school" turns out to be spilled vinegar. A "minor water leak" threatens a neonatal ICU. Naive keyword matching fails on these cases — but an LLM reading the full impact description can get them right.
+
+### What This Demonstrates
+
+This demo exercises 8 LLM extension primitives, grounded in the Gao et al. (2312.11970) LLM-ABM survey:
+
+| Primitive | Where Used | Paper Concept |
+|-----------|-----------|---------------|
+| `llm:load-config` | Setup | Config management |
+| `llm:set-history` | Dispatcher personas | Personalization (Ch.2) |
+| `llm:chat-with-template` | Severity triage | Environment/Interface (Ch.1) |
+| `llm:choose` | Tier routing | Bounded Rationality |
+| `llm:history` | Reflection trigger | Memory (Ch.3) |
+| `llm:chat` | Dispatcher reflection | Reflection (Ch.3) |
+| `llm:clear-history` | Episode boundaries | Memory ablation |
+| `llm:active` | Status monitor | Provider awareness |
+
+### Quick Start
+
+1. Edit `config.txt` with your provider credentials (default: local Ollama).
+2. Click **setup**.
+3. Click **go**.
+4. Watch the output log for `[TRIAGE]`, `[ROUTE]`, and `[REFLECT]` messages.
+5. Compare the **Misleading%** monitor — this is where the LLM shines vs heuristics.
+
+### The A/B Experiment
+
+Toggle **use-llm?** OFF to switch to pure heuristic mode:
 
-This demo simulates emergency incident flow through three responder tiers:
+- **Heuristic mode**: Keyword matching triggers on "fire", "toxic", "collapse" etc. Works fine on clear cases (~70%) but scores ~30% on misleading cases where keywords don't match reality.
+- **LLM mode**: Reads the full impact description. Expected ~70%+ on misleading cases.
 
-1. Basic agents handle low complexity cases.
-2. Expert agents handle moderate and high severity cases.
-3. Coordinators handle critical cases and rebalance misrouted overload.
+Run both modes for 50+ ticks and compare the Accuracy Over Time plot.
 
-Each new incident is triaged with `llm:chat-with-template` using `triage-template.yaml`.
-Routing then uses `dispatcher-template.yaml` and capacity-aware fallback logic.
+### Controls
 
-### Run
+- **use-llm?**: Toggle between LLM dispatchers and naive heuristic
+- **memory-mode**: How dispatcher memory works across episodes
+  - *persistent*: Full conversation history retained
+  - *per-episode*: History cleared each episode, persona re-injected
+  - *none*: History cleared each episode, no persona
+- **reflection-interval**: How often dispatchers reflect on their performance (0 = never)
+- **incident-rate**: Probability (%) of a new incident each tick
+- **episode-length**: Ticks per episode (0 = no episodes)
 
-1. Update `demos/crisis-triage/config.txt` with your provider + credentials.
-2. Click `setup`.
-3. Click `go`.
-4. Use `new-case` to inject incidents manually.
+### What to Observe
 
-If LLM config is unavailable, the model automatically uses deterministic heuristic triage.</info>
+- **Triage Acc%**: How often dispatchers match ground-truth severity
+- **Misleading%**: Accuracy specifically on misleading incidents (the key metric)
+- **Route Acc%**: How often incidents go to the correct response tier
+- **Per-persona differences**: Veteran vs Rookie vs Analyst performance
+- **Reflection output**: Watch dispatchers reason about their own performance in the log
+- **Memory effects**: Compare persistent vs per-episode vs none over multiple episodes
+
+### Design Rationale
+
+**Why dispatchers (not responders) use LLM**: Triage and routing are judgment calls where context matters. Processing is mechanical — it doesn't benefit from language understanding.
+
+**Why no thinking/reasoning models**: Speed (3 dispatchers x 2 calls/tick would take minutes with thinking), cost (300+ calls per session), and overkill for classification tasks.
+
+**Why `llm:choose` for routing**: Guarantees output is one of the valid tiers, avoiding parsing failures. The extension handles fuzzy matching and falls back to random choice if the LLM response can't be parsed.
+]]></info>
   <turtleShapes>
     <shape name="default" rotatable="true" editableColorIndex="0">
       <polygon color="-1920102913" filled="true" marked="true">
@@ -545,6 +1127,33 @@ If LLM config is unavailable, the model automatically uses deterministic heurist
     <shape name="circle" rotatable="false" editableColorIndex="0">
       <circle x="0" y="0" marked="true" color="-1920102913" diameter="300" filled="true"></circle>
     </shape>
+    <shape name="person" rotatable="false" editableColorIndex="0">
+      <circle x="110" y="5" marked="true" color="-1920102913" diameter="80" filled="true"></circle>
+      <polygon color="-1920102913" filled="true" marked="true">
+        <point x="105" y="90"></point>
+        <point x="120" y="195"></point>
+        <point x="90" y="285"></point>
+        <point x="105" y="300"></point>
+        <point x="135" y="300"></point>
+        <point x="150" y="225"></point>
+        <point x="165" y="300"></point>
+        <point x="195" y="300"></point>
+        <point x="210" y="285"></point>
+        <point x="180" y="195"></point>
+        <point x="195" y="90"></point>
+      </polygon>
+      <rectangle startX="127" startY="79" endX="172" endY="94" color="-1920102913" filled="true" marked="true"></rectangle>
+      <polygon color="-1920102913" filled="true" marked="true">
+        <point x="195" y="90"></point>
+        <point x="240" y="150"></point>
+        <point x="225" y="180"></point>
+      </polygon>
+      <polygon color="-1920102913" filled="true" marked="true">
+        <point x="105" y="90"></point>
+        <point x="60" y="150"></point>
+        <point x="75" y="180"></point>
+      </polygon>
+    </shape>
   </turtleShapes>
   <linkShapes>
     <shape name="default" curviness="0.0">
@@ -570,5 +1179,5 @@ If LLM config is unavailable, the model automatically uses deterministic heurist
       </indicator>
     </shape>
   </linkShapes>
-  <previewCommands>setup repeat 75 [ go ]</previewCommands>
+  <previewCommands>setup repeat 30 [ go ]</previewCommands>
 </model>
diff --git a/demos/crisis-triage/dispatcher-template.yaml b/demos/crisis-triage/dispatcher-template.yaml
index 291a3a4..f018c6d 100644
--- a/demos/crisis-triage/dispatcher-template.yaml
+++ b/demos/crisis-triage/dispatcher-template.yaml
@@ -1,15 +1,17 @@
-system: "You are a crisis operations dispatcher. Route incidents to maximize response quality under load."
+# ABOUTME: Documentation stub for the dispatcher routing step.
+# ABOUTME: Routing now uses llm:choose for bounded tier selection instead of template parsing.
+#
+# This file is kept for reference. The actual routing in crisis-triage.nlogox
+# uses llm:choose with choices ["BASIC" "EXPERT" "COORDINATOR" "HOLD"],
+# which guarantees the response is one of the valid tiers.
+#
+# The dispatcher's conversational context (persona, history) is maintained
+# via llm:set-history and accumulated through llm:chat-with-template calls.
+system: "You are a crisis operations dispatcher. Route incidents to the appropriate response tier."
 template: |
-  Severity band: {severity}
-  Incident summary: {incident}
+  Severity: {severity}
+  Incident: {incident}
+  Current load — BASIC: {basic_load}, EXPERT: {expert_load}, COORDINATOR: {coordinator_load}
 
-  Current active load:
-  BASIC={basic_load}
-  EXPERT={expert_load}
-  COORDINATOR={coordinator_load}
-
-  Choose the best tier for this incident considering both severity and current load.
-
-  Return exactly two lines:
-  ROUTE: BASIC|EXPERT|COORDINATOR
-  REASON: <= 18 words
+  Choose the best response tier considering severity and current workload.
+  Respond with EXACTLY ONE of: BASIC, EXPERT, COORDINATOR, HOLD
diff --git a/demos/crisis-triage/tests/README.md b/demos/crisis-triage/tests/README.md
index fc5ec4b..edd3703 100644
--- a/demos/crisis-triage/tests/README.md
+++ b/demos/crisis-triage/tests/README.md
@@ -6,10 +6,15 @@ Run from repository root:
 python -m unittest discover -s demos/crisis-triage/tests -p "test_*.py" -v
 ```
 
-These tests validate:
+These tests validate (29 tests, no API calls):
 
 - Presence of all required demo files
-- NetLogo 7 `.nlogox` tiered-agent and triage/dispatch procedure structure
-- LLM template variable consistency with model substitutions
-- Config key completeness
-- README documentation coverage
+- Breed declarations (dispatchers, incidents, responders)
+- Required procedures (setup, triage, routing, reflection, episode boundary)
+- All 8 LLM primitives present in code
+- Template placeholder consistency with model substitutions
+- Config key completeness and max_tokens=200
+- README documentation sections
+- XML structure (widgets, shapes, plots, CDATA)
+- Incident bank has 30 entries (10 misleading + 10 clear + 10 borderline)
+- Procedure block matching (every `to` has an `end`)
diff --git a/demos/crisis-triage/tests/test_crisis_triage.py b/demos/crisis-triage/tests/test_crisis_triage.py
index 55b43ae..183920d 100644
--- a/demos/crisis-triage/tests/test_crisis_triage.py
+++ b/demos/crisis-triage/tests/test_crisis_triage.py
@@ -1,3 +1,6 @@
+# ABOUTME: Static validation tests for the crisis triage demo.
+# ABOUTME: Tests file structure, XML format, code structure, and template consistency.
+
 import re
 import unittest
 import xml.etree.ElementTree as ET
@@ -17,12 +20,10 @@ def read(path: Path) -> str:
 
 
 def parse_model() -> ET.Element:
-    """Parse the .nlogox model file as XML and return the root element."""
     return ET.parse(MODEL_PATH).getroot()
 
 
 def model_code_only() -> str:
-    """Extract the NetLogo code from the <code> CDATA section using XML parsing."""
     root = parse_model()
     code_elem = root.find("code")
     if code_elem is None or code_elem.text is None:
@@ -55,76 +56,79 @@ def test_required_files_exist(self) -> None:
         for path in required:
             self.assertTrue(path.exists(), f"missing file: {path}")
 
-    def test_model_declares_tiered_breeds(self) -> None:
+    def test_model_declares_breeds(self) -> None:
         code = model_code_only()
-        self.assertIn("breed [cases case]", code)
-        self.assertIn("breed [basic-agents basic-agent]", code)
-        self.assertIn("breed [expert-agents expert-agent]", code)
-        self.assertIn("breed [coordinators coordinator]", code)
+        self.assertIn("breed [ dispatchers dispatcher ]", code)
+        self.assertIn("breed [ incidents incident ]", code)
+        self.assertIn("breed [ responders responder ]", code)
 
     def test_model_contains_required_procedures(self) -> None:
         code = model_code_only()
         procedures = [
             "to setup",
             "to setup-llm",
-            "to triage-new-cases",
-            "to perform-triage",
-            "to route-triaged-cases",
-            "to dispatch-case",
-            "to coordinator-rebalance",
-            "to reassign-case",
-            "to process-assigned-cases",
-            "to finalize-case",
+            "to setup-dispatchers",
+            "to setup-responders",
+            "to go",
+            "to triage-my-incidents",
+            "to route-my-incidents",
+            "to process-active-cases",
+            "to dispatcher-reflect",
+            "to handle-episode-boundary",
         ]
         for proc in procedures:
             self.assertIn(proc, code, f"missing procedure: {proc}")
 
-    def test_model_uses_llm_templates_and_config(self) -> None:
+    def test_model_uses_llm_config_and_template(self) -> None:
         code = model_code_only()
         self.assertIn('set config-path "demos/crisis-triage/config.txt"', code)
         self.assertIn('set triage-template-path "demos/crisis-triage/triage-template.yaml"', code)
-        self.assertIn('set dispatcher-template-path "demos/crisis-triage/dispatcher-template.yaml"', code)
         self.assertIn("llm:chat-with-template triage-template-path", code)
-        self.assertIn("llm:chat-with-template dispatcher-template-path", code)
-        self.assertIn("heuristic-severity-report", code)
+
+    def test_model_uses_all_eight_primitives(self) -> None:
+        code = model_code_only()
+        primitives = [
+            "llm:load-config",
+            "llm:set-history",
+            "llm:chat-with-template",
+            "llm:choose",
+            "llm:history",
+            "llm:chat",
+            "llm:clear-history",
+            "llm:active",
+        ]
+        for prim in primitives:
+            self.assertIn(prim, code, f"missing LLM primitive: {prim}")
 
     def test_triage_template_placeholders_match_model(self) -> None:
         template = read(TRIAGE_TEMPLATE_PATH)
         placeholders = set(re.findall(r"\{([a-zA-Z_][a-zA-Z0-9_]*)\}", template))
         self.assertEqual(
             placeholders,
-            {"incident", "impact", "elapsed_ticks", "known_context"},
-        )
-        self.assertIn("SEVERITY: LOW|MODERATE|HIGH|CRITICAL", template)
-
-    def test_dispatcher_template_placeholders_match_model(self) -> None:
-        template = read(DISPATCHER_TEMPLATE_PATH)
-        placeholders = set(re.findall(r"\{([a-zA-Z_][a-zA-Z0-9_]*)\}", template))
-        self.assertEqual(
-            placeholders,
-            {"severity", "incident", "basic_load", "expert_load", "coordinator_load"},
+            {"persona", "episode", "tick", "incident", "impact"},
         )
-        self.assertIn("ROUTE: BASIC|EXPERT|COORDINATOR", template)
 
     def test_config_has_required_keys(self) -> None:
         config = parse_config(CONFIG_PATH)
         for key in ["provider", "model", "temperature", "max_tokens", "timeout_seconds"]:
             self.assertIn(key, config, f"missing key in config: {key}")
 
+    def test_config_max_tokens_is_200(self) -> None:
+        config = parse_config(CONFIG_PATH)
+        self.assertEqual(config["max_tokens"], "200")
+
     def test_readme_has_core_sections(self) -> None:
         readme = read(README_PATH)
         for text in [
-            "What it demonstrates",
-            "Model architecture",
-            "Run instructions",
-            "Test suite",
+            "Quick Start",
+            "A/B Experiment",
+            "Design Rationale",
+            "Paper Connection",
         ]:
             self.assertIn(text, readme)
 
 
 class TestModelXmlParsing(unittest.TestCase):
-    """Validate the .nlogox file using proper XML parsing instead of regex."""
-
     def setUp(self) -> None:
         self.root = parse_model()
 
@@ -149,16 +153,25 @@ def test_widgets_section_has_expected_children(self) -> None:
         self.assertIn("view", child_tags)
         self.assertIn("button", child_tags)
         self.assertIn("monitor", child_tags)
+        self.assertIn("switch", child_tags)
+        self.assertIn("chooser", child_tags)
+        self.assertIn("slider", child_tags)
+        self.assertIn("plot", child_tags)
 
     def test_widgets_button_count(self) -> None:
         widgets = self.root.find("widgets")
         buttons = widgets.findall("button")
-        self.assertEqual(len(buttons), 3, "expected 3 buttons: setup, go, new-case")
+        self.assertEqual(len(buttons), 4, "expected 4 buttons: setup, go, add-incident, force-reflect")
 
     def test_widgets_monitor_count(self) -> None:
         widgets = self.root.find("widgets")
         monitors = widgets.findall("monitor")
-        self.assertGreaterEqual(len(monitors), 7, "expected at least 7 monitors")
+        self.assertGreaterEqual(len(monitors), 12, "expected at least 12 monitors")
+
+    def test_widgets_plot_count(self) -> None:
+        widgets = self.root.find("widgets")
+        plots = widgets.findall("plot")
+        self.assertEqual(len(plots), 2, "expected 2 plots: Accuracy Over Time, Case Flow")
 
     def test_turtle_shapes_defined(self) -> None:
         shapes = self.root.find("turtleShapes")
@@ -166,11 +179,10 @@ def test_turtle_shapes_defined(self) -> None:
         shape_names = [s.get("name") for s in shapes.findall("shape")]
         self.assertIn("default", shape_names)
         self.assertIn("circle", shape_names)
+        self.assertIn("person", shape_names)
 
 
 class TestModelStructure(unittest.TestCase):
-    """Structural assertions on the NetLogo 7.x .nlogox format."""
-
     def setUp(self) -> None:
         self.root = parse_model()
 
@@ -208,8 +220,6 @@ def test_link_shapes_has_default(self) -> None:
 
 
 class TestBehaviorRegression(unittest.TestCase):
-    """Catch regressions in model syntax and LLM extension usage patterns."""
-
     def setUp(self) -> None:
         self.code = model_code_only()
 
@@ -217,14 +227,11 @@ def test_extensions_declaration_present(self) -> None:
         self.assertIn("extensions [ llm ]", self.code)
 
     def test_chat_with_template_uses_list_syntax(self) -> None:
-        """Ensure llm:chat-with-template uses (list ...) not [...] for variables."""
         lines = self.code.splitlines()
         for line in lines:
             stripped = line.strip()
             if "llm:chat-with-template" not in stripped:
                 continue
-            # The template call should be followed by (list on the same or next
-            # logical line.  It must NOT use bracket syntax like [["key" val]].
             self.assertNotRegex(
                 stripped,
                 r'llm:chat-with-template\s+\S+\s+\[\[',
@@ -232,7 +239,6 @@ def test_chat_with_template_uses_list_syntax(self) -> None:
             )
 
     def test_no_inline_provider_setup_in_procedures(self) -> None:
-        """Model should use llm:load-config, not manual set-provider/set-api-key."""
         for deprecated in ["llm:set-provider", "llm:set-api-key", "llm:set-model"]:
             self.assertNotIn(
                 deprecated,
@@ -241,7 +247,6 @@ def test_no_inline_provider_setup_in_procedures(self) -> None:
             )
 
     def test_all_procedure_blocks_are_closed(self) -> None:
-        """Every 'to' or 'to-report' must have a matching 'end'."""
         opens = len(re.findall(r"^to(?:-report)?\s", self.code, re.MULTILINE))
         closes = len(re.findall(r"^end\s*$", self.code, re.MULTILINE))
         self.assertEqual(
@@ -251,7 +256,6 @@ def test_all_procedure_blocks_are_closed(self) -> None:
         )
 
     def test_no_deprecated_primitives(self) -> None:
-        """Guard against usage of removed or renamed LLM extension primitives."""
         deprecated = [
             "llm:ask",
             "llm:send",
@@ -264,12 +268,19 @@ def test_no_deprecated_primitives(self) -> None:
     def test_globals_declared(self) -> None:
         self.assertIn("globals [", self.code)
         for g in ["llm-ready?", "config-path", "triage-template-path",
-                   "dispatcher-template-path"]:
+                   "incident-bank", "total-triaged", "correct-triage"]:
             self.assertIn(g, self.code, f"missing global: {g}")
 
-    def test_breed_owns_blocks_present(self) -> None:
-        self.assertIn("turtles-own [", self.code)
-        self.assertIn("cases-own [", self.code)
+    def test_incident_bank_has_30_entries(self) -> None:
+        """The incident bank should contain 30 incidents (10 misleading + 10 clear + 10 borderline)."""
+        code = self.code
+        # Count (list " patterns inside build-incident-bank — each incident starts with (list "
+        bank_start = code.find("to build-incident-bank")
+        bank_end = code.find("\nend", bank_start)
+        bank_code = code[bank_start:bank_end]
+        incident_count = bank_code.count('(list "')
+        # The outer (list wrapping all incidents doesn't start with (list "
+        self.assertEqual(incident_count, 30, f"expected 30 incidents, found {incident_count}")
 
 
 if __name__ == "__main__":
diff --git a/demos/crisis-triage/triage-template.yaml b/demos/crisis-triage/triage-template.yaml
index 3f60565..1c392b0 100644
--- a/demos/crisis-triage/triage-template.yaml
+++ b/demos/crisis-triage/triage-template.yaml
@@ -1,12 +1,21 @@
-system: "You are an emergency triage specialist. Assess risk conservatively and consistently."
+# ABOUTME: Triage template for crisis severity assessment with anti-keyword-bias guidance.
+# ABOUTME: Used by dispatchers via llm:chat-with-template to classify incident severity.
+system: |
+  You are a crisis triage specialist with this background: {persona}
+  This is episode {episode}, tick {tick} of a municipal emergency simulation.
+
+  IMPORTANT: Do NOT rely on scary-sounding keywords alone. A "fire alarm" in a
+  server room may be a sensor malfunction. A "data center cooling loss" may threaten
+  lives if hospitals depend on it. Assess the ACTUAL described impact, not the
+  surface-level vocabulary.
+
+  Classify severity as exactly one of: LOW, MODERATE, HIGH, CRITICAL.
 template: |
-  Incident summary: {incident}
-  Reported impact: {impact}
-  Time since report (ticks): {elapsed_ticks}
-  Context: {known_context}
+  Incident: {incident}
+  Impact: {impact}
 
-  Classify this incident severity for a municipal response team.
+  Based on the described impact (not keywords), classify this incident severity.
 
   Return exactly two lines:
   SEVERITY: LOW|MODERATE|HIGH|CRITICAL
-  JUSTIFICATION: <= 18 words grounded in impact and urgency
+  JUSTIFICATION: one sentence explaining your assessment based on actual impact

From deaa19234097c601f307e955300d92107923e902 Mon Sep 17 00:00:00 2001
From: JNK234 <jwalapuramnarasimha@gmail.com>
Date: Thu, 12 Mar 2026 21:40:23 -0500
Subject: [PATCH 11/12] chore: upgrade default model from llama3.2:3b to
 qwen2.5:7b

The 3B model systematically under-triages incidents (defaults to LOW),
producing ~22% triage accuracy. The 7B model should better calibrate
across the 4-level severity scale.
---
 demos/crisis-triage/config.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/demos/crisis-triage/config.txt b/demos/crisis-triage/config.txt
index aed3eab..e927166 100644
--- a/demos/crisis-triage/config.txt
+++ b/demos/crisis-triage/config.txt
@@ -3,7 +3,7 @@
 
 # Recommended local/default option (no cloud key required)
 provider=ollama
-model=llama3.2:3b
+model=qwen2.5:7b
 base_url=http://localhost:11434
 
 # Runtime behavior

From 93675c7bce1e96e9c40917a2aa0bb9426c12c7cb Mon Sep 17 00:00:00 2001
From: JNK234 <jwalapuramnarasimha@gmail.com>
Date: Thu, 12 Mar 2026 22:35:56 -0500
Subject: [PATCH 12/12] fix: improve triage and routing prompts for better
 accuracy

Triage template:
- Add severity calibration anchors (LOW=no injuries/routine, MODERATE=minor
  injuries/single-agency, HIGH=significant injuries/infrastructure at risk,
  CRITICAL=life-threatening/multi-agency/cascading)
- Simplify output format: severity word first then brief reason, instead of
  rigid two-line SEVERITY/JUSTIFICATION format

Routing prompt:
- Show tier capacity context (e.g., BASIC: 2/9) so the model can assess
  whether a tier is nearly full or barely used
---
 demos/crisis-triage/crisis-triage.nlogox | 150 +++++++----------------
 demos/crisis-triage/triage-template.yaml |  13 +-
 2 files changed, 50 insertions(+), 113 deletions(-)

diff --git a/demos/crisis-triage/crisis-triage.nlogox b/demos/crisis-triage/crisis-triage.nlogox
index 873b00e..3b84ec6 100644
--- a/demos/crisis-triage/crisis-triage.nlogox
+++ b/demos/crisis-triage/crisis-triage.nlogox
@@ -559,9 +559,9 @@ to route-my-incidents
         "Incident: " [summary] of target "\n"
         "Severity: " [assessed-severity] of target "\n"
         "Impact: " [impact] of target "\n"
-        "Current load — BASIC: " count-active-tier "BASIC"
-        ", EXPERT: " count-active-tier "EXPERT"
-        ", COORDINATOR: " count-active-tier "COORDINATOR" "\n"
+        "Current load — BASIC: " count-active-tier "BASIC" "/9"
+        ", EXPERT: " count-active-tier "EXPERT" "/6"
+        ", COORDINATOR: " count-active-tier "COORDINATOR" "/3" "\n"
         "Routing rules based on severity:\n"
         " - LOW severity -> BASIC\n"
         " - MODERATE severity -> BASIC (or EXPERT if BASIC is full)\n"
@@ -913,141 +913,75 @@ to-report queue-resolved-count
 end
 ]]></code>
   <widgets>
-    <!-- World view -->
-    <view x="310" wrappingAllowedX="false" y="10" frameRate="30.0"
-          minPycor="-16" height="498" showTickCounter="true"
-          patchSize="15.0" fontSize="10" wrappingAllowedY="false"
-          width="498" tickCounterLabel="ticks"
-          maxPycor="16" updateMode="1" maxPxcor="16" minPxcor="-16"></view>
-
-    <!-- Buttons -->
-    <button x="15" y="15" height="33" disableUntilTicks="false"
-            forever="false" kind="Observer" display="setup" width="90" sizeVersion="0">setup</button>
-    <button x="115" y="15" height="33" disableUntilTicks="false"
-            forever="true" kind="Observer" display="go" width="90" sizeVersion="0">go</button>
-    <button x="15" y="55" height="33" disableUntilTicks="false"
-            forever="false" kind="Observer" display="add incident" width="90" sizeVersion="0">add-incident</button>
-    <button x="115" y="55" height="33" disableUntilTicks="false"
-            forever="false" kind="Observer" display="force reflect" width="90" sizeVersion="0">force-reflect</button>
-
-    <!-- Switch: use-llm? -->
-    <switch x="15" y="100" height="33" variable="use-llm?" on="true"
-            width="190" sizeVersion="0" display="use-llm?"></switch>
-
-    <!-- Chooser: memory-mode -->
-    <chooser x="15" y="140" height="45" variable="memory-mode"
-             width="190" sizeVersion="0" display="memory-mode" current="0">
+    <view x="310" wrappingAllowedX="false" y="10" frameRate="30.0" minPycor="-16" height="498" showTickCounter="true" patchSize="15.0" fontSize="10" wrappingAllowedY="false" width="498" tickCounterLabel="ticks" maxPycor="16" updateMode="1" maxPxcor="16" minPxcor="-16"></view>
+    <button x="15" y="15" height="33" disableUntilTicks="false" forever="false" kind="Observer" display="setup" width="90" sizeVersion="0">setup</button>
+    <button x="115" y="15" height="33" disableUntilTicks="false" forever="true" kind="Observer" display="go" width="90" sizeVersion="0">go</button>
+    <button x="15" y="55" height="33" disableUntilTicks="false" forever="false" kind="Observer" display="add incident" width="90" sizeVersion="0">add-incident</button>
+    <button x="115" y="55" height="33" disableUntilTicks="false" forever="false" kind="Observer" display="force reflect" width="90" sizeVersion="0">force-reflect</button>
+    <switch x="15" y="100" height="33" on="true" variable="use-llm?" display="use-llm?" width="190" sizeVersion="0"></switch>
+    <chooser x="15" y="140" height="45" variable="memory-mode" current="0" display="memory-mode" width="190" sizeVersion="0">
       <choice type="string" value="persistent"></choice>
       <choice type="string" value="per-episode"></choice>
       <choice type="string" value="none"></choice>
     </chooser>
-
-    <!-- Sliders -->
-    <slider x="15" y="195" height="33" variable="reflection-interval"
-            min="0" max="50" default="10.0" step="5"
-            width="190" sizeVersion="0" display="reflection-interval"
-            direction="Horizontal"></slider>
-    <slider x="15" y="235" height="33" variable="incident-rate"
-            min="0" max="100" default="30.0" step="5"
-            width="190" sizeVersion="0" display="incident-rate"
-            direction="Horizontal"></slider>
-    <slider x="15" y="275" height="33" variable="episode-length"
-            min="0" max="100" default="25.0" step="5"
-            width="190" sizeVersion="0" display="episode-length"
-            direction="Horizontal"></slider>
-
-    <!-- Monitors: Row 1 — LLM status and mode -->
-    <monitor x="15" precision="17" y="320" height="40" fontSize="9"
-             display="LLM Provider" width="190" sizeVersion="0">llm-status</monitor>
-    <monitor x="15" precision="17" y="360" height="40" fontSize="9"
-             display="Episode" width="90" sizeVersion="0">current-episode</monitor>
-    <monitor x="115" precision="17" y="360" height="40" fontSize="9"
-             display="Mode" width="90" sizeVersion="0">memory-mode</monitor>
-
-    <!-- Monitors: Row 2 — Queues -->
-    <monitor x="15" precision="17" y="405" height="40" fontSize="9"
-             display="New" width="60" sizeVersion="0">queue-new-count</monitor>
-    <monitor x="80" precision="17" y="405" height="40" fontSize="9"
-             display="Triaged" width="60" sizeVersion="0">queue-triaged-count</monitor>
-    <monitor x="145" precision="17" y="405" height="40" fontSize="9"
-             display="Active" width="60" sizeVersion="0">queue-active-count</monitor>
-
-    <!-- Monitors: Row 3 — Core metrics -->
-    <monitor x="15" precision="1" y="450" height="40" fontSize="9"
-             display="Triage Acc%" width="95" sizeVersion="0">triage-accuracy</monitor>
-    <monitor x="115" precision="1" y="450" height="40" fontSize="9"
-             display="Route Acc%" width="95" sizeVersion="0">route-accuracy</monitor>
-
-    <!-- Monitors: Row 4 — Additional metrics -->
-    <monitor x="15" precision="1" y="495" height="40" fontSize="9"
-             display="Misleading%" width="95" sizeVersion="0">misleading-accuracy</monitor>
-    <monitor x="115" precision="1" y="495" height="40" fontSize="9"
-             display="Avg Resp" width="95" sizeVersion="0">avg-response-time</monitor>
-
-    <!-- Monitors: Row 5 — Per-persona accuracy -->
-    <monitor x="15" precision="17" y="540" height="40" fontSize="9"
-             display="Veteran" width="65" sizeVersion="0">veteran-accuracy</monitor>
-    <monitor x="85" precision="17" y="540" height="40" fontSize="9"
-             display="Rookie" width="65" sizeVersion="0">rookie-accuracy</monitor>
-    <monitor x="155" precision="17" y="540" height="40" fontSize="9"
-             display="Analyst" width="55" sizeVersion="0">analyst-accuracy</monitor>
-
-    <!-- Monitors: Row 6 — Rates -->
-    <monitor x="15" precision="1" y="585" height="40" fontSize="9"
-             display="Late%" width="65" sizeVersion="0">late-rate</monitor>
-    <monitor x="85" precision="1" y="585" height="40" fontSize="9"
-             display="Escalation%" width="65" sizeVersion="0">escalation-rate</monitor>
-    <monitor x="155" precision="17" y="585" height="40" fontSize="9"
-             display="Resolved" width="55" sizeVersion="0">total-resolved</monitor>
-
-    <!-- Plot: Accuracy Over Time -->
-    <plot x="820" y="10" height="230" width="310" display="Accuracy Over Time"
-          xAxis="ticks" yAxis="%" xMin="0.0" xMax="10.0" yMin="0.0" yMax="100.0"
-          autoPlotX="true" autoPlotY="true" legend="true">
+    <slider x="15" step="5" y="195" max="50" display="reflection-interval" height="33" min="0" direction="Horizontal" default="10.0" variable="reflection-interval" width="190" sizeVersion="0"></slider>
+    <slider x="15" step="5" y="235" max="100" display="incident-rate" height="33" min="0" direction="Horizontal" default="30.0" variable="incident-rate" width="190" sizeVersion="0"></slider>
+    <slider x="15" step="5" y="275" max="100" display="episode-length" height="33" min="0" direction="Horizontal" default="25.0" variable="episode-length" width="190" sizeVersion="0"></slider>
+    <monitor x="15" precision="17" y="320" height="40" fontSize="9" display="LLM Provider" width="190" sizeVersion="0">llm-status</monitor>
+    <monitor x="15" precision="17" y="360" height="40" fontSize="9" display="Episode" width="90" sizeVersion="0">current-episode</monitor>
+    <monitor x="115" precision="17" y="360" height="40" fontSize="9" display="Mode" width="90" sizeVersion="0">memory-mode</monitor>
+    <monitor x="15" precision="17" y="405" height="40" fontSize="9" display="New" width="60" sizeVersion="0">queue-new-count</monitor>
+    <monitor x="80" precision="17" y="405" height="40" fontSize="9" display="Triaged" width="60" sizeVersion="0">queue-triaged-count</monitor>
+    <monitor x="145" precision="17" y="405" height="40" fontSize="9" display="Active" width="60" sizeVersion="0">queue-active-count</monitor>
+    <monitor x="15" precision="1" y="450" height="40" fontSize="9" display="Triage Acc%" width="95" sizeVersion="0">triage-accuracy</monitor>
+    <monitor x="115" precision="1" y="450" height="40" fontSize="9" display="Route Acc%" width="95" sizeVersion="0">route-accuracy</monitor>
+    <monitor x="15" precision="1" y="495" height="40" fontSize="9" display="Misleading%" width="95" sizeVersion="0">misleading-accuracy</monitor>
+    <monitor x="115" precision="1" y="495" height="40" fontSize="9" display="Avg Resp" width="95" sizeVersion="0">avg-response-time</monitor>
+    <monitor x="15" precision="17" y="540" height="40" fontSize="9" display="Veteran" width="65" sizeVersion="0">veteran-accuracy</monitor>
+    <monitor x="85" precision="17" y="540" height="40" fontSize="9" display="Rookie" width="65" sizeVersion="0">rookie-accuracy</monitor>
+    <monitor x="155" precision="17" y="540" height="40" fontSize="9" display="Analyst" width="55" sizeVersion="0">analyst-accuracy</monitor>
+    <monitor x="15" precision="1" y="585" height="40" fontSize="9" display="Late%" width="65" sizeVersion="0">late-rate</monitor>
+    <monitor x="85" precision="1" y="585" height="40" fontSize="9" display="Escalation%" width="65" sizeVersion="0">escalation-rate</monitor>
+    <monitor x="155" precision="17" y="585" height="40" fontSize="9" display="Resolved" width="55" sizeVersion="0">total-resolved</monitor>
+    <plot x="820" autoPlotX="true" yMax="100.0" autoPlotY="true" yAxis="%" y="10" xMin="0.0" height="230" legend="true" xMax="10.0" yMin="0.0" width="310" xAxis="ticks" display="Accuracy Over Time">
       <setup></setup>
       <update></update>
-      <pen display="Triage" interval="1.0" mode="0" color="-13345367" legend="true">
+      <pen interval="1.0" mode="0" display="Triage" color="-13345367" legend="true">
         <setup></setup>
         <update>plot triage-accuracy</update>
       </pen>
-      <pen display="Route" interval="1.0" mode="0" color="-2674135" legend="true">
+      <pen interval="1.0" mode="0" display="Route" color="-2674135" legend="true">
         <setup></setup>
         <update>plot route-accuracy</update>
       </pen>
-      <pen display="Misleading" interval="1.0" mode="0" color="-5825686" legend="true">
+      <pen interval="1.0" mode="0" display="Misleading" color="-5825686" legend="true">
         <setup></setup>
         <update>plot misleading-accuracy</update>
       </pen>
     </plot>
-
-    <!-- Plot: Case Flow -->
-    <plot x="820" y="250" height="230" width="310" display="Case Flow"
-          xAxis="ticks" yAxis="count" xMin="0.0" xMax="10.0" yMin="0.0" yMax="10.0"
-          autoPlotX="true" autoPlotY="true" legend="true">
+    <plot x="820" autoPlotX="true" yMax="10.0" autoPlotY="true" yAxis="count" y="250" xMin="0.0" height="230" legend="true" xMax="10.0" yMin="0.0" width="310" xAxis="ticks" display="Case Flow">
       <setup></setup>
       <update></update>
-      <pen display="New" interval="1.0" mode="0" color="-1184463" legend="true">
+      <pen interval="1.0" mode="0" display="New" color="-1184463" legend="true">
         <setup></setup>
         <update>plot queue-new-count</update>
       </pen>
-      <pen display="Active" interval="1.0" mode="0" color="-13345367" legend="true">
+      <pen interval="1.0" mode="0" display="Active" color="-13345367" legend="true">
         <setup></setup>
         <update>plot queue-active-count</update>
       </pen>
-      <pen display="Resolved" interval="1.0" mode="0" color="-7500403" legend="true">
+      <pen interval="1.0" mode="0" display="Resolved" color="-7500403" legend="true">
         <setup></setup>
         <update>plot total-resolved</update>
       </pen>
-      <pen display="Late" interval="1.0" mode="0" color="-2064490" legend="true">
+      <pen interval="1.0" mode="0" display="Late" color="-2064490" legend="true">
         <setup></setup>
         <update>plot total-late</update>
       </pen>
     </plot>
-
-    <!-- Output area for log messages -->
-    <output x="820" y="490" height="130" width="310" fontSize="9"></output>
+    <output x="820" y="490" height="130" fontSize="9" width="310"></output>
   </widgets>
-  <info><![CDATA[## Crisis Triage with Ambiguous Incidents
+  <info>## Crisis Triage with Ambiguous Incidents
 
 ### The Story
 
@@ -1114,7 +1048,7 @@ Run both modes for 50+ ticks and compare the Accuracy Over Time plot.
 **Why no thinking/reasoning models**: Speed (3 dispatchers x 2 calls/tick would take minutes with thinking), cost (300+ calls per session), and overkill for classification tasks.
 
 **Why `llm:choose` for routing**: Guarantees output is one of the valid tiers, avoiding parsing failures. The extension handles fuzzy matching and falls back to random choice if the LLM response can't be parsed.
-]]></info>
+</info>
   <turtleShapes>
     <shape name="default" rotatable="true" editableColorIndex="0">
       <polygon color="-1920102913" filled="true" marked="true">
@@ -1142,7 +1076,7 @@ Run both modes for 50+ ticks and compare the Accuracy Over Time plot.
         <point x="180" y="195"></point>
         <point x="195" y="90"></point>
       </polygon>
-      <rectangle startX="127" startY="79" endX="172" endY="94" color="-1920102913" filled="true" marked="true"></rectangle>
+      <rectangle endX="172" startY="79" marked="true" color="-1920102913" endY="94" startX="127" filled="true"></rectangle>
       <polygon color="-1920102913" filled="true" marked="true">
         <point x="195" y="90"></point>
         <point x="240" y="150"></point>
diff --git a/demos/crisis-triage/triage-template.yaml b/demos/crisis-triage/triage-template.yaml
index 1c392b0..cd9745d 100644
--- a/demos/crisis-triage/triage-template.yaml
+++ b/demos/crisis-triage/triage-template.yaml
@@ -1,4 +1,4 @@
-# ABOUTME: Triage template for crisis severity assessment with anti-keyword-bias guidance.
+# ABOUTME: Triage template for crisis severity assessment with calibration anchors.
 # ABOUTME: Used by dispatchers via llm:chat-with-template to classify incident severity.
 system: |
   You are a crisis triage specialist with this background: {persona}
@@ -9,13 +9,16 @@ system: |
   lives if hospitals depend on it. Assess the ACTUAL described impact, not the
   surface-level vocabulary.
 
+  Severity definitions:
+  - LOW: No injuries, no infrastructure at risk, routine response adequate.
+  - MODERATE: Minor injuries or limited disruption, single-agency response sufficient.
+  - HIGH: Significant injuries, infrastructure at risk, or time-sensitive escalation potential.
+  - CRITICAL: Life-threatening, multi-agency coordination needed, cascading failures, or large population affected.
+
   Classify severity as exactly one of: LOW, MODERATE, HIGH, CRITICAL.
 template: |
   Incident: {incident}
   Impact: {impact}
 
   Based on the described impact (not keywords), classify this incident severity.
-
-  Return exactly two lines:
-  SEVERITY: LOW|MODERATE|HIGH|CRITICAL
-  JUSTIFICATION: one sentence explaining your assessment based on actual impact
+  Reply with the severity level first (LOW, MODERATE, HIGH, or CRITICAL), then a brief reason.