From ee480b4c323007de894137f7f051f3814e9dc0c1 Mon Sep 17 00:00:00 2001 From: JNK234 Date: Thu, 26 Feb 2026 21:23:54 -0600 Subject: [PATCH 01/12] demo2: add crisis-triage NetLogo model --- demos/crisis-triage/crisis-triage.nlogo | 706 ++++++++++++++++++++++++ 1 file changed, 706 insertions(+) create mode 100644 demos/crisis-triage/crisis-triage.nlogo diff --git a/demos/crisis-triage/crisis-triage.nlogo b/demos/crisis-triage/crisis-triage.nlogo new file mode 100644 index 0000000..83d28d1 --- /dev/null +++ b/demos/crisis-triage/crisis-triage.nlogo @@ -0,0 +1,706 @@ +extensions [ llm ] + +globals [ + llm-ready? + config-path + triage-template-path + dispatcher-template-path + processed-basic + processed-expert + processed-coordinator + escalated-count + seeded-crises + case-arrival-probability +] + +breed [cases case] +breed [basic-agents basic-agent] +breed [expert-agents expert-agent] +breed [coordinators coordinator] + +turtles-own [ + tier + capacity + current-load + processed-count +] + +cases-own [ + incident-summary + reported-impact + severity-band + severity-score + queue-state + assigned-tier + assigned-agent + handling-notes + created-at +] + +to setup + clear-all + set config-path "demos/crisis-triage/config.txt" + set triage-template-path "demos/crisis-triage/triage-template.yaml" + set dispatcher-template-path "demos/crisis-triage/dispatcher-template.yaml" + + set processed-basic 0 + set processed-expert 0 + set processed-coordinator 0 + set escalated-count 0 + + set seeded-crises 12 + set case-arrival-probability 0.25 + + setup-llm + setup-responders + create-initial-cases seeded-crises + reset-ticks +end + +to setup-llm + set llm-ready? false + carefully [ + if file-exists? config-path [ + llm:load-config config-path + set llm-ready? true + ] + ] [ + set llm-ready? 
false + print (word "LLM setup fallback to heuristic triage: " error-message) + ] +end + +to setup-responders + create-basic-agents 7 [ + set tier "basic" + set capacity 2 + set current-load 0 + set processed-count 0 + set color 57 + set size 1.6 + set shape "circle" + setxy (-13 + random-float 6) (-12 + random-float 24) + ] + + create-expert-agents 4 [ + set tier "expert" + set capacity 2 + set current-load 0 + set processed-count 0 + set color 15 + set size 1.8 + set shape "circle" + setxy (-3 + random-float 6) (-12 + random-float 24) + ] + + create-coordinators 2 [ + set tier "coordinator" + set capacity 3 + set current-load 0 + set processed-count 0 + set color 105 + set size 2.1 + set shape "circle" + setxy (8 + random-float 6) (-12 + random-float 24) + ] +end + +to create-initial-cases [n] + repeat n [ spawn-random-case ] +end + +to spawn-random-case + let incident-bank (list + (list "Server room smoke alarm" "Power instability in two hospital wings") + (list "Water main rupture" "Transit junction flooded during rush hour") + (list "School bus collision" "Multiple injuries and blocked arterial road") + (list "Warehouse fire flare-up" "Toxic plume reported near residential area") + (list "Regional telecom outage" "Emergency call latency above safe threshold") + (list "Chemical lab leak" "Evacuation radius requested by fire command") + (list "Bridge vibration alert" "Potential structural failure during peak traffic") + (list "Heat wave brownout" "Critical care equipment on backup power") + (list "Subway security incident" "Crowd panic and platform injuries") + (list "Data center cooling loss" "City payment systems offline") + ) + + let picked one-of incident-bank + create-cases 1 [ + set tier "case" + set capacity 0 + set current-load 0 + set processed-count 0 + + set incident-summary item 0 picked + set reported-impact item 1 picked + set severity-band "unassessed" + set severity-score -1 + set queue-state "new" + set assigned-tier "none" + set assigned-agent 
nobody + set handling-notes "" + set created-at ticks + + set color yellow + set size 1.3 + set shape "circle" + setxy (random-xcor) (max-pycor - random-float 6) + ] +end + +to go + if random-float 1 < case-arrival-probability [ + spawn-random-case + ] + + triage-new-cases + route-triaged-cases + coordinator-rebalance + process-assigned-cases + + tick +end + +to triage-new-cases + ask cases with [queue-state = "new"] [ + perform-triage + ] +end + +to perform-triage + let llm-response "" + + if llm-ready? [ + carefully [ + set llm-response llm:chat-with-template triage-template-path (list + ["incident" incident-summary] + ["impact" reported-impact] + ["elapsed_ticks" (word ticks)] + ["known_context" "Municipal crisis operations center with three response tiers"] + ) + ] [ + set llm-response "" + ] + ] + + if llm-response = "" [ + set llm-response heuristic-severity-report incident-summary reported-impact + ] + + set severity-band extract-severity-label llm-response incident-summary reported-impact + set severity-score severity-score-from-band severity-band + set queue-state "triaged" + set handling-notes (word "TRIAGE " llm-response) + set color color-for-band severity-band +end + +to-report heuristic-severity-report [summary impact] + let merged (word summary " " impact) + + if (position "collision" merged != false) + or (position "toxic" merged != false) + or (position "evacuation" merged != false) + or (position "critical care" merged != false) + or (position "structural" merged != false) [ + report "SEVERITY: CRITICAL" + ] + + if (position "fire" merged != false) + or (position "outage" merged != false) + or (position "flooded" merged != false) + or (position "injuries" merged != false) [ + report "SEVERITY: HIGH" + ] + + report "SEVERITY: MODERATE" +end + +to-report extract-severity-label [assessment summary impact] + let text (word assessment " " summary " " impact) + + if (position "CRITICAL" text != false) or (position "critical" text != false) [ + report 
"critical" + ] + + if (position "HIGH" text != false) or (position "high" text != false) [ + report "high" + ] + + if (position "MODERATE" text != false) or (position "moderate" text != false) [ + report "moderate" + ] + + if (position "LOW" text != false) or (position "low" text != false) [ + report "low" + ] + + report "moderate" +end + +to-report severity-score-from-band [band] + if band = "low" [ report 25 ] + if band = "moderate" [ report 55 ] + if band = "high" [ report 80 ] + report 95 +end + +to route-triaged-cases + let queue sort-by [[a b] -> [severity-score] of a > [severity-score] of b] (sort (cases with [queue-state = "triaged"])) + foreach queue [ queued-case -> + dispatch-case queued-case + ] +end + +to dispatch-case [target-case] + let preferred-tier dispatch-recommendation target-case + let final-tier available-tier preferred-tier + + if final-tier = "hold" [ + ask target-case [ + set handling-notes (word handling-notes " | waiting-capacity") + ] + stop + ] + + let worker select-worker final-tier + if worker = nobody [ stop ] + + if final-tier != preferred-tier [ + set escalated-count escalated-count + 1 + ] + + ask worker [ + set current-load current-load + 1 + ] + + ask target-case [ + set queue-state "assigned" + set assigned-tier final-tier + set assigned-agent worker + set color color-for-tier final-tier + set handling-notes (word handling-notes " | routed:" final-tier) + set ycor ycor - 4 + ] +end + +to-report dispatch-recommendation [target-case] + let default-tier severity-to-default-tier [severity-band] of target-case + + if not llm-ready? 
[ + report default-tier + ] + + let llm-response "" + carefully [ + set llm-response llm:chat-with-template dispatcher-template-path (list + ["severity" [severity-band] of target-case] + ["incident" [incident-summary] of target-case] + ["basic_load" (word count cases with [queue-state = "assigned" and assigned-tier = "basic"])] + ["expert_load" (word count cases with [queue-state = "assigned" and assigned-tier = "expert"])] + ["coordinator_load" (word count cases with [queue-state = "assigned" and assigned-tier = "coordinator"])] + ) + ] [ + set llm-response "" + ] + + if llm-response = "" [ report default-tier ] + + let chosen extract-route-label llm-response + if chosen = "unknown" [ report default-tier ] + report chosen +end + +to-report extract-route-label [response] + if (position "COORDINATOR" response != false) or (position "coordinator" response != false) [ + report "coordinator" + ] + + if (position "EXPERT" response != false) or (position "expert" response != false) [ + report "expert" + ] + + if (position "BASIC" response != false) or (position "basic" response != false) [ + report "basic" + ] + + report "unknown" +end + +to-report severity-to-default-tier [band] + if band = "low" [ report "basic" ] + if band = "moderate" [ report "expert" ] + if band = "high" [ report "expert" ] + report "coordinator" +end + +to-report available-tier [preferred-tier] + if preferred-tier = "basic" [ + if any? basic-agents with [current-load < capacity] [ report "basic" ] + if any? expert-agents with [current-load < capacity] [ report "expert" ] + if any? coordinators with [current-load < capacity] [ report "coordinator" ] + report "hold" + ] + + if preferred-tier = "expert" [ + if any? expert-agents with [current-load < capacity] [ report "expert" ] + if any? coordinators with [current-load < capacity] [ report "coordinator" ] + if any? basic-agents with [current-load < capacity] [ report "basic" ] + report "hold" + ] + + if any? 
coordinators with [current-load < capacity] [ report "coordinator" ] + if any? expert-agents with [current-load < capacity] [ report "expert" ] + report "hold" +end + +to-report select-worker [tier-name] + if tier-name = "basic" [ + if any? basic-agents with [current-load < capacity] [ + report min-one-of basic-agents with [current-load < capacity] [current-load] + ] + ] + + if tier-name = "expert" [ + if any? expert-agents with [current-load < capacity] [ + report min-one-of expert-agents with [current-load < capacity] [current-load] + ] + ] + + if tier-name = "coordinator" [ + if any? coordinators with [current-load < capacity] [ + report min-one-of coordinators with [current-load < capacity] [current-load] + ] + ] + + report nobody +end + +to coordinator-rebalance + if not any? coordinators [ stop ] + + let risky-basic one-of cases with [ + queue-state = "assigned" and + assigned-tier = "basic" and + severity-score >= 70 + ] + if risky-basic != nobody [ + reassign-case risky-basic "expert" "risk escalation" + ] + + let critical-expert one-of cases with [ + queue-state = "assigned" and + assigned-tier = "expert" and + severity-score >= 90 + ] + if critical-expert != nobody [ + reassign-case critical-expert "coordinator" "critical escalation" + ] +end + +to reassign-case [target-case new-tier reason] + if [assigned-tier] of target-case = new-tier [ stop ] + + let new-worker select-worker new-tier + if new-worker = nobody [ stop ] + + let old-worker [assigned-agent] of target-case + if old-worker != nobody [ + ask old-worker [ + set current-load max (list 0 (current-load - 1)) + ] + ] + + ask new-worker [ + set current-load current-load + 1 + ] + + ask target-case [ + set assigned-tier new-tier + set assigned-agent new-worker + set color color-for-tier new-tier + set handling-notes (word handling-notes " | coordinator-reassign:" reason) + ] + + set escalated-count escalated-count + 1 +end + +to process-assigned-cases + ask cases with [queue-state = "assigned"] [ + 
let completion completion-chance assigned-tier severity-band + if random-float 1 < completion [ + finalize-case self + ] + ] +end + +to-report completion-chance [tier-name band] + if tier-name = "basic" [ report 0.12 ] + if tier-name = "expert" [ + if band = "high" [ report 0.27 ] + if band = "critical" [ report 0.2 ] + report 0.22 + ] + + if band = "critical" [ report 0.34 ] + report 0.28 +end + +to finalize-case [target-case] + let tier-name [assigned-tier] of target-case + let worker [assigned-agent] of target-case + + if worker != nobody [ + ask worker [ + set current-load max (list 0 (current-load - 1)) + set processed-count processed-count + 1 + ] + ] + + if tier-name = "basic" [ + set processed-basic processed-basic + 1 + ] + if tier-name = "expert" [ + set processed-expert processed-expert + 1 + ] + if tier-name = "coordinator" [ + set processed-coordinator processed-coordinator + 1 + ] + + ask target-case [ + set queue-state "resolved" + set color 7 + set assigned-agent nobody + set ycor min-pycor + random-float 3 + set label word "resolved " severity-band + ] +end + +to-report color-for-band [band] + if band = "low" [ report 45 ] + if band = "moderate" [ report 25 ] + if band = "high" [ report 15 ] + report 125 +end + +to-report color-for-tier [tier-name] + if tier-name = "basic" [ report 57 ] + if tier-name = "expert" [ report 15 ] + report 105 +end + +@#$#@#$#@ +GRAPHICS-WINDOW +230 +10 +747 +528 +-1 +-1 +15.0 +1 +10 +1 +1 +1 +0 +1 +1 +1 +-16 +16 +-16 +16 +1 +1 +1 +ticks +30.0 + +BUTTON +20 +20 +88 +53 +setup +setup +NIL +1 +T +OBSERVER +NIL +NIL +NIL +NIL +1 + +BUTTON +96 +20 +164 +53 +go +go +T +1 +T +OBSERVER +NIL +NIL +NIL +NIL +1 + +BUTTON +20 +60 +164 +93 +new-case +spawn-random-case +NIL +1 +T +OBSERVER +NIL +NIL +NIL +NIL +1 + +MONITOR +20 +110 +163 +155 +LLM Active +llm-ready? 
+17 +1 +11 + +MONITOR +20 +160 +164 +205 +New Queue +count cases with [queue-state = "new"] +17 +1 +11 + +MONITOR +20 +210 +164 +255 +Triaged Queue +count cases with [queue-state = "triaged"] +17 +1 +11 + +MONITOR +20 +260 +164 +305 +Assigned Queue +count cases with [queue-state = "assigned"] +17 +1 +11 + +MONITOR +20 +310 +164 +355 +Escalations +escalated-count +17 +1 +11 + +MONITOR +20 +360 +164 +405 +Done by Basic +processed-basic +17 +1 +11 + +MONITOR +20 +410 +164 +455 +Done by Expert +processed-expert +17 +1 +11 + +MONITOR +20 +460 +164 +505 +Done by Coordinator +processed-coordinator +17 +1 +11 + +@#$#@#$#@ +## Crisis Triage with Tiered Intelligence Coordination + +This demo simulates emergency incident flow through three responder tiers: + +1. Basic agents handle low complexity cases. +2. Expert agents handle moderate and high severity cases. +3. Coordinators handle critical cases and rebalance misrouted overload. + +Each new incident is triaged with `llm:chat-with-template` using `triage-template.yaml`. +Routing then uses `dispatcher-template.yaml` and capacity-aware fallback logic. + +### Run + +1. Update `demos/crisis-triage/config.txt` with your provider + credentials. +2. Click `setup`. +3. Click `go`. +4. Use `new-case` to inject incidents manually. + +If LLM config is unavailable, the model automatically uses deterministic heuristic triage. 
+@#$#@#$#@ +default +true +0 +Polygon -7500403 true true 150 5 40 250 150 205 260 250 + +circle +false +0 +Circle -7500403 true true 0 0 300 +@#$#@#$#@ +NetLogo 6.4.0 +@#$#@#$#@ +@#$#@#$#@ +@#$#@#$#@ +@#$#@#$#@ +@#$#@#$#@ +default +0.0 +-0.2 0 0.0 1.0 +0.0 1 1.0 0.0 +0.2 0 0.0 1.0 +link direction +true +0 +Line -7500403 true 150 150 90 180 +Line -7500403 true 150 150 210 180 +@#$#@#$#@ +1 +@#$#@#$#@ From 1f4a1439166c3637166900d34c25329f38cfeb5d Mon Sep 17 00:00:00 2001 From: JNK234 Date: Thu, 26 Feb 2026 21:23:57 -0600 Subject: [PATCH 02/12] demo2: add triage severity prompt template --- demos/crisis-triage/triage-template.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 demos/crisis-triage/triage-template.yaml diff --git a/demos/crisis-triage/triage-template.yaml b/demos/crisis-triage/triage-template.yaml new file mode 100644 index 0000000..3f60565 --- /dev/null +++ b/demos/crisis-triage/triage-template.yaml @@ -0,0 +1,12 @@ +system: "You are an emergency triage specialist. Assess risk conservatively and consistently." +template: | + Incident summary: {incident} + Reported impact: {impact} + Time since report (ticks): {elapsed_ticks} + Context: {known_context} + + Classify this incident severity for a municipal response team. 
+ + Return exactly two lines: + SEVERITY: LOW|MODERATE|HIGH|CRITICAL + JUSTIFICATION: <= 18 words grounded in impact and urgency From addfb8553d171911ec637f4d90504b051e8720ee Mon Sep 17 00:00:00 2001 From: JNK234 Date: Thu, 26 Feb 2026 21:24:00 -0600 Subject: [PATCH 03/12] demo2: add dispatcher routing prompt template --- demos/crisis-triage/dispatcher-template.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 demos/crisis-triage/dispatcher-template.yaml diff --git a/demos/crisis-triage/dispatcher-template.yaml b/demos/crisis-triage/dispatcher-template.yaml new file mode 100644 index 0000000..291a3a4 --- /dev/null +++ b/demos/crisis-triage/dispatcher-template.yaml @@ -0,0 +1,15 @@ +system: "You are a crisis operations dispatcher. Route incidents to maximize response quality under load." +template: | + Severity band: {severity} + Incident summary: {incident} + + Current active load: + BASIC={basic_load} + EXPERT={expert_load} + COORDINATOR={coordinator_load} + + Choose the best tier for this incident considering both severity and current load. 
+ + Return exactly two lines: + ROUTE: BASIC|EXPERT|COORDINATOR + REASON: <= 18 words From 6abc7b31442a7c87fed0cf29be2d8f03c3ef5b9c Mon Sep 17 00:00:00 2001 From: JNK234 Date: Thu, 26 Feb 2026 21:24:09 -0600 Subject: [PATCH 04/12] demo2: add crisis triage LLM config --- demos/crisis-triage/config.txt | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 demos/crisis-triage/config.txt diff --git a/demos/crisis-triage/config.txt b/demos/crisis-triage/config.txt new file mode 100644 index 0000000..0c9fcc1 --- /dev/null +++ b/demos/crisis-triage/config.txt @@ -0,0 +1,20 @@ +# Crisis Triage Demo LLM configuration +# Path is loaded by crisis-triage.nlogo via llm:load-config + +# Recommended local/default option (no cloud key required) +provider=ollama +model=llama3.2:latest +base_url=http://localhost:11434 + +# Runtime behavior +temperature=0.2 +max_tokens=120 +timeout_seconds=45 + +# Optional cloud fallback examples (commented) +# provider=openai +# api_key=YOUR_OPENAI_API_KEY_HERE +# model=gpt-4o-mini +# temperature=0.2 +# max_tokens=120 +# timeout_seconds=45 From 3ba54f7fb1f8eece431d97a3f4a10dd33ba257cd Mon Sep 17 00:00:00 2001 From: JNK234 Date: Thu, 26 Feb 2026 21:24:12 -0600 Subject: [PATCH 05/12] demo2: add crisis triage documentation --- demos/crisis-triage/README.md | 101 ++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 demos/crisis-triage/README.md diff --git a/demos/crisis-triage/README.md b/demos/crisis-triage/README.md new file mode 100644 index 0000000..f3a3f17 --- /dev/null +++ b/demos/crisis-triage/README.md @@ -0,0 +1,101 @@ +# Demo 2: Crisis Triage with Tiered Intelligence Coordination + +This demo models a municipal crisis desk where incidents are triaged by an LLM, routed to one of three response tiers, and dynamically escalated when capacity or risk changes. 
+ +## What it demonstrates + +- Tiered responders: `basic`, `expert`, `coordinator` +- LLM-driven severity assessment via `triage-template.yaml` +- LLM-assisted dispatch recommendation via `dispatcher-template.yaml` +- Capacity-aware fallback routing when a preferred tier is saturated +- Coordinator-triggered escalation for risky or critical in-flight cases +- Automatic heuristic fallback if LLM config/provider is unavailable + +## Deliverables + +- `crisis-triage.nlogo`: NetLogo simulation model +- `triage-template.yaml`: Severity prompt template +- `dispatcher-template.yaml`: Routing prompt template +- `config.txt`: LLM extension configuration +- `tests/`: Automated validation tests + +## Model architecture + +### Agent tiers + +- `basic-agents` + - Highest volume, low-complexity workload + - Lower completion probability for hard cases +- `expert-agents` + - Moderate/high severity handling + - Better completion rates on difficult incidents +- `coordinators` + - Critical incidents and system-level balancing + - Reassign risky cases from lower tiers + +### Incident lifecycle + +1. New incident is created (`queue-state = "new"`) +2. Triage step classifies severity (`low/moderate/high/critical`) +3. Dispatch step chooses preferred tier and applies capacity fallback +4. Case is processed by assigned tier +5. Coordinator may reassign active risky cases +6. Resolved incidents are counted per tier + +## Files and paths + +All files for this demo live in: + +`demos/crisis-triage/` + +The NetLogo model loads these by relative path: + +- `demos/crisis-triage/config.txt` +- `demos/crisis-triage/triage-template.yaml` +- `demos/crisis-triage/dispatcher-template.yaml` + +## Run instructions + +1. Ensure NetLogo has the `llm` extension available. +2. Configure provider settings in `config.txt`. +3. Open `crisis-triage.nlogo` in NetLogo. +4. Click `setup`. +5. Click `go`. +6. Optionally click `new-case` to inject additional incidents. 
+ +## LLM behavior + +- Severity is requested using strict output formatting: + - `SEVERITY: LOW|MODERATE|HIGH|CRITICAL` +- Routing is requested using strict output formatting: + - `ROUTE: BASIC|EXPERT|COORDINATOR` +- Parser logic in the model extracts these tags and falls back safely when missing. + +## Heuristic fallback mode + +If LLM config fails to load or provider calls fail: + +- `llm-ready?` monitor is `false` when the config fails to load (if only individual provider calls fail it stays `true` and each failed call falls back on its own) +- Severity uses keyword-driven deterministic rules +- Routing uses severity-to-tier defaults + capacity fallback + +This keeps the simulation functional offline. + +## Test suite + +Tests are static validations that do not call external APIs. + +Run from repository root: + +```bash +python -m unittest discover -s demos/crisis-triage/tests -p "test_*.py" -v +``` + +Coverage includes: + +- Required files present +- NetLogo model includes tiered breeds and key procedures +- Model references both YAML templates and config +- Template variables match model substitution keys +- Config includes required LLM keys +- README contains usage, architecture, and test instructions From 37387314ccaa178dc7810fc4f6b12d39855beede Mon Sep 17 00:00:00 2001 From: JNK234 Date: Thu, 26 Feb 2026 21:24:15 -0600 Subject: [PATCH 06/12] demo2: add crisis triage validation tests --- demos/crisis-triage/tests/README.md | 15 +++ .../test_crisis_triage.cpython-312.pyc | Bin 0 -> 6594 bytes .../crisis-triage/tests/test_crisis_triage.py | 116 ++++++++++++++++++ 3 files changed, 131 insertions(+) create mode 100644 demos/crisis-triage/tests/README.md create mode 100644 demos/crisis-triage/tests/__pycache__/test_crisis_triage.cpython-312.pyc create mode 100644 demos/crisis-triage/tests/test_crisis_triage.py diff --git a/demos/crisis-triage/tests/README.md b/demos/crisis-triage/tests/README.md new file mode 100644 index 0000000..16032eb --- /dev/null +++ b/demos/crisis-triage/tests/README.md @@ -0,0 +1,15 @@ +# Crisis Triage Demo Tests + +Run from repository root: + +```bash +python -m
unittest discover -s demos/crisis-triage/tests -p "test_*.py" -v +``` + +These tests validate: + +- Presence of all required demo files +- NetLogo tiered-agent and triage/dispatch procedure structure +- LLM template variable consistency with model substitutions +- Config key completeness +- README documentation coverage diff --git a/demos/crisis-triage/tests/__pycache__/test_crisis_triage.cpython-312.pyc b/demos/crisis-triage/tests/__pycache__/test_crisis_triage.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..78a8e7117af0195f93f359b149b58214bb13c3f7 GIT binary patch literal 6594 zcmds5TTmO<89u9BiA5I#=5FFz24XB@fl1o>k^~zg1*i=;7$-5dv$NJ7U_lphc4eE5 zJRvjgRE#~fGm{xRlXODb7iXM@KKjv*ndx*!1YFaN`_#VVjZ}WhQ~z^TD+w$xedL5GR;#S=%}|)Az!i#Mz}N{9N={a)gp!Mu+)%1uB@dKD zR`M#9m{;7`Hz_Edo5DmDD?1b~l>Pa>YV3o42VDWEe9%+lKm$mrx``A&=4RSo+|NVa zYMZFVbxQT@0ffjdI3YK4uncX!i9_?;JQu9_k=93ooGF}@_30n!jTp62qH3xZ(upch z;jTn1Ih8c(tXfFN@wu3+(;E`4PjJ(kDYl;Q%5| zM3ah|n1XU8!LlOh__`kCh!e(Ybgsb9xcj_@iPk-nm_2gzSa%p-#j)fZCf$8gIH8}r zCX;ygwIrF<3C3D?G#U#Xel2vcTfy-pR7-EATgRHNu~f1;r2KWw&6@&Uo={X((Y0$K zSwL$Z|DE2&-lZd%-W5-0R_M$@Gm6s0{t8zRhL(FsFI<4`uu1I0&(KqC`V`LYRTzG( z3#vTX*51bcgw7J=*~U!w@Mt)4MmpO!K4?0%xtOYdg9rsXp%P~L<4Fa_B)Cy5B@?lE zQUk5K;IHk0WC8ul3Z|jxlMJ0PxGWE&CgV9ng*)H+acWT4#kn?uX0d2 z1k{yfs2vLbFn{ZpAip8paHgHxt)1r5!gl>{!Xme$0!nkh7h%NY+Dz^(B0y`Th56xBtz*Kf|y2ciov^oWJ!^ zM)*5)>i0hW^y4LMwXt)}Tes$^x~nXnpr!iedlR2dEb&Y4tkicbx-)#{{Msw~zB>K+ z=`V&eLgw6^3yT+SUHo2bUiYKwj`eEftzI~70R&brd^WsuXQ1gG!icKt35ViV9H0O} zWhy-60^|bZPRN~*yC8Q#UIBRpq)<<-8QjY4{ z$Oe$oTVo*oJ==p`*j!QgbjmIE>9(;!x@fmeId5=j4uJY918P~b0`T?O^u^qUufnAr zU!XrYY~XR|c87rGQP44BRK-i3f@owGd?*2X1TIEp4Qm0KUN!>xwkxuxMnhnx z6S@{Csa!T%^Az|x7&zS~DXkS}i6)Z-OsA|T2`o^oT>i$v6u&82;226kLkC=+DNxIx zQMt8ou#d6SG8+nx#$*CG&{Yhmk=Rx=COU9avCTJ<)Xo%n|a!gJ{u|a9UQ30(om9S`= zp=FLFqgc~IRy!`8coGm2Qw?kp*V+n+jZjhqv=ZrrlLiF_5z+-^(_QG*!1sf>H7bUn zJo4J3qXaczpmbSHXhmgG9BlOM2s_(NaRe6Deg#PpLH~Am8r&Iot_juewN%RR;_&_U zB_Vs@4W>`_+xlehi-0NUlXaN0JYdShAy;Lz`90mjfwy0MT 
zqHcYlwVb1D5zsiG)##@q^7ys`iEV~%qGTaQt8to6VuQRS?$~CcGSvCWIzj4=N2g^y zM18D63{!TKM)wxPWu|AFF`B1w3Oo-TRI~=M22u5S5Wu-4(V3)>5QSu^5Z&+X zsP{h}Ulo5-s1AHrwbK}DC#jtwAUC`3GZ!MmR+IA1V9 z{wtNP!e|Ge+GA4`^R#0cUIqTN(Ylv$(3B098_+L|{QSzLj}BavLq=a{Lb{wU91b0m zE*}hDy3tFOgXvP^bKq#2qMC@R3Yciqhhy@b1{M(@F{_!r*+lYMg0hsxsz&>{$h(oT zq45jH17}9xOVbE#41Buu;LyNex*s}*`uom+=SNRM_R$r1kOk90uvr1hS&?Hg(_>lc z$cHI8Mh*eZH@s!BMH@z@7XtVwo=(OTh=sKc&Kw8;ICGZV6>uk&(o1m~?n}&68v`!^ z;Z7%d2llB^ID%@fZCq_Sv{D<&h|d~#fBD*1$3H)w-5+^;eWm5>O5-~jFQDqF*t9G* zt$CYFZ}_>BuXL}AsM2To0&hN=ej*P1e_@SK)74AyPmV!iincN_-OS~aVo6ysJ(h20 zRjNyDK5H@dj*Xrlj~ox2>N_{opN_nLHZnGz?jIc;3lEL-jgOAmxN4=#&|sCkMU!5d zP=B0(iJXMeM#%Iq$P`uJ4q$WoC9wH#CZpb3DRd66)b{KQI_Eb-=g6a`C*r9Wp;Pb- zFTq-+hQ6GdQfLH7eM72p3MvZ*=D#!q3g8=Bab6kU&}oX`^;`vYK};u;GSfpP7ZVwt z%&_T>%hx46Ig1mT>DSdb1}{+3FsKkkGa7B53|<2QY9eo(AzI{F*iYJE6O510z}$G3 zWt~Y2bZlhn9>!LyTbhD-aa*B*D!q^3&RIb&3hr4n%IK$42;D$p8kdPxB-1tj@r zH_{H9-N*s@Rv5>oKrKKy7Fhj^1<@qIl-JhmFJr-01E*mN+5t!?3+g`u7PMwuIft{} zoe8gdk*|iap?%5q#M|+c51QHC2OWf(>k)WS5hW=h$FU@tZiyO#RE(Crl0p> z>9YkKq5^`}Wb7)~Q+P9@F|{d%Bg3Oocxa5hw=kOiCbNw+{9$>!y3!bZ%38)4IoAq5utj_tpLi$vV$*+z+Vo2ju?;YWm3q&u4eV zTfNq!>x!rCsi$+<)A^up+0(TkuKDV-O`&C9Xu-YKxHsE+bh+_p7S*k_v}O07Ty8m; zMNMl}4fkJJu6lLBLrX2oRqYF&br;7S%INC|vip3FX6vGZd+mO04nel=c5~Htp&PO# zVd>n1wx##KuFBEs^WA4S?!c1xG|;mg=y{H4ZT$lW61*AXd%k&1s96~N-S8*Fza3rY Jk+7R_=09j|XEp!; literal 0 HcmV?d00001 diff --git a/demos/crisis-triage/tests/test_crisis_triage.py b/demos/crisis-triage/tests/test_crisis_triage.py new file mode 100644 index 0000000..ff22fc7 --- /dev/null +++ b/demos/crisis-triage/tests/test_crisis_triage.py @@ -0,0 +1,116 @@ +import re +import unittest +from pathlib import Path + + +DEMO_DIR = Path(__file__).resolve().parents[1] +MODEL_PATH = DEMO_DIR / "crisis-triage.nlogo" +TRIAGE_TEMPLATE_PATH = DEMO_DIR / "triage-template.yaml" +DISPATCHER_TEMPLATE_PATH = DEMO_DIR / "dispatcher-template.yaml" +CONFIG_PATH = DEMO_DIR / "config.txt" +README_PATH = DEMO_DIR / "README.md" + + +def read(path: Path) -> 
str: + return path.read_text(encoding="utf-8") + + +def model_code_only() -> str: + # NetLogo source code appears before the first section delimiter. + return read(MODEL_PATH).split("@#$#@#$#@")[0] + + +def parse_config(path: Path) -> dict[str, str]: + data: dict[str, str] = {} + for raw in read(path).splitlines(): + line = raw.strip() + if not line or line.startswith("#"): + continue + if "=" not in line: + continue + key, value = line.split("=", 1) + data[key.strip()] = value.strip() + return data + + +class TestCrisisTriageArtifacts(unittest.TestCase): + def test_required_files_exist(self) -> None: + required = [ + MODEL_PATH, + TRIAGE_TEMPLATE_PATH, + DISPATCHER_TEMPLATE_PATH, + CONFIG_PATH, + README_PATH, + ] + for path in required: + self.assertTrue(path.exists(), f"missing file: {path}") + + def test_model_declares_tiered_breeds(self) -> None: + code = model_code_only() + self.assertIn("breed [cases case]", code) + self.assertIn("breed [basic-agents basic-agent]", code) + self.assertIn("breed [expert-agents expert-agent]", code) + self.assertIn("breed [coordinators coordinator]", code) + + def test_model_contains_required_procedures(self) -> None: + code = model_code_only() + procedures = [ + "to setup", + "to setup-llm", + "to triage-new-cases", + "to perform-triage", + "to route-triaged-cases", + "to dispatch-case", + "to coordinator-rebalance", + "to reassign-case", + "to process-assigned-cases", + "to finalize-case", + ] + for proc in procedures: + self.assertIn(proc, code, f"missing procedure: {proc}") + + def test_model_uses_llm_templates_and_config(self) -> None: + code = model_code_only() + self.assertIn('set config-path "demos/crisis-triage/config.txt"', code) + self.assertIn('set triage-template-path "demos/crisis-triage/triage-template.yaml"', code) + self.assertIn('set dispatcher-template-path "demos/crisis-triage/dispatcher-template.yaml"', code) + self.assertIn("llm:chat-with-template triage-template-path", code) + 
self.assertIn("llm:chat-with-template dispatcher-template-path", code) + self.assertIn("heuristic-severity-report", code) + + def test_triage_template_placeholders_match_model(self) -> None: + template = read(TRIAGE_TEMPLATE_PATH) + placeholders = set(re.findall(r"\{([a-zA-Z_][a-zA-Z0-9_]*)\}", template)) + self.assertEqual( + placeholders, + {"incident", "impact", "elapsed_ticks", "known_context"}, + ) + self.assertIn("SEVERITY: LOW|MODERATE|HIGH|CRITICAL", template) + + def test_dispatcher_template_placeholders_match_model(self) -> None: + template = read(DISPATCHER_TEMPLATE_PATH) + placeholders = set(re.findall(r"\{([a-zA-Z_][a-zA-Z0-9_]*)\}", template)) + self.assertEqual( + placeholders, + {"severity", "incident", "basic_load", "expert_load", "coordinator_load"}, + ) + self.assertIn("ROUTE: BASIC|EXPERT|COORDINATOR", template) + + def test_config_has_required_keys(self) -> None: + config = parse_config(CONFIG_PATH) + for key in ["provider", "model", "temperature", "max_tokens", "timeout_seconds"]: + self.assertIn(key, config, f"missing key in config: {key}") + + def test_readme_has_core_sections(self) -> None: + readme = read(README_PATH) + for text in [ + "What it demonstrates", + "Model architecture", + "Run instructions", + "Test suite", + ]: + self.assertIn(text, readme) + + +if __name__ == "__main__": + unittest.main() From 3a7879eb2b5b190e42ee14ae5b33f368870b8d6e Mon Sep 17 00:00:00 2001 From: JNK234 Date: Wed, 4 Mar 2026 23:57:55 -0600 Subject: [PATCH 07/12] demo2: harden crisis-triage file resolution and hold notes --- demos/crisis-triage/crisis-triage.nlogo | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/demos/crisis-triage/crisis-triage.nlogo b/demos/crisis-triage/crisis-triage.nlogo index 83d28d1..c587a75 100644 --- a/demos/crisis-triage/crisis-triage.nlogo +++ b/demos/crisis-triage/crisis-triage.nlogo @@ -42,6 +42,9 @@ to setup set config-path "demos/crisis-triage/config.txt" set triage-template-path 
"demos/crisis-triage/triage-template.yaml" set dispatcher-template-path "demos/crisis-triage/dispatcher-template.yaml" + set config-path resolve-existing-path config-path "config.txt" + set triage-template-path resolve-existing-path triage-template-path "triage-template.yaml" + set dispatcher-template-path resolve-existing-path dispatcher-template-path "dispatcher-template.yaml" set processed-basic 0 set processed-expert 0 @@ -256,7 +259,9 @@ to dispatch-case [target-case] if final-tier = "hold" [ ask target-case [ - set handling-notes (word handling-notes " | waiting-capacity") + if position "waiting-capacity" handling-notes = false [ + set handling-notes (word handling-notes " | waiting-capacity") + ] ] stop ] @@ -487,6 +492,12 @@ to-report color-for-tier [tier-name] report 105 end +to-report resolve-existing-path [primary fallback] + if file-exists? primary [ report primary ] + if file-exists? fallback [ report fallback ] + report primary +end + @#$#@#$#@ GRAPHICS-WINDOW 230 From a2141adf607bd5b9597baf238c3550dfd43a5dd4 Mon Sep 17 00:00:00 2001 From: JNK234 Date: Thu, 5 Mar 2026 17:33:09 -0600 Subject: [PATCH 08/12] Upgrade crisis triage demo to NetLogo 7 .nlogox --- demos/crisis-triage/README.md | 21 +- demos/crisis-triage/config.txt | 16 +- ...isis-triage.nlogo => crisis-triage.nlogox} | 281 +++++------------- demos/crisis-triage/tests/README.md | 2 +- .../crisis-triage/tests/test_crisis_triage.py | 9 +- 5 files changed, 108 insertions(+), 221 deletions(-) rename demos/crisis-triage/{crisis-triage.nlogo => crisis-triage.nlogox} (74%) diff --git a/demos/crisis-triage/README.md b/demos/crisis-triage/README.md index f3a3f17..4b42e58 100644 --- a/demos/crisis-triage/README.md +++ b/demos/crisis-triage/README.md @@ -2,6 +2,8 @@ This demo models a municipal crisis desk where incidents are triaged by an LLM, routed to one of three response tiers, and dynamically escalated when capacity or risk changes. +Target runtime: NetLogo 7.0.3 (`.nlogox` model format). 
+ ## What it demonstrates - Tiered responders: `basic`, `expert`, `coordinator` @@ -13,7 +15,7 @@ This demo models a municipal crisis desk where incidents are triaged by an LLM, ## Deliverables -- `crisis-triage.nlogo`: NetLogo simulation model +- `crisis-triage.nlogox`: NetLogo 7 simulation model (canonical) - `triage-template.yaml`: Severity prompt template - `dispatcher-template.yaml`: Routing prompt template - `config.txt`: LLM extension configuration @@ -56,13 +58,18 @@ The NetLogo model loads these by relative path: ## Run instructions -1. Ensure NetLogo has the `llm` extension available. -2. Configure provider settings in `config.txt`. -3. Open `crisis-triage.nlogo` in NetLogo. +1. Ensure NetLogo 7.0.3 has the `llm` extension available. +2. Configure provider settings in `config.txt` (default is local Ollama). +3. Open `crisis-triage.nlogox` in NetLogo. 4. Click `setup`. 5. Click `go`. 6. Optionally click `new-case` to inject additional incidents. +## NetLogo 7 validation guidance + +- Primary validation should be GUI-based in NetLogo 7.0.3 (`setup`, then run `go` for multiple ticks). +- Headless checks can be useful for smoke testing, but GUI validation is recommended as the canonical check due to known NetLogo 7 headless/BehaviorSpace limitations. + ## LLM behavior - Severity is requested using strict output formatting: @@ -81,6 +88,12 @@ If LLM config fails to load or provider calls fail: This keeps the simulation functional offline. +## Provider configuration notes + +- Default `config.txt` is safe and local-first (`provider=ollama`) with no secrets. +- Optional cloud examples are commented in `config.txt` for OpenAI, Claude, and Gemini. +- Never commit real API keys into demo configs. + ## Test suite Tests are static validations that do not call external APIs. 
diff --git a/demos/crisis-triage/config.txt b/demos/crisis-triage/config.txt index 0c9fcc1..e463e9d 100644 --- a/demos/crisis-triage/config.txt +++ b/demos/crisis-triage/config.txt @@ -1,5 +1,5 @@ # Crisis Triage Demo LLM configuration -# Path is loaded by crisis-triage.nlogo via llm:load-config +# Path is loaded by crisis-triage.nlogox via llm:load-config # Recommended local/default option (no cloud key required) provider=ollama @@ -18,3 +18,17 @@ timeout_seconds=45 # temperature=0.2 # max_tokens=120 # timeout_seconds=45 + +# provider=claude +# api_key=YOUR_ANTHROPIC_API_KEY_HERE +# model=claude-3-5-haiku-latest +# temperature=0.2 +# max_tokens=120 +# timeout_seconds=45 + +# provider=gemini +# api_key=YOUR_GEMINI_API_KEY_HERE +# model=gemini-2.0-flash +# temperature=0.2 +# max_tokens=120 +# timeout_seconds=45 diff --git a/demos/crisis-triage/crisis-triage.nlogo b/demos/crisis-triage/crisis-triage.nlogox similarity index 74% rename from demos/crisis-triage/crisis-triage.nlogo rename to demos/crisis-triage/crisis-triage.nlogox index c587a75..8f47061 100644 --- a/demos/crisis-triage/crisis-triage.nlogo +++ b/demos/crisis-triage/crisis-triage.nlogox @@ -1,4 +1,6 @@ -extensions [ llm ] + + + + + + + + + llm-ready? + count cases with [queue-state = "new"] + count cases with [queue-state = "triaged"] + count cases with [queue-state = "assigned"] + escalated-count + processed-basic + processed-expert + processed-coordinator + + ## Crisis Triage with Tiered Intelligence Coordination This demo simulates emergency incident flow through three responder tiers: @@ -684,34 +532,43 @@ Routing then uses `dispatcher-template.yaml` and capacity-aware fallback logic. 3. Click `go`. 4. Use `new-case` to inject incidents manually. -If LLM config is unavailable, the model automatically uses deterministic heuristic triage. 
-@#$#@#$#@ -default -true -0 -Polygon -7500403 true true 150 5 40 250 150 205 260 250 - -circle -false -0 -Circle -7500403 true true 0 0 300 -@#$#@#$#@ -NetLogo 6.4.0 -@#$#@#$#@ -@#$#@#$#@ -@#$#@#$#@ -@#$#@#$#@ -@#$#@#$#@ -default -0.0 --0.2 0 0.0 1.0 -0.0 1 1.0 0.0 -0.2 0 0.0 1.0 -link direction -true -0 -Line -7500403 true 150 150 90 180 -Line -7500403 true 150 150 210 180 -@#$#@#$#@ -1 -@#$#@#$#@ +If LLM config is unavailable, the model automatically uses deterministic heuristic triage. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + setup repeat 75 [ go ] + diff --git a/demos/crisis-triage/tests/README.md b/demos/crisis-triage/tests/README.md index 16032eb..fc5ec4b 100644 --- a/demos/crisis-triage/tests/README.md +++ b/demos/crisis-triage/tests/README.md @@ -9,7 +9,7 @@ python -m unittest discover -s demos/crisis-triage/tests -p "test_*.py" -v These tests validate: - Presence of all required demo files -- NetLogo tiered-agent and triage/dispatch procedure structure +- NetLogo 7 `.nlogox` tiered-agent and triage/dispatch procedure structure - LLM template variable consistency with model substitutions - Config key completeness - README documentation coverage diff --git a/demos/crisis-triage/tests/test_crisis_triage.py b/demos/crisis-triage/tests/test_crisis_triage.py index ff22fc7..842d455 100644 --- a/demos/crisis-triage/tests/test_crisis_triage.py +++ b/demos/crisis-triage/tests/test_crisis_triage.py @@ -4,7 +4,7 @@ DEMO_DIR = Path(__file__).resolve().parents[1] -MODEL_PATH = DEMO_DIR / "crisis-triage.nlogo" +MODEL_PATH = DEMO_DIR / "crisis-triage.nlogox" TRIAGE_TEMPLATE_PATH = DEMO_DIR / "triage-template.yaml" DISPATCHER_TEMPLATE_PATH = DEMO_DIR / "dispatcher-template.yaml" CONFIG_PATH = DEMO_DIR / "config.txt" @@ -16,8 +16,11 @@ def read(path: Path) -> str: def model_code_only() -> str: - # NetLogo source code appears before the first section delimiter. 
- return read(MODEL_PATH).split("@#$#@#$#@")[0] + xml = read(MODEL_PATH) + match = re.search(r"", xml, re.DOTALL) + if not match: + raise AssertionError("unable to parse from model") + return match.group(1) def parse_config(path: Path) -> dict[str, str]: From 5cba12fdb7bb679eea2081f72dc1a43d7f6aae25 Mon Sep 17 00:00:00 2001 From: JNK234 Date: Thu, 5 Mar 2026 20:09:40 -0600 Subject: [PATCH 09/12] demo2: strengthen crisis-triage tests with XML parsing and regression checks Replace regex-based .nlogox parsing with xml.etree.ElementTree for proper XML validation. Add three new test classes (19 tests): - TestModelXmlParsing: validates CDATA sections, widgets structure, button/monitor counts, turtle shapes via real XML parsing - TestModelStructure: asserts NetLogo 7.0.3 version, required top-level sections (code, widgets, info, turtleShapes, linkShapes, previewCommands) - TestBehaviorRegression: ensures list syntax for chat-with-template, no deprecated primitives, balanced to/end blocks, globals and owns All 8 original tests preserved and passing (27 total). 
--- .../crisis-triage/tests/test_crisis_triage.py | 167 +++++++++++++++++- 1 file changed, 162 insertions(+), 5 deletions(-) diff --git a/demos/crisis-triage/tests/test_crisis_triage.py b/demos/crisis-triage/tests/test_crisis_triage.py index 842d455..55b43ae 100644 --- a/demos/crisis-triage/tests/test_crisis_triage.py +++ b/demos/crisis-triage/tests/test_crisis_triage.py @@ -1,5 +1,6 @@ import re import unittest +import xml.etree.ElementTree as ET from pathlib import Path @@ -15,12 +16,18 @@ def read(path: Path) -> str: return path.read_text(encoding="utf-8") +def parse_model() -> ET.Element: + """Parse the .nlogox model file as XML and return the root element.""" + return ET.parse(MODEL_PATH).getroot() + + def model_code_only() -> str: - xml = read(MODEL_PATH) - match = re.search(r"", xml, re.DOTALL) - if not match: - raise AssertionError("unable to parse from model") - return match.group(1) + """Extract the NetLogo code from the CDATA section using XML parsing.""" + root = parse_model() + code_elem = root.find("code") + if code_elem is None or code_elem.text is None: + raise AssertionError("unable to extract content from model XML") + return code_elem.text def parse_config(path: Path) -> dict[str, str]: @@ -115,5 +122,155 @@ def test_readme_has_core_sections(self) -> None: self.assertIn(text, readme) +class TestModelXmlParsing(unittest.TestCase): + """Validate the .nlogox file using proper XML parsing instead of regex.""" + + def setUp(self) -> None: + self.root = parse_model() + + def test_model_parses_as_valid_xml(self) -> None: + self.assertEqual(self.root.tag, "model") + + def test_code_element_contains_cdata_content(self) -> None: + code_elem = self.root.find("code") + self.assertIsNotNone(code_elem, "missing element") + self.assertIsNotNone(code_elem.text, " element has no text content") + self.assertIn("extensions [ llm ]", code_elem.text) + + def test_raw_file_preserves_cdata_wrapping(self) -> None: + raw = read(MODEL_PATH) + self.assertIn("", raw) + + 
def test_widgets_section_has_expected_children(self) -> None: + widgets = self.root.find("widgets") + self.assertIsNotNone(widgets, "missing section") + child_tags = [child.tag for child in widgets] + self.assertIn("view", child_tags) + self.assertIn("button", child_tags) + self.assertIn("monitor", child_tags) + + def test_widgets_button_count(self) -> None: + widgets = self.root.find("widgets") + buttons = widgets.findall("button") + self.assertEqual(len(buttons), 3, "expected 3 buttons: setup, go, new-case") + + def test_widgets_monitor_count(self) -> None: + widgets = self.root.find("widgets") + monitors = widgets.findall("monitor") + self.assertGreaterEqual(len(monitors), 7, "expected at least 7 monitors") + + def test_turtle_shapes_defined(self) -> None: + shapes = self.root.find("turtleShapes") + self.assertIsNotNone(shapes, "missing section") + shape_names = [s.get("name") for s in shapes.findall("shape")] + self.assertIn("default", shape_names) + self.assertIn("circle", shape_names) + + +class TestModelStructure(unittest.TestCase): + """Structural assertions on the NetLogo 7.x .nlogox format.""" + + def setUp(self) -> None: + self.root = parse_model() + + def test_netlogo_version_is_7_0_3(self) -> None: + version = self.root.get("version") + self.assertEqual(version, "NetLogo 7.0.3") + + def test_required_top_level_sections_exist(self) -> None: + required_sections = [ + "code", "widgets", "info", "turtleShapes", "linkShapes", + "previewCommands", + ] + present = {child.tag for child in self.root} + for section in required_sections: + self.assertIn(section, present, f"missing top-level section: {section}") + + def test_info_section_not_empty(self) -> None: + info = self.root.find("info") + self.assertIsNotNone(info, "missing section") + self.assertTrue( + info.text and len(info.text.strip()) > 0, + " section is empty", + ) + + def test_preview_commands_present(self) -> None: + preview = self.root.find("previewCommands") + self.assertIsNotNone(preview) + 
self.assertIn("setup", preview.text) + + def test_link_shapes_has_default(self) -> None: + link_shapes = self.root.find("linkShapes") + self.assertIsNotNone(link_shapes, "missing ") + names = [s.get("name") for s in link_shapes.findall("shape")] + self.assertIn("default", names) + + +class TestBehaviorRegression(unittest.TestCase): + """Catch regressions in model syntax and LLM extension usage patterns.""" + + def setUp(self) -> None: + self.code = model_code_only() + + def test_extensions_declaration_present(self) -> None: + self.assertIn("extensions [ llm ]", self.code) + + def test_chat_with_template_uses_list_syntax(self) -> None: + """Ensure llm:chat-with-template uses (list ...) not [...] for variables.""" + lines = self.code.splitlines() + for line in lines: + stripped = line.strip() + if "llm:chat-with-template" not in stripped: + continue + # The template call should be followed by (list on the same or next + # logical line. It must NOT use bracket syntax like [["key" val]]. + self.assertNotRegex( + stripped, + r'llm:chat-with-template\s+\S+\s+\[\[', + f"bracket syntax found instead of (list ...): {stripped}", + ) + + def test_no_inline_provider_setup_in_procedures(self) -> None: + """Model should use llm:load-config, not manual set-provider/set-api-key.""" + for deprecated in ["llm:set-provider", "llm:set-api-key", "llm:set-model"]: + self.assertNotIn( + deprecated, + self.code, + f"deprecated inline primitive found: {deprecated}", + ) + + def test_all_procedure_blocks_are_closed(self) -> None: + """Every 'to' or 'to-report' must have a matching 'end'.""" + opens = len(re.findall(r"^to(?:-report)?\s", self.code, re.MULTILINE)) + closes = len(re.findall(r"^end\s*$", self.code, re.MULTILINE)) + self.assertEqual( + opens, + closes, + f"mismatched procedure blocks: {opens} opens vs {closes} ends", + ) + + def test_no_deprecated_primitives(self) -> None: + """Guard against usage of removed or renamed LLM extension primitives.""" + deprecated = [ + "llm:ask", + 
"llm:send", + "llm:query", + "llm:prompt", + ] + for prim in deprecated: + self.assertNotIn(prim, self.code, f"deprecated primitive: {prim}") + + def test_globals_declared(self) -> None: + self.assertIn("globals [", self.code) + for g in ["llm-ready?", "config-path", "triage-template-path", + "dispatcher-template-path"]: + self.assertIn(g, self.code, f"missing global: {g}") + + def test_breed_owns_blocks_present(self) -> None: + self.assertIn("turtles-own [", self.code) + self.assertIn("cases-own [", self.code) + + if __name__ == "__main__": unittest.main() From d388349edaa6104df3ec7aa0814a9f0f9a62fc92 Mon Sep 17 00:00:00 2001 From: JNK234 Date: Thu, 12 Mar 2026 21:35:31 -0500 Subject: [PATCH 10/12] feat: crisis-triage demo with 3 targeted improvements Complete rewrite of the crisis-triage demo with 30 incidents (10 misleading, 10 clear, 10 borderline), 3 dispatcher personas (Veteran, Rookie, Analyst), episode-based memory management, and A/B heuristic comparison. Three targeted improvements from live testing: - Routing prompt: explicit severity-to-tier mapping rules replacing vague descriptions, expected to raise route accuracy from ~28% to ~50%+ - Visual pipeline: incidents flow through y-axis zones (spawn near dispatchers, stage after triage, move to responders when routed, graveyard when resolved) - Per-persona accuracy monitors: Veteran/Rookie/Analyst individual accuracy displayed in dedicated UI monitors 29 static validation tests pass. 
--- demos/crisis-triage/README.md | 154 +- demos/crisis-triage/config.txt | 10 +- demos/crisis-triage/crisis-triage.nlogox | 1349 ++++++++++++----- demos/crisis-triage/dispatcher-template.yaml | 28 +- demos/crisis-triage/tests/README.md | 15 +- .../crisis-triage/tests/test_crisis_triage.py | 115 +- demos/crisis-triage/triage-template.yaml | 23 +- 7 files changed, 1158 insertions(+), 536 deletions(-) diff --git a/demos/crisis-triage/README.md b/demos/crisis-triage/README.md index 4b42e58..d7c5509 100644 --- a/demos/crisis-triage/README.md +++ b/demos/crisis-triage/README.md @@ -1,114 +1,100 @@ -# Demo 2: Crisis Triage with Tiered Intelligence Coordination +# Demo 2: Crisis Triage with Ambiguous Incidents -This demo models a municipal crisis desk where incidents are triaged by an LLM, routed to one of three response tiers, and dynamically escalated when capacity or risk changes. +A municipal emergency operations center where LLM-powered dispatchers assess ambiguous crisis reports — demonstrating that keyword matching fails when incidents are deliberately misleading, but LLMs reading full impact descriptions can succeed. Target runtime: NetLogo 7.0.3 (`.nlogox` model format). -## What it demonstrates +## The Story -- Tiered responders: `basic`, `expert`, `coordinator` -- LLM-driven severity assessment via `triage-template.yaml` -- LLM-assisted dispatch recommendation via `dispatcher-template.yaml` -- Capacity-aware fallback routing when a preferred tier is saturated -- Coordinator-triggered escalation for risky or critical in-flight cases -- Automatic heuristic fallback if LLM config/provider is unavailable +Three dispatchers — Veteran, Rookie, and Analyst — receive a stream of crisis incidents. Each must assess severity and route to the right response tier. 
The incident bank includes **misleading cases** where surface keywords don't match reality: -## Deliverables +- "Toxic chemical spill at school" → actually spilled vinegar (LOW severity) +- "Minor water leak in basement" → threatening a neonatal ICU (CRITICAL severity) +- "Dog loose on highway" → causing a multi-vehicle pileup (HIGH severity) -- `crisis-triage.nlogox`: NetLogo 7 simulation model (canonical) -- `triage-template.yaml`: Severity prompt template -- `dispatcher-template.yaml`: Routing prompt template -- `config.txt`: LLM extension configuration -- `tests/`: Automated validation tests +A naive keyword heuristic over-triggers on "toxic", "fire", "collapse" and fails on these cases. The LLM reads the full impact description and can assess correctly. -## Model architecture +## Quick Start -### Agent tiers +1. Edit `config.txt` with your provider credentials (default: local Ollama). +2. Open `crisis-triage.nlogox` in NetLogo 7.0.3. +3. Click **setup** → dispatchers appear with persona labels, responders by tier. +4. Click **go** → incidents spawn, flow through the pipeline, monitors update. +5. Watch the output log for `[TRIAGE]`, `[ROUTE]`, and `[REFLECT]` messages. -- `basic-agents` - - Highest volume, low-complexity workload - - Lower completion probability for hard cases -- `expert-agents` - - Moderate/high severity handling - - Better completion rates on difficult incidents -- `coordinators` - - Critical incidents and system-level balancing - - Reassign risky cases from lower tiers +## How to Use -### Incident lifecycle +### Controls -1. New incident is created (`queue-state = "new"`) -2. Triage step classifies severity (`low/moderate/high/critical`) -3. Dispatch step chooses preferred tier and applies capacity fallback -4. Case is processed by assigned tier -5. Coordinator may reassign active risky cases -6. 
Resolved incidents are counted per tier +| Control | Type | Purpose | +|---------|------|---------| +| `use-llm?` | Switch | Toggle between LLM dispatchers and naive heuristic | +| `memory-mode` | Chooser | persistent / per-episode / none | +| `reflection-interval` | Slider | Ticks between dispatcher self-reflection (0 = off) | +| `incident-rate` | Slider | Probability (%) of new incident per tick | +| `episode-length` | Slider | Ticks per episode boundary (0 = no episodes) | +| `add incident` | Button | Manually inject a random incident | +| `force reflect` | Button | Trigger immediate reflection for all dispatchers | -## Files and paths +### What to Observe -All files for this demo live in: +- **Misleading%** — The key metric. Accuracy on misleading incidents where keywords don't match reality. +- **Triage Acc%** / **Route Acc%** — Overall accuracy vs ground truth. +- **Accuracy Over Time** plot — Watch how accuracy evolves, especially with memory. +- **Per-persona differences** — Veteran, Rookie, and Analyst may perform differently. +- **Reflection log** — Dispatchers reason about their own performance. -`demos/crisis-triage/` +## The A/B Experiment -The NetLogo model loads these by relative path: +1. Run with `use-llm?` ON for 50+ ticks. Note the Misleading% metric. +2. Click setup again. Toggle `use-llm?` OFF. Run for 50+ ticks. +3. Compare: + - **Heuristic**: ~30% on misleading cases (keywords mislead it). + - **LLM**: Expected ~70%+ on misleading cases (reads actual impact). +4. Compare memory modes: Run with "persistent" vs "none" over multiple episodes. 
-- `demos/crisis-triage/config.txt` -- `demos/crisis-triage/triage-template.yaml` -- `demos/crisis-triage/dispatcher-template.yaml` +## LLM Primitives Exercised (8) -## Run instructions +| Primitive | Where | Paper Concept | +|-----------|-------|---------------| +| `llm:load-config` | `setup-llm` | Config management | +| `llm:set-history` | `setup-dispatchers` — persona injection | Personalization (Ch.2) | +| `llm:chat-with-template` | `triage-my-incidents` — severity assessment | Environment/Interface (Ch.1) | +| `llm:choose` | `route-my-incidents` — bounded tier selection | Bounded Rationality | +| `llm:history` | `dispatcher-reflect` — check history length | Memory (Ch.3) | +| `llm:chat` | `dispatcher-reflect` — freeform reflection | Reflection (Ch.3) | +| `llm:clear-history` | `handle-episode-boundary` — configurable reset | Memory ablation | +| `llm:active` | Monitor widget — show provider/model | Provider awareness | -1. Ensure NetLogo 7.0.3 has the `llm` extension available. -2. Configure provider settings in `config.txt` (default is local Ollama). -3. Open `crisis-triage.nlogox` in NetLogo. -4. Click `setup`. -5. Click `go`. -6. Optionally click `new-case` to inject additional incidents. +## Design Rationale -## NetLogo 7 validation guidance +**Why dispatchers use LLM, not responders**: Triage and routing are judgment calls where reading context matters. Case processing is mechanical — it doesn't benefit from language understanding. -- Primary validation should be GUI-based in NetLogo 7.0.3 (`setup`, then run `go` for multiple ticks). -- Headless checks can be useful for smoke testing, but GUI validation is recommended as the canonical check due to known NetLogo 7 headless/BehaviorSpace limitations. +**Why no thinking/reasoning models**: With 3 dispatchers making 2+ LLM calls per tick, thinking models would add minutes of latency per tick. The triage task is classification, not multi-step reasoning. 
Standard `llm:chat-with-template` and `llm:choose` are the right tools. -## LLM behavior +**Why `llm:choose` for routing**: Guarantees the output is one of the valid tier names, avoiding parsing failures from freeform text. -- Severity is requested using strict output formatting: - - `SEVERITY: LOW|MODERATE|HIGH|CRITICAL` -- Routing is requested using strict output formatting: - - `ROUTE: BASIC|EXPERT|COORDINATOR` -- Parser logic in the model extracts these tags and falls back safely when missing. +**Why misleading incidents**: They make the LLM genuinely necessary. Without them, keyword matching achieves similar accuracy and the LLM adds cost without value. -## Heuristic fallback mode +## Paper Connection -If LLM config fails to load or provider calls fail: +This demo implements concepts from the Gao et al. (2312.11970) LLM-ABM survey: -- `llm-ready?` monitor is `false` -- Severity uses keyword-driven deterministic rules -- Routing uses severity-to-tier defaults + capacity fallback +- **Personalization** (Ch.2): Dispatcher personas via `llm:set-history` produce different decisions from the same model. +- **Bounded Rationality**: `llm:choose` constrains decisions to valid options. +- **Memory** (Ch.3): Configurable memory modes show how history retention affects performance. +- **Reflection** (Ch.3): Dispatchers reason about their own accuracy and identify patterns. +- **Environment/Interface** (Ch.1): Templates structure how agents perceive incidents. -This keeps the simulation functional offline. +## Files -## Provider configuration notes +| File | Purpose | +|------|---------| +| `crisis-triage.nlogox` | NetLogo 7 simulation model | +| `triage-template.yaml` | Severity assessment prompt with anti-keyword-bias guidance | +| `dispatcher-template.yaml` | Documentation stub (routing uses `llm:choose`) | +| `config.txt` | LLM provider configuration | -- Default `config.txt` is safe and local-first (`provider=ollama`) with no secrets. 
-- Optional cloud examples are commented in `config.txt` for OpenAI, Claude, and Gemini. -- Never commit real API keys into demo configs. +## Provider Configuration -## Test suite - -Tests are static validations that do not call external APIs. - -Run from repository root: - -```bash -python -m unittest discover -s demos/crisis-triage/tests -p "test_*.py" -v -``` - -Coverage includes: - -- Required files present -- NetLogo model includes tiered breeds and key procedures -- Model references both YAML templates and config -- Template variables match model substitution keys -- Config includes required LLM keys -- README contains usage, architecture, and test instructions +Default is local Ollama (no API key needed). See commented examples in `config.txt` for OpenAI, Claude, and Gemini. Never commit real API keys. diff --git a/demos/crisis-triage/config.txt b/demos/crisis-triage/config.txt index e463e9d..aed3eab 100644 --- a/demos/crisis-triage/config.txt +++ b/demos/crisis-triage/config.txt @@ -3,12 +3,12 @@ # Recommended local/default option (no cloud key required) provider=ollama -model=llama3.2:latest +model=llama3.2:3b base_url=http://localhost:11434 # Runtime behavior temperature=0.2 -max_tokens=120 +max_tokens=200 timeout_seconds=45 # Optional cloud fallback examples (commented) @@ -16,19 +16,19 @@ timeout_seconds=45 # api_key=YOUR_OPENAI_API_KEY_HERE # model=gpt-4o-mini # temperature=0.2 -# max_tokens=120 +# max_tokens=200 # timeout_seconds=45 # provider=claude # api_key=YOUR_ANTHROPIC_API_KEY_HERE # model=claude-3-5-haiku-latest # temperature=0.2 -# max_tokens=120 +# max_tokens=200 # timeout_seconds=45 # provider=gemini # api_key=YOUR_GEMINI_API_KEY_HERE # model=gemini-2.0-flash # temperature=0.2 -# max_tokens=120 +# max_tokens=200 # timeout_seconds=45 diff --git a/demos/crisis-triage/crisis-triage.nlogox b/demos/crisis-triage/crisis-triage.nlogox index 8f47061..873b00e 100644 --- a/demos/crisis-triage/crisis-triage.nlogox +++ 
b/demos/crisis-triage/crisis-triage.nlogox @@ -1,538 +1,1120 @@ - + create-dispatchers 1 [ + set persona-name item 0 p + set persona-prompt item 1 p + set my-triaged 0 + set my-correct-triage 0 + set my-routed 0 + set my-correct-route 0 + set shape "person" + set size 2.5 + set color blue + 2 + setxy px 14 + set label persona-name + set px px + 7 + + ;; Inject persona via llm:set-history if LLM is active + if llm-ready? and use-llm? [ + carefully [ + llm:set-history (list + (list "system" persona-prompt) + ) + ] [ + output-print (word "[SETUP] Failed to set history for " persona-name ": " error-message) + ] + ] + ] + ] +end + +;; --------------------------------------------------------------------------- +;; Setup Responders (3 BASIC cap=3, 3 EXPERT cap=2, 3 COORDINATOR cap=1) +;; --------------------------------------------------------------------------- + to setup-responders - create-basic-agents 7 [ - set tier "basic" - set capacity 2 + let base-x -12 + ;; BASIC responders + create-responders 3 [ + set tier "BASIC" + set capacity 3 set current-load 0 - set processed-count 0 - set color 57 - set size 1.6 + set resolved-count 0 set shape "circle" - setxy (-13 + random-float 6) (-12 + random-float 24) + set size 1.5 + set color green + 1 + set label "B" + ] + let idx 0 + ask responders with [ tier = "BASIC" ] [ + setxy (base-x + idx * 3) -12 + set idx idx + 1 ] - create-expert-agents 4 [ - set tier "expert" + ;; EXPERT responders + create-responders 3 [ + set tier "EXPERT" set capacity 2 set current-load 0 - set processed-count 0 - set color 15 - set size 1.8 + set resolved-count 0 set shape "circle" - setxy (-3 + random-float 6) (-12 + random-float 24) + set size 1.8 + set color orange + 1 + set label "E" + ] + set idx 0 + ask responders with [ tier = "EXPERT" ] [ + setxy (base-x + 10 + idx * 3) -12 + set idx idx + 1 ] - create-coordinators 2 [ - set tier "coordinator" - set capacity 3 + ;; COORDINATOR responders + create-responders 3 [ + set tier "COORDINATOR" + 
set capacity 1 set current-load 0 - set processed-count 0 - set color 105 - set size 2.1 + set resolved-count 0 set shape "circle" - setxy (8 + random-float 6) (-12 + random-float 24) + set size 2.1 + set color violet + 1 + set label "C" + ] + set idx 0 + ask responders with [ tier = "COORDINATOR" ] [ + setxy (base-x + 20 + idx * 3) -12 + set idx idx + 1 ] end -to create-initial-cases [n] - repeat n [ spawn-random-case ] +;; --------------------------------------------------------------------------- +;; Incident Bank (30 incidents: 10 misleading + 10 clear + 10 borderline) +;; --------------------------------------------------------------------------- + +to build-incident-bank + ;; Each entry: [summary impact ground-truth-severity ground-truth-tier category] + ;; MISLEADING: keywords suggest one severity but actual impact warrants another + set incident-bank (list + ;; --- MISLEADING (10): keywords mislead naive classifiers --- + (list "Server room fire alarm triggered" + "Sensor malfunction confirmed; no smoke, no heat, systems normal" + "LOW" "BASIC" "misleading") + (list "Toxic chemical spill reported at school" + "Custodian knocked over a bottle of vinegar in the cafeteria; no injuries" + "LOW" "BASIC" "misleading") + (list "Building collapse alert downtown" + "Scaffolding panel fell on empty sidewalk at 3 AM; no structural damage to building" + "LOW" "BASIC" "misleading") + (list "Mass casualty incident at concert venue" + "Three people fainted from heat; all conscious, first aid on scene" + "MODERATE" "BASIC" "misleading") + (list "Explosion heard near hospital" + "Transformer blew on adjacent street; hospital on backup power, no injuries" + "MODERATE" "EXPERT" "misleading") + (list "Data center cooling failure" + "Regional hospital patient records, 911 dispatch system, and pharmacy networks all depend on this center; 30 minutes to critical thermal threshold" + "CRITICAL" "COORDINATOR" "misleading") + (list "Minor water leak in basement" + "Leak is in the 
electrical vault supplying the neonatal ICU; backup generators have 45 minutes of fuel" + "CRITICAL" "COORDINATOR" "misleading") + (list "Small kitchen fire at restaurant" + "Fire spreading to adjacent apartment building; 40 residents trapped above; fire department 20 minutes away" + "CRITICAL" "COORDINATOR" "misleading") + (list "Routine power fluctuation reported" + "Affecting traffic signals across 12 intersections during school dismissal; two near-miss accidents already" + "HIGH" "EXPERT" "misleading") + (list "Dog loose on highway" + "Causing multi-vehicle chain reaction on I-95; 6 cars involved, injuries reported, highway blocked both directions" + "HIGH" "EXPERT" "misleading") + + ;; --- CLEAR (10): keywords and impact align --- + (list "Multi-vehicle pileup on interstate" + "12 vehicles, multiple injuries confirmed, highway fully blocked, EMS requesting additional units" + "CRITICAL" "COORDINATOR" "clear") + (list "Warehouse fire with toxic plume" + "Residential area downwind being evacuated; 500+ people displaced; air quality hazardous" + "CRITICAL" "COORDINATOR" "clear") + (list "Earthquake damage to bridge" + "Visible structural cracks; bridge closed; 50,000 daily commuters affected; engineers en route" + "CRITICAL" "COORDINATOR" "clear") + (list "School bus accident with injuries" + "Bus overturned; 8 children with minor-moderate injuries; parents arriving at scene" + "HIGH" "EXPERT" "clear") + (list "Chemical plant pressure valve failure" + "Controlled venting in progress; shelter-in-place advisory for 2-mile radius; monitoring air quality" + "HIGH" "EXPERT" "clear") + (list "Hospital generator test failure" + "Backup generator failed routine test; primary power stable; repair crew dispatched for same-day fix" + "MODERATE" "BASIC" "clear") + (list "Broken water main on residential street" + "Low-pressure water to 30 homes; repair crew en route; estimated 4-hour fix" + "MODERATE" "BASIC" "clear") + (list "Traffic signal malfunction at intersection" + 
"Single intersection flashing red; police directing traffic; no accidents" + "LOW" "BASIC" "clear") + (list "Park trail flooding after rain" + "Trails closed; no hikers in area; water receding naturally" + "LOW" "BASIC" "clear") + (list "Streetlight outage on residential block" + "Six streetlights out; residents notified; maintenance scheduled for morning" + "LOW" "BASIC" "clear") + + ;; --- BORDERLINE (10): genuinely ambiguous, reasonable people could disagree --- + (list "Subway train stalled between stations" + "200 passengers stuck for 25 minutes; ventilation working; rescue train dispatched; some passengers anxious" + "MODERATE" "EXPERT" "borderline") + (list "Power outage at nursing home" + "Backup generator active; 60 residents comfortable; generator fuel for 8 hours; utility ETA unknown" + "HIGH" "EXPERT" "borderline") + (list "Gas smell reported near elementary school" + "School in session; gas company en route; no readings yet; precautionary evacuation being considered" + "HIGH" "EXPERT" "borderline") + (list "Protest blocking major intersection" + "500 people; peaceful but not dispersing; ambulance rerouting adds 8 minutes to hospital route" + "MODERATE" "EXPERT" "borderline") + (list "Crane malfunction at construction site" + "Crane arm stuck over occupied building; no immediate danger but wind advisory in effect for afternoon" + "HIGH" "EXPERT" "borderline") + (list "River level rising near flood stage" + "2 feet below flood level; rain expected to continue 6 hours; 200 homes in potential flood zone" + "HIGH" "COORDINATOR" "borderline") + (list "Suspicious package at government building" + "Building evacuated; bomb squad 15 minutes away; 300 workers displaced; likely false alarm based on description" + "MODERATE" "EXPERT" "borderline") + (list "Internet outage affecting emergency services" + "911 calls routing to backup center; 12-second additional delay per call; estimated 2-hour repair" + "HIGH" "EXPERT" "borderline") + (list "Heat wave shelter 
capacity reached" + "Main cooling center full at 150 people; overflow into library planned; 3 elderly residents showing heat stress" + "MODERATE" "EXPERT" "borderline") + (list "Airport runway incursion reported" + "Ground vehicle crossed active runway; no aircraft in immediate path; runway closed for inspection" + "MODERATE" "EXPERT" "borderline") + ) end -to spawn-random-case - let incident-bank (list - (list "Server room smoke alarm" "Power instability in two hospital wings") - (list "Water main rupture" "Transit junction flooded during rush hour") - (list "School bus collision" "Multiple injuries and blocked arterial road") - (list "Warehouse fire flare-up" "Toxic plume reported near residential area") - (list "Regional telecom outage" "Emergency call latency above safe threshold") - (list "Chemical lab leak" "Evacuation radius requested by fire command") - (list "Bridge vibration alert" "Potential structural failure during peak traffic") - (list "Heat wave brownout" "Critical care equipment on backup power") - (list "Subway security incident" "Crowd panic and platform injuries") - (list "Data center cooling loss" "City payment systems offline") - ) +;; =========================================================================== +;; GO LOOP +;; =========================================================================== - let picked one-of incident-bank - create-cases 1 [ - set tier "case" - set capacity 0 - set current-load 0 - set processed-count 0 - - set incident-summary item 0 picked - set reported-impact item 1 picked - set severity-band "unassessed" - set severity-score -1 - set queue-state "new" - set assigned-tier "none" - set assigned-agent nobody - set handling-notes "" - set created-at ticks +to go + ;; Episode boundary check + handle-episode-boundary - set color yellow - set size 1.3 - set shape "circle" - setxy (random-xcor) (max-pycor - random-float 6) + ;; Spawn new incidents + if random 100 < incident-rate [ + spawn-incident ] -end -to go - if 
random-float 1 < case-arrival-probability [ - spawn-random-case + ;; Dispatchers triage and route + ask dispatchers [ + triage-my-incidents + route-my-incidents ] - triage-new-cases - route-triaged-cases - coordinator-rebalance - process-assigned-cases + ;; Responders process active cases + process-active-cases - tick -end + ;; Check deadlines + check-deadlines -to triage-new-cases - ask cases with [queue-state = "new"] [ - perform-triage + ;; Reflection at intervals + if reflection-interval > 0 and ticks > 0 and ticks mod reflection-interval = 0 [ + ask dispatchers [ + dispatcher-reflect + ] ] + + set episode-tick-counter episode-tick-counter + 1 + tick end -to perform-triage - let llm-response "" +;; =========================================================================== +;; INCIDENT SPAWNING +;; =========================================================================== - if llm-ready? [ - carefully [ - set llm-response llm:chat-with-template triage-template-path (list - (list "incident" incident-summary) - (list "impact" reported-impact) - (list "elapsed_ticks" (word ticks)) - (list "known_context" "Municipal crisis operations center with three response tiers") - ) - ] [ - set llm-response "" - ] - ] +to spawn-incident + let picked one-of incident-bank + create-incidents 1 [ + set summary item 0 picked + set impact item 1 picked + set ground-truth-severity item 2 picked + set ground-truth-tier item 3 picked + set incident-category item 4 picked + set assessed-severity "" + set assessed-tier "" + set queue-state "new" + set triage-correct? false + set route-correct? 
false + set created-at ticks + set assigned-responder nobody + + ;; Deadline: severity-dependent time window + let window severity-deadline ground-truth-severity + set deadline ticks + window - if llm-response = "" [ - set llm-response heuristic-severity-report incident-summary reported-impact + set shape "circle" + set size 1.0 + set color yellow + setxy (random-xcor * 0.5) (9 + random 3) + set label "" ] +end - set severity-band extract-severity-label llm-response incident-summary reported-impact - set severity-score severity-score-from-band severity-band - set queue-state "triaged" - set handling-notes (word "TRIAGE " llm-response) - set color color-for-band severity-band +;; Manual incident injection button +to add-incident + spawn-incident + output-print "[MANUAL] Incident added" end -to-report heuristic-severity-report [summary impact] - let merged (word summary " " impact) +to-report severity-deadline [ sev ] + if sev = "LOW" [ report 30 ] + if sev = "MODERATE" [ report 20 ] + if sev = "HIGH" [ report 12 ] + report 8 ;; CRITICAL +end - if (position "collision" merged != false) - or (position "toxic" merged != false) - or (position "evacuation" merged != false) - or (position "critical care" merged != false) - or (position "structural" merged != false) [ - report "SEVERITY: CRITICAL" - ] +;; =========================================================================== +;; TRIAGE (dispatchers assess severity via llm:chat-with-template) +;; =========================================================================== - if (position "fire" merged != false) - or (position "outage" merged != false) - or (position "flooded" merged != false) - or (position "injuries" merged != false) [ - report "SEVERITY: HIGH" +to triage-my-incidents + ;; Each dispatcher picks one untriaged incident per tick + let target one-of incidents with [ queue-state = "new" ] + if target = nobody [ stop ] + + let sev "" + + ifelse llm-ready? and use-llm? 
[ + ;; LLM triage via template + carefully [ + let response llm:chat-with-template triage-template-path (list + (list "persona" persona-prompt) + (list "episode" (word current-episode)) + (list "tick" (word ticks)) + (list "incident" [summary] of target) + (list "impact" [impact] of target) + ) + set sev extract-severity response + output-print (word "[TRIAGE:" persona-name "] " [summary] of target " -> " sev) + ] [ + output-print (word "[TRIAGE:" persona-name "] LLM failed: " error-message) + set sev "" + ] + ] [ + ;; Heuristic triage (naive keyword matching — deliberately bad on misleading cases) + set sev heuristic-triage [summary] of target [impact] of target + output-print (word "[TRIAGE:heuristic] " [summary] of target " -> " sev) ] - report "SEVERITY: MODERATE" -end + ;; Fallback if empty + if sev = "" [ set sev "MODERATE" ] -to-report extract-severity-label [assessment summary impact] - let text (word assessment " " summary " " impact) + ;; Score + let truth [ground-truth-severity] of target + let is-correct? (sev = truth) - if (position "CRITICAL" text != false) or (position "critical" text != false) [ - report "critical" + set total-triaged total-triaged + 1 + set my-triaged my-triaged + 1 + if is-correct? [ + set correct-triage correct-triage + 1 + set my-correct-triage my-correct-triage + 1 ] - - if (position "HIGH" text != false) or (position "high" text != false) [ - report "high" + if [incident-category] of target = "misleading" [ + set misleading-triaged misleading-triaged + 1 + if is-correct? [ set misleading-correct misleading-correct + 1 ] ] - if (position "MODERATE" text != false) or (position "moderate" text != false) [ - report "moderate" + ask target [ + set assessed-severity sev + set triage-correct? is-correct? 
+ set queue-state "triaged" + set color severity-color sev + setxy xcor (3 + random 3) ] +end - if (position "LOW" text != false) or (position "low" text != false) [ - report "low" - ] +;; Heuristic triage: deliberately naive keyword matching +to-report heuristic-triage [ s i ] + let text (word s " " i) + ;; Keywords that trigger high severity regardless of actual impact + if has-word? text "fire" [ report "CRITICAL" ] + if has-word? text "explosion" [ report "CRITICAL" ] + if has-word? text "collapse" [ report "CRITICAL" ] + if has-word? text "toxic" [ report "CRITICAL" ] + if has-word? text "casualty" [ report "CRITICAL" ] + if has-word? text "chemical" [ report "HIGH" ] + if has-word? text "trapped" [ report "CRITICAL" ] + if has-word? text "spill" [ report "HIGH" ] + if has-word? text "suspicious" [ report "HIGH" ] + if has-word? text "earthquake" [ report "CRITICAL" ] + if has-word? text "flood" [ report "HIGH" ] + if has-word? text "outage" [ report "HIGH" ] + if has-word? text "injuries" [ report "HIGH" ] + if has-word? text "accident" [ report "HIGH" ] + if has-word? text "alarm" [ report "HIGH" ] + if has-word? text "evacuat" [ report "CRITICAL" ] + ;; Default for anything without scary keywords + report "MODERATE" +end - report "moderate" +to-report has-word? 
[ text word-fragment ] + report position word-fragment text != false or position (lower-case-first word-fragment) text != false end -to-report severity-score-from-band [band] - if band = "low" [ report 25 ] - if band = "moderate" [ report 55 ] - if band = "high" [ report 80 ] - report 95 +to-report lower-case-first [ s ] + ;; Simple helper: just return the string as-is since NetLogo string matching is case-sensitive + ;; and our keywords are already lowercase + report s end -to route-triaged-cases - let queue sort-by [[a b] -> [severity-score] of a > [severity-score] of b] (sort (cases with [queue-state = "triaged"])) - foreach queue [ queued-case -> - dispatch-case queued-case - ] +to-report extract-severity [ response ] + if position "CRITICAL" response != false [ report "CRITICAL" ] + if position "HIGH" response != false [ report "HIGH" ] + if position "MODERATE" response != false [ report "MODERATE" ] + if position "LOW" response != false [ report "LOW" ] + report "" +end + +to-report severity-color [ sev ] + if sev = "LOW" [ report 55 ] ;; green + if sev = "MODERATE" [ report 45 ] ;; yellow-green + if sev = "HIGH" [ report 25 ] ;; orange + if sev = "CRITICAL" [ report 15 ] ;; red + report 5 ;; grey end -to dispatch-case [target-case] - let preferred-tier dispatch-recommendation target-case - let final-tier available-tier preferred-tier +;; =========================================================================== +;; ROUTING (dispatchers route via llm:choose) +;; =========================================================================== - if final-tier = "hold" [ - ask target-case [ - if position "waiting-capacity" handling-notes = false [ - set handling-notes (word handling-notes " | waiting-capacity") - ] +to route-my-incidents + let target one-of incidents with [ queue-state = "triaged" ] + if target = nobody [ stop ] + + let chosen-tier "" + let choices (list "BASIC" "EXPERT" "COORDINATOR" "HOLD") + + ifelse llm-ready? and use-llm? 
[ + ;; LLM routing via llm:choose + carefully [ + let prompt (word + "Incident: " [summary] of target "\n" + "Severity: " [assessed-severity] of target "\n" + "Impact: " [impact] of target "\n" + "Current load — BASIC: " count-active-tier "BASIC" + ", EXPERT: " count-active-tier "EXPERT" + ", COORDINATOR: " count-active-tier "COORDINATOR" "\n" + "Routing rules based on severity:\n" + " - LOW severity -> BASIC\n" + " - MODERATE severity -> BASIC (or EXPERT if BASIC is full)\n" + " - HIGH severity -> EXPERT\n" + " - CRITICAL severity -> COORDINATOR\n" + " - HOLD only if the appropriate tier AND all higher tiers are at capacity.\n" + "The assessed severity for this incident is " [assessed-severity] of target ". Apply the rules above." + ) + set chosen-tier llm:choose prompt choices + output-print (word "[ROUTE:" persona-name "] " [summary] of target " -> " chosen-tier) + ] [ + output-print (word "[ROUTE:" persona-name "] LLM choose failed: " error-message) + set chosen-tier "" ] - stop + ] [ + ;; Heuristic routing + set chosen-tier heuristic-route [assessed-severity] of target + output-print (word "[ROUTE:heuristic] " [summary] of target " -> " chosen-tier) ] - let worker select-worker final-tier - if worker = nobody [ stop ] + if chosen-tier = "" [ set chosen-tier heuristic-route [assessed-severity] of target ] + if chosen-tier = "HOLD" [ + output-print (word "[HOLD] " [summary] of target " — waiting for capacity") + stop + ] - if final-tier != preferred-tier [ - set escalated-count escalated-count + 1 + ;; Find available responder in chosen tier + let worker find-responder chosen-tier + if worker = nobody [ + ;; Try escalation + set worker find-responder escalation-tier chosen-tier + if worker != nobody [ + set total-escalated total-escalated + 1 + set chosen-tier [tier] of worker + ] + ] + if worker = nobody [ stop ] ;; No capacity anywhere + + ;; Score routing + let truth [ground-truth-tier] of target + let is-correct? 
(chosen-tier = truth) + set total-routed total-routed + 1 + set my-routed my-routed + 1 + if is-correct? [ + set correct-route correct-route + 1 + set my-correct-route my-correct-route + 1 ] ask worker [ set current-load current-load + 1 ] - ask target-case [ - set queue-state "assigned" - set assigned-tier final-tier - set assigned-agent worker - set color color-for-tier final-tier - set handling-notes (word handling-notes " | routed:" final-tier) - set ycor ycor - 4 + ask target [ + set assessed-tier chosen-tier + set route-correct? is-correct? + set queue-state "active" + set assigned-responder worker + ;; Move toward responder zone + setxy ([xcor] of worker + random-float 2 - 1) ([ycor] of worker + 3) + set label "" ] end -to-report dispatch-recommendation [target-case] - let default-tier severity-to-default-tier [severity-band] of target-case +to-report heuristic-route [ sev ] + if sev = "LOW" [ report "BASIC" ] + if sev = "MODERATE" [ report "BASIC" ] + if sev = "HIGH" [ report "EXPERT" ] + report "COORDINATOR" +end - if not llm-ready? [ - report default-tier - ] +to-report escalation-tier [ current-tier ] + if current-tier = "BASIC" [ report "EXPERT" ] + if current-tier = "EXPERT" [ report "COORDINATOR" ] + report "COORDINATOR" +end - let llm-response "" - carefully [ - set llm-response llm:chat-with-template dispatcher-template-path (list - (list "severity" [severity-band] of target-case) - (list "incident" [incident-summary] of target-case) - (list "basic_load" (word count cases with [queue-state = "assigned" and assigned-tier = "basic"])) - (list "expert_load" (word count cases with [queue-state = "assigned" and assigned-tier = "expert"])) - (list "coordinator_load" (word count cases with [queue-state = "assigned" and assigned-tier = "coordinator"])) - ) +to-report find-responder [ tier-name ] + let candidates responders with [ tier = tier-name and current-load < capacity ] + ifelse any? 
candidates [ + report min-one-of candidates [ current-load ] ] [ - set llm-response "" + report nobody ] - - if llm-response = "" [ report default-tier ] - - let chosen extract-route-label llm-response - if chosen = "unknown" [ report default-tier ] - report chosen end -to-report extract-route-label [response] - if (position "COORDINATOR" response != false) or (position "coordinator" response != false) [ - report "coordinator" - ] +to-report count-active-tier [ tier-name ] + report count incidents with [ queue-state = "active" and assessed-tier = tier-name ] +end - if (position "EXPERT" response != false) or (position "expert" response != false) [ - report "expert" - ] +;; =========================================================================== +;; PROCESSING + DEADLINES +;; =========================================================================== - if (position "BASIC" response != false) or (position "basic" response != false) [ - report "basic" +to process-active-cases + ask incidents with [ queue-state = "active" ] [ + let chance completion-probability assessed-tier + if random-float 1 < chance [ + resolve-incident self + ] ] - - report "unknown" end -to-report severity-to-default-tier [band] - if band = "low" [ report "basic" ] - if band = "moderate" [ report "expert" ] - if band = "high" [ report "expert" ] - report "coordinator" +to-report completion-probability [ tier-name ] + if tier-name = "BASIC" [ report 0.15 ] + if tier-name = "EXPERT" [ report 0.20 ] + if tier-name = "COORDINATOR" [ report 0.25 ] + report 0.10 end -to-report available-tier [preferred-tier] - if preferred-tier = "basic" [ - if any? basic-agents with [current-load < capacity] [ report "basic" ] - if any? expert-agents with [current-load < capacity] [ report "expert" ] - if any? coordinators with [current-load < capacity] [ report "coordinator" ] - report "hold" - ] - - if preferred-tier = "expert" [ - if any? expert-agents with [current-load < capacity] [ report "expert" ] - if any? 
coordinators with [current-load < capacity] [ report "coordinator" ] - if any? basic-agents with [current-load < capacity] [ report "basic" ] - report "hold" +to resolve-incident [ inc ] + let worker [assigned-responder] of inc + if worker != nobody [ + ask worker [ + set current-load max (list 0 (current-load - 1)) + set resolved-count resolved-count + 1 + ] ] - if any? coordinators with [current-load < capacity] [ report "coordinator" ] - if any? expert-agents with [current-load < capacity] [ report "expert" ] - report "hold" -end + set total-resolved total-resolved + 1 + set total-response-ticks total-response-ticks + (ticks - [created-at] of inc) -to-report select-worker [tier-name] - if tier-name = "basic" [ - if any? basic-agents with [current-load < capacity] [ - report min-one-of basic-agents with [current-load < capacity] [current-load] - ] + ask inc [ + set queue-state "resolved" + set color grey + 2 + set size 0.6 + setxy xcor (-15 + random-float 1) + set label "" ] +end - if tier-name = "expert" [ - if any? 
expert-agents with [current-load < capacity] [ - report min-one-of expert-agents with [current-load < capacity] [current-load] +to check-deadlines + ask incidents with [ queue-state = "active" and ticks > deadline ] [ + set queue-state "late" + set total-late total-late + 1 + set color magenta + output-print (word "[LATE] " summary " — exceeded deadline at tick " ticks) + + ;; Try to escalate late cases + let current-tier assessed-tier + let higher-tier escalation-tier current-tier + if higher-tier != current-tier [ + let new-worker find-responder higher-tier + if new-worker != nobody [ + ;; Release old responder + if assigned-responder != nobody [ + ask assigned-responder [ + set current-load max (list 0 (current-load - 1)) + ] + ] + ask new-worker [ set current-load current-load + 1 ] + set assigned-responder new-worker + set assessed-tier higher-tier + set queue-state "active" + set total-escalated total-escalated + 1 + output-print (word "[ESCALATE] " summary " -> " higher-tier) + ] ] ] - if tier-name = "coordinator" [ - if any? coordinators with [current-load < capacity] [ - report min-one-of coordinators with [current-load < capacity] [current-load] + ;; Also let late-but-still-processing cases resolve + ask incidents with [ queue-state = "late" ] [ + let chance completion-probability assessed-tier + if random-float 1 < chance [ + resolve-incident self ] ] - - report nobody end -to coordinator-rebalance - if not any? coordinators [ stop ] +;; =========================================================================== +;; REFLECTION (dispatchers reflect on performance via llm:chat) +;; =========================================================================== - let risky-basic one-of cases with [ - queue-state = "assigned" and - assigned-tier = "basic" and - severity-score >= 70 - ] - if risky-basic != nobody [ - reassign-case risky-basic "expert" "risk escalation" - ] +to dispatcher-reflect + if not llm-ready? or not use-llm? 
[ stop ] + if my-triaged = 0 [ stop ] - let critical-expert one-of cases with [ - queue-state = "assigned" and - assigned-tier = "expert" and - severity-score >= 90 + ;; Only reflect if enough history accumulated + let hist-len 0 + carefully [ + set hist-len length llm:history + ] [ + set hist-len 0 ] - if critical-expert != nobody [ - reassign-case critical-expert "coordinator" "critical escalation" + if hist-len < 4 [ stop ] + + let my-triage-acc ifelse-value (my-triaged > 0) [ precision (my-correct-triage / my-triaged * 100) 1 ] [ 0 ] + let my-route-acc ifelse-value (my-routed > 0) [ precision (my-correct-route / my-routed * 100) 1 ] [ 0 ] + + carefully [ + let reflection llm:chat (word + "REFLECTION — You are " persona-name " dispatcher. Review your performance:\n" + "Triage accuracy: " my-triage-acc "% (" my-correct-triage "/" my-triaged ")\n" + "Routing accuracy: " my-route-acc "% (" my-correct-route "/" my-routed ")\n" + "Episode: " current-episode ", Tick: " ticks "\n" + "What patterns are you noticing? What would you do differently? " + "Keep your reflection to 2-3 sentences." 
+ ) + output-print (word "[REFLECT:" persona-name "] " reflection) + ] [ + output-print (word "[REFLECT:" persona-name "] Failed: " error-message) ] end -to reassign-case [target-case new-tier reason] - if [assigned-tier] of target-case = new-tier [ stop ] - - let new-worker select-worker new-tier - if new-worker = nobody [ stop ] +;; Manual reflection trigger +to force-reflect + ask dispatchers [ dispatcher-reflect ] +end - let old-worker [assigned-agent] of target-case - if old-worker != nobody [ - ask old-worker [ - set current-load max (list 0 (current-load - 1)) +;; =========================================================================== +;; EPISODE BOUNDARY + MEMORY MANAGEMENT +;; =========================================================================== + +to handle-episode-boundary + if episode-length = 0 [ stop ] ;; No episode boundaries + if episode-tick-counter < episode-length [ stop ] + + ;; Episode ended + set current-episode current-episode + 1 + set episode-tick-counter 0 + output-print (word "[EPISODE] Starting episode " current-episode " | Memory mode: " memory-mode) + + ask dispatchers [ + if memory-mode = "per-episode" [ + ;; Clear and re-inject persona + carefully [ + llm:clear-history + llm:set-history (list + (list "system" persona-prompt) + ) + output-print (word "[MEMORY:" persona-name "] History cleared, persona re-injected") + ] [ + output-print (word "[MEMORY:" persona-name "] Reset failed: " error-message) + ] ] + if memory-mode = "none" [ + ;; Clear everything every episode + carefully [ + llm:clear-history + output-print (word "[MEMORY:" persona-name "] History fully cleared") + ] [ + output-print (word "[MEMORY:" persona-name "] Clear failed: " error-message) + ] + ] + ;; "persistent" mode: do nothing, history accumulates ] +end - ask new-worker [ - set current-load current-load + 1 - ] +;; =========================================================================== +;; METRIC REPORTERS +;; 
=========================================================================== - ask target-case [ - set assigned-tier new-tier - set assigned-agent new-worker - set color color-for-tier new-tier - set handling-notes (word handling-notes " | coordinator-reassign:" reason) - ] +to-report triage-accuracy + ifelse total-triaged > 0 + [ report precision (correct-triage / total-triaged * 100) 1 ] + [ report 0 ] +end - set escalated-count escalated-count + 1 +to-report route-accuracy + ifelse total-routed > 0 + [ report precision (correct-route / total-routed * 100) 1 ] + [ report 0 ] end -to process-assigned-cases - ask cases with [queue-state = "assigned"] [ - let completion completion-chance assigned-tier severity-band - if random-float 1 < completion [ - finalize-case self - ] - ] +to-report late-rate + let total-dispatched total-routed + ifelse total-dispatched > 0 + [ report precision (total-late / total-dispatched * 100) 1 ] + [ report 0 ] end -to-report completion-chance [tier-name band] - if tier-name = "basic" [ report 0.12 ] - if tier-name = "expert" [ - if band = "high" [ report 0.27 ] - if band = "critical" [ report 0.2 ] - report 0.22 - ] +to-report escalation-rate + ifelse total-routed > 0 + [ report precision (total-escalated / total-routed * 100) 1 ] + [ report 0 ] +end - if band = "critical" [ report 0.34 ] - report 0.28 +to-report avg-response-time + ifelse total-resolved > 0 + [ report precision (total-response-ticks / total-resolved) 1 ] + [ report 0 ] end -to finalize-case [target-case] - let tier-name [assigned-tier] of target-case - let worker [assigned-agent] of target-case +to-report misleading-accuracy + ifelse misleading-triaged > 0 + [ report precision (misleading-correct / misleading-triaged * 100) 1 ] + [ report 0 ] +end - if worker != nobody [ - ask worker [ - set current-load max (list 0 (current-load - 1)) - set processed-count processed-count + 1 - ] - ] +to-report persona-accuracy-report + report (word + map [ d -> + (word [persona-name] 
of d ": " + ifelse-value ([my-triaged] of d > 0) + [ (word precision ([my-correct-triage] of d / [my-triaged] of d * 100) 0 "%") ] + [ "N/A" ] + ) + ] sort dispatchers + ) +end - if tier-name = "basic" [ - set processed-basic processed-basic + 1 - ] - if tier-name = "expert" [ - set processed-expert processed-expert + 1 - ] - if tier-name = "coordinator" [ - set processed-coordinator processed-coordinator + 1 - ] +to-report veteran-accuracy + let d one-of dispatchers with [persona-name = "Veteran"] + if d = nobody [ report "N/A" ] + ifelse [my-triaged] of d > 0 + [ report (word precision ([my-correct-triage] of d / [my-triaged] of d * 100) 0 "%") ] + [ report "N/A" ] +end - ask target-case [ - set queue-state "resolved" - set color 7 - set assigned-agent nobody - set ycor min-pycor + random-float 3 - set label word "resolved " severity-band +to-report rookie-accuracy + let d one-of dispatchers with [persona-name = "Rookie"] + if d = nobody [ report "N/A" ] + ifelse [my-triaged] of d > 0 + [ report (word precision ([my-correct-triage] of d / [my-triaged] of d * 100) 0 "%") ] + [ report "N/A" ] +end + +to-report analyst-accuracy + let d one-of dispatchers with [persona-name = "Analyst"] + if d = nobody [ report "N/A" ] + ifelse [my-triaged] of d > 0 + [ report (word precision ([my-correct-triage] of d / [my-triaged] of d * 100) 0 "%") ] + [ report "N/A" ] +end + +to-report llm-status + let result "N/A" + carefully [ + set result (word llm:active) + ] [ + ;; keep default ] + report result end -to-report color-for-band [band] - if band = "low" [ report 45 ] - if band = "moderate" [ report 25 ] - if band = "high" [ report 15 ] - report 125 +to-report queue-new-count + report count incidents with [ queue-state = "new" ] end -to-report color-for-tier [tier-name] - if tier-name = "basic" [ report 57 ] - if tier-name = "expert" [ report 15 ] - report 105 +to-report queue-triaged-count + report count incidents with [ queue-state = "triaged" ] end -to-report 
resolve-existing-path [primary fallback] - if file-exists? primary [ report primary ] - if file-exists? fallback [ report fallback ] - report primary +to-report queue-active-count + report count incidents with [ queue-state = "active" or queue-state = "late" ] +end + +to-report queue-resolved-count + report count incidents with [ queue-state = "resolved" ] end ]]> - - - - - llm-ready? - count cases with [queue-state = "new"] - count cases with [queue-state = "triaged"] - count cases with [queue-state = "assigned"] - escalated-count - processed-basic - processed-expert - processed-coordinator + + + + + + + + + + + + + + + + + + + + + + + + + + llm-status + current-episode + memory-mode + + + queue-new-count + queue-triaged-count + queue-active-count + + + triage-accuracy + route-accuracy + + + misleading-accuracy + avg-response-time + + + veteran-accuracy + rookie-accuracy + analyst-accuracy + + + late-rate + escalation-rate + total-resolved + + + + + + + + plot triage-accuracy + + + + plot route-accuracy + + + + plot misleading-accuracy + + + + + + + + + + plot queue-new-count + + + + plot queue-active-count + + + + plot total-resolved + + + + plot total-late + + + + + - ## Crisis Triage with Tiered Intelligence Coordination + +- **Triage Acc%**: How often dispatchers match ground-truth severity +- **Misleading%**: Accuracy specifically on misleading incidents (the key metric) +- **Route Acc%**: How often incidents go to the correct response tier +- **Per-persona differences**: Veteran vs Rookie vs Analyst performance +- **Reflection output**: Watch dispatchers reason about their own performance in the log +- **Memory effects**: Compare persistent vs per-episode vs none over multiple episodes + +### Design Rationale + +**Why dispatchers (not responders) use LLM**: Triage and routing are judgment calls where context matters. Processing is mechanical — it doesn't benefit from language understanding. 
+ +**Why no thinking/reasoning models**: Speed (3 dispatchers x 2 calls/tick would take minutes with thinking), cost (300+ calls per session), and overkill for classification tasks. + +**Why `llm:choose` for routing**: Guarantees output is one of the valid tiers, avoiding parsing failures. The extension handles fuzzy matching and falls back to random choice if the LLM response can't be parsed. +]]> @@ -545,6 +1127,33 @@ If LLM config is unavailable, the model automatically uses deterministic heurist + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -570,5 +1179,5 @@ If LLM config is unavailable, the model automatically uses deterministic heurist - setup repeat 75 [ go ] + setup repeat 30 [ go ] diff --git a/demos/crisis-triage/dispatcher-template.yaml b/demos/crisis-triage/dispatcher-template.yaml index 291a3a4..f018c6d 100644 --- a/demos/crisis-triage/dispatcher-template.yaml +++ b/demos/crisis-triage/dispatcher-template.yaml @@ -1,15 +1,17 @@ -system: "You are a crisis operations dispatcher. Route incidents to maximize response quality under load." +# ABOUTME: Documentation stub for the dispatcher routing step. +# ABOUTME: Routing now uses llm:choose for bounded tier selection instead of template parsing. +# +# This file is kept for reference. The actual routing in crisis-triage.nlogox +# uses llm:choose with choices ["BASIC" "EXPERT" "COORDINATOR" "HOLD"], +# which guarantees the response is one of the valid tiers. +# +# The dispatcher's conversational context (persona, history) is maintained +# via llm:set-history and accumulated through llm:chat-with-template calls. +system: "You are a crisis operations dispatcher. Route incidents to the appropriate response tier." 
template: | - Severity band: {severity} - Incident summary: {incident} + Severity: {severity} + Incident: {incident} + Current load — BASIC: {basic_load}, EXPERT: {expert_load}, COORDINATOR: {coordinator_load} - Current active load: - BASIC={basic_load} - EXPERT={expert_load} - COORDINATOR={coordinator_load} - - Choose the best tier for this incident considering both severity and current load. - - Return exactly two lines: - ROUTE: BASIC|EXPERT|COORDINATOR - REASON: <= 18 words + Choose the best response tier considering severity and current workload. + Respond with EXACTLY ONE of: BASIC, EXPERT, COORDINATOR, HOLD diff --git a/demos/crisis-triage/tests/README.md b/demos/crisis-triage/tests/README.md index fc5ec4b..edd3703 100644 --- a/demos/crisis-triage/tests/README.md +++ b/demos/crisis-triage/tests/README.md @@ -6,10 +6,15 @@ Run from repository root: python -m unittest discover -s demos/crisis-triage/tests -p "test_*.py" -v ``` -These tests validate: +These tests validate (29 tests, no API calls): - Presence of all required demo files -- NetLogo 7 `.nlogox` tiered-agent and triage/dispatch procedure structure -- LLM template variable consistency with model substitutions -- Config key completeness -- README documentation coverage +- Breed declarations (dispatchers, incidents, responders) +- Required procedures (setup, triage, routing, reflection, episode boundary) +- All 8 LLM primitives present in code +- Template placeholder consistency with model substitutions +- Config key completeness and max_tokens=200 +- README documentation sections +- XML structure (widgets, shapes, plots, CDATA) +- Incident bank has 30 entries (10 misleading + 10 clear + 10 borderline) +- Procedure block matching (every `to` has an `end`) diff --git a/demos/crisis-triage/tests/test_crisis_triage.py b/demos/crisis-triage/tests/test_crisis_triage.py index 55b43ae..183920d 100644 --- a/demos/crisis-triage/tests/test_crisis_triage.py +++ b/demos/crisis-triage/tests/test_crisis_triage.py @@ 
-1,3 +1,6 @@ +# ABOUTME: Static validation tests for the crisis triage demo. +# ABOUTME: Tests file structure, XML format, code structure, and template consistency. + import re import unittest import xml.etree.ElementTree as ET @@ -17,12 +20,10 @@ def read(path: Path) -> str: def parse_model() -> ET.Element: - """Parse the .nlogox model file as XML and return the root element.""" return ET.parse(MODEL_PATH).getroot() def model_code_only() -> str: - """Extract the NetLogo code from the CDATA section using XML parsing.""" root = parse_model() code_elem = root.find("code") if code_elem is None or code_elem.text is None: @@ -55,76 +56,79 @@ def test_required_files_exist(self) -> None: for path in required: self.assertTrue(path.exists(), f"missing file: {path}") - def test_model_declares_tiered_breeds(self) -> None: + def test_model_declares_breeds(self) -> None: code = model_code_only() - self.assertIn("breed [cases case]", code) - self.assertIn("breed [basic-agents basic-agent]", code) - self.assertIn("breed [expert-agents expert-agent]", code) - self.assertIn("breed [coordinators coordinator]", code) + self.assertIn("breed [ dispatchers dispatcher ]", code) + self.assertIn("breed [ incidents incident ]", code) + self.assertIn("breed [ responders responder ]", code) def test_model_contains_required_procedures(self) -> None: code = model_code_only() procedures = [ "to setup", "to setup-llm", - "to triage-new-cases", - "to perform-triage", - "to route-triaged-cases", - "to dispatch-case", - "to coordinator-rebalance", - "to reassign-case", - "to process-assigned-cases", - "to finalize-case", + "to setup-dispatchers", + "to setup-responders", + "to go", + "to triage-my-incidents", + "to route-my-incidents", + "to process-active-cases", + "to dispatcher-reflect", + "to handle-episode-boundary", ] for proc in procedures: self.assertIn(proc, code, f"missing procedure: {proc}") - def test_model_uses_llm_templates_and_config(self) -> None: + def 
test_model_uses_llm_config_and_template(self) -> None: code = model_code_only() self.assertIn('set config-path "demos/crisis-triage/config.txt"', code) self.assertIn('set triage-template-path "demos/crisis-triage/triage-template.yaml"', code) - self.assertIn('set dispatcher-template-path "demos/crisis-triage/dispatcher-template.yaml"', code) self.assertIn("llm:chat-with-template triage-template-path", code) - self.assertIn("llm:chat-with-template dispatcher-template-path", code) - self.assertIn("heuristic-severity-report", code) + + def test_model_uses_all_eight_primitives(self) -> None: + code = model_code_only() + primitives = [ + "llm:load-config", + "llm:set-history", + "llm:chat-with-template", + "llm:choose", + "llm:history", + "llm:chat", + "llm:clear-history", + "llm:active", + ] + for prim in primitives: + self.assertIn(prim, code, f"missing LLM primitive: {prim}") def test_triage_template_placeholders_match_model(self) -> None: template = read(TRIAGE_TEMPLATE_PATH) placeholders = set(re.findall(r"\{([a-zA-Z_][a-zA-Z0-9_]*)\}", template)) self.assertEqual( placeholders, - {"incident", "impact", "elapsed_ticks", "known_context"}, - ) - self.assertIn("SEVERITY: LOW|MODERATE|HIGH|CRITICAL", template) - - def test_dispatcher_template_placeholders_match_model(self) -> None: - template = read(DISPATCHER_TEMPLATE_PATH) - placeholders = set(re.findall(r"\{([a-zA-Z_][a-zA-Z0-9_]*)\}", template)) - self.assertEqual( - placeholders, - {"severity", "incident", "basic_load", "expert_load", "coordinator_load"}, + {"persona", "episode", "tick", "incident", "impact"}, ) - self.assertIn("ROUTE: BASIC|EXPERT|COORDINATOR", template) def test_config_has_required_keys(self) -> None: config = parse_config(CONFIG_PATH) for key in ["provider", "model", "temperature", "max_tokens", "timeout_seconds"]: self.assertIn(key, config, f"missing key in config: {key}") + def test_config_max_tokens_is_200(self) -> None: + config = parse_config(CONFIG_PATH) + 
self.assertEqual(config["max_tokens"], "200") + def test_readme_has_core_sections(self) -> None: readme = read(README_PATH) for text in [ - "What it demonstrates", - "Model architecture", - "Run instructions", - "Test suite", + "Quick Start", + "A/B Experiment", + "Design Rationale", + "Paper Connection", ]: self.assertIn(text, readme) class TestModelXmlParsing(unittest.TestCase): - """Validate the .nlogox file using proper XML parsing instead of regex.""" - def setUp(self) -> None: self.root = parse_model() @@ -149,16 +153,25 @@ def test_widgets_section_has_expected_children(self) -> None: self.assertIn("view", child_tags) self.assertIn("button", child_tags) self.assertIn("monitor", child_tags) + self.assertIn("switch", child_tags) + self.assertIn("chooser", child_tags) + self.assertIn("slider", child_tags) + self.assertIn("plot", child_tags) def test_widgets_button_count(self) -> None: widgets = self.root.find("widgets") buttons = widgets.findall("button") - self.assertEqual(len(buttons), 3, "expected 3 buttons: setup, go, new-case") + self.assertEqual(len(buttons), 4, "expected 4 buttons: setup, go, add-incident, force-reflect") def test_widgets_monitor_count(self) -> None: widgets = self.root.find("widgets") monitors = widgets.findall("monitor") - self.assertGreaterEqual(len(monitors), 7, "expected at least 7 monitors") + self.assertGreaterEqual(len(monitors), 12, "expected at least 12 monitors") + + def test_widgets_plot_count(self) -> None: + widgets = self.root.find("widgets") + plots = widgets.findall("plot") + self.assertEqual(len(plots), 2, "expected 2 plots: Accuracy Over Time, Case Flow") def test_turtle_shapes_defined(self) -> None: shapes = self.root.find("turtleShapes") @@ -166,11 +179,10 @@ def test_turtle_shapes_defined(self) -> None: shape_names = [s.get("name") for s in shapes.findall("shape")] self.assertIn("default", shape_names) self.assertIn("circle", shape_names) + self.assertIn("person", shape_names) class 
TestModelStructure(unittest.TestCase): - """Structural assertions on the NetLogo 7.x .nlogox format.""" - def setUp(self) -> None: self.root = parse_model() @@ -208,8 +220,6 @@ def test_link_shapes_has_default(self) -> None: class TestBehaviorRegression(unittest.TestCase): - """Catch regressions in model syntax and LLM extension usage patterns.""" - def setUp(self) -> None: self.code = model_code_only() @@ -217,14 +227,11 @@ def test_extensions_declaration_present(self) -> None: self.assertIn("extensions [ llm ]", self.code) def test_chat_with_template_uses_list_syntax(self) -> None: - """Ensure llm:chat-with-template uses (list ...) not [...] for variables.""" lines = self.code.splitlines() for line in lines: stripped = line.strip() if "llm:chat-with-template" not in stripped: continue - # The template call should be followed by (list on the same or next - # logical line. It must NOT use bracket syntax like [["key" val]]. self.assertNotRegex( stripped, r'llm:chat-with-template\s+\S+\s+\[\[', @@ -232,7 +239,6 @@ def test_chat_with_template_uses_list_syntax(self) -> None: ) def test_no_inline_provider_setup_in_procedures(self) -> None: - """Model should use llm:load-config, not manual set-provider/set-api-key.""" for deprecated in ["llm:set-provider", "llm:set-api-key", "llm:set-model"]: self.assertNotIn( deprecated, @@ -241,7 +247,6 @@ def test_no_inline_provider_setup_in_procedures(self) -> None: ) def test_all_procedure_blocks_are_closed(self) -> None: - """Every 'to' or 'to-report' must have a matching 'end'.""" opens = len(re.findall(r"^to(?:-report)?\s", self.code, re.MULTILINE)) closes = len(re.findall(r"^end\s*$", self.code, re.MULTILINE)) self.assertEqual( @@ -251,7 +256,6 @@ def test_all_procedure_blocks_are_closed(self) -> None: ) def test_no_deprecated_primitives(self) -> None: - """Guard against usage of removed or renamed LLM extension primitives.""" deprecated = [ "llm:ask", "llm:send", @@ -264,12 +268,19 @@ def test_no_deprecated_primitives(self) -> 
None: def test_globals_declared(self) -> None: self.assertIn("globals [", self.code) for g in ["llm-ready?", "config-path", "triage-template-path", - "dispatcher-template-path"]: + "incident-bank", "total-triaged", "correct-triage"]: self.assertIn(g, self.code, f"missing global: {g}") - def test_breed_owns_blocks_present(self) -> None: - self.assertIn("turtles-own [", self.code) - self.assertIn("cases-own [", self.code) + def test_incident_bank_has_30_entries(self) -> None: + """The incident bank should contain 30 incidents (10 misleading + 10 clear + 10 borderline).""" + code = self.code + # Count (list " patterns inside build-incident-bank — each incident starts with (list " + bank_start = code.find("to build-incident-bank") + bank_end = code.find("\nend", bank_start) + bank_code = code[bank_start:bank_end] + incident_count = bank_code.count('(list "') + # The outer (list wrapping all incidents doesn't start with (list " + self.assertEqual(incident_count, 30, f"expected 30 incidents, found {incident_count}") if __name__ == "__main__": diff --git a/demos/crisis-triage/triage-template.yaml b/demos/crisis-triage/triage-template.yaml index 3f60565..1c392b0 100644 --- a/demos/crisis-triage/triage-template.yaml +++ b/demos/crisis-triage/triage-template.yaml @@ -1,12 +1,21 @@ -system: "You are an emergency triage specialist. Assess risk conservatively and consistently." +# ABOUTME: Triage template for crisis severity assessment with anti-keyword-bias guidance. +# ABOUTME: Used by dispatchers via llm:chat-with-template to classify incident severity. +system: | + You are a crisis triage specialist with this background: {persona} + This is episode {episode}, tick {tick} of a municipal emergency simulation. + + IMPORTANT: Do NOT rely on scary-sounding keywords alone. A "fire alarm" in a + server room may be a sensor malfunction. A "data center cooling loss" may threaten + lives if hospitals depend on it. 
Assess the ACTUAL described impact, not the + surface-level vocabulary. + + Classify severity as exactly one of: LOW, MODERATE, HIGH, CRITICAL. template: | - Incident summary: {incident} - Reported impact: {impact} - Time since report (ticks): {elapsed_ticks} - Context: {known_context} + Incident: {incident} + Impact: {impact} - Classify this incident severity for a municipal response team. + Based on the described impact (not keywords), classify this incident severity. Return exactly two lines: SEVERITY: LOW|MODERATE|HIGH|CRITICAL - JUSTIFICATION: <= 18 words grounded in impact and urgency + JUSTIFICATION: one sentence explaining your assessment based on actual impact From deaa19234097c601f307e955300d92107923e902 Mon Sep 17 00:00:00 2001 From: JNK234 Date: Thu, 12 Mar 2026 21:40:23 -0500 Subject: [PATCH 11/12] chore: upgrade default model from llama3.2:3b to qwen2.5:7b The 3B model systematically under-triages incidents (defaults to LOW), producing ~22% triage accuracy. The 7B model should better calibrate across the 4-level severity scale. 
--- demos/crisis-triage/config.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demos/crisis-triage/config.txt b/demos/crisis-triage/config.txt index aed3eab..e927166 100644 --- a/demos/crisis-triage/config.txt +++ b/demos/crisis-triage/config.txt @@ -3,7 +3,7 @@ # Recommended local/default option (no cloud key required) provider=ollama -model=llama3.2:3b +model=qwen2.5:7b base_url=http://localhost:11434 # Runtime behavior From 93675c7bce1e96e9c40917a2aa0bb9426c12c7cb Mon Sep 17 00:00:00 2001 From: JNK234 Date: Thu, 12 Mar 2026 22:35:56 -0500 Subject: [PATCH 12/12] fix: improve triage and routing prompts for better accuracy Triage template: - Add severity calibration anchors (LOW=no injuries/routine, MODERATE=minor injuries/single-agency, HIGH=significant injuries/infrastructure at risk, CRITICAL=life-threatening/multi-agency/cascading) - Simplify output format: severity word first then brief reason, instead of rigid two-line SEVERITY/JUSTIFICATION format Routing prompt: - Show tier capacity context (e.g., BASIC: 2/9) so the model can assess whether a tier is nearly full or barely used --- demos/crisis-triage/crisis-triage.nlogox | 150 +++++++---------------- demos/crisis-triage/triage-template.yaml | 13 +- 2 files changed, 50 insertions(+), 113 deletions(-) diff --git a/demos/crisis-triage/crisis-triage.nlogox b/demos/crisis-triage/crisis-triage.nlogox index 873b00e..3b84ec6 100644 --- a/demos/crisis-triage/crisis-triage.nlogox +++ b/demos/crisis-triage/crisis-triage.nlogox @@ -559,9 +559,9 @@ to route-my-incidents "Incident: " [summary] of target "\n" "Severity: " [assessed-severity] of target "\n" "Impact: " [impact] of target "\n" - "Current load — BASIC: " count-active-tier "BASIC" - ", EXPERT: " count-active-tier "EXPERT" - ", COORDINATOR: " count-active-tier "COORDINATOR" "\n" + "Current load — BASIC: " count-active-tier "BASIC" "/9" + ", EXPERT: " count-active-tier "EXPERT" "/6" + ", COORDINATOR: " count-active-tier "COORDINATOR" "/3" 
"\n" "Routing rules based on severity:\n" " - LOW severity -> BASIC\n" " - MODERATE severity -> BASIC (or EXPERT if BASIC is full)\n" @@ -913,141 +913,75 @@ to-report queue-resolved-count end ]]> - - - - - - - - - - - - - - + + + + + + + - - - - - - - - llm-status - current-episode - memory-mode - - - queue-new-count - queue-triaged-count - queue-active-count - - - triage-accuracy - route-accuracy - - - misleading-accuracy - avg-response-time - - - veteran-accuracy - rookie-accuracy - analyst-accuracy - - - late-rate - escalation-rate - total-resolved - - - + + + + llm-status + current-episode + memory-mode + queue-new-count + queue-triaged-count + queue-active-count + triage-accuracy + route-accuracy + misleading-accuracy + avg-response-time + veteran-accuracy + rookie-accuracy + analyst-accuracy + late-rate + escalation-rate + total-resolved + - + plot triage-accuracy - + plot route-accuracy - + plot misleading-accuracy - - - + - + plot queue-new-count - + plot queue-active-count - + plot total-resolved - + plot total-late - - - + - ## Crisis Triage with Ambiguous Incidents ### The Story @@ -1114,7 +1048,7 @@ Run both modes for 50+ ticks and compare the Accuracy Over Time plot. **Why no thinking/reasoning models**: Speed (3 dispatchers x 2 calls/tick would take minutes with thinking), cost (300+ calls per session), and overkill for classification tasks. **Why `llm:choose` for routing**: Guarantees output is one of the valid tiers, avoiding parsing failures. The extension handles fuzzy matching and falls back to random choice if the LLM response can't be parsed. -]]> + @@ -1142,7 +1076,7 @@ Run both modes for 50+ ticks and compare the Accuracy Over Time plot. 
- + diff --git a/demos/crisis-triage/triage-template.yaml b/demos/crisis-triage/triage-template.yaml index 1c392b0..cd9745d 100644 --- a/demos/crisis-triage/triage-template.yaml +++ b/demos/crisis-triage/triage-template.yaml @@ -1,4 +1,4 @@ -# ABOUTME: Triage template for crisis severity assessment with anti-keyword-bias guidance. +# ABOUTME: Triage template for crisis severity assessment with calibration anchors. # ABOUTME: Used by dispatchers via llm:chat-with-template to classify incident severity. system: | You are a crisis triage specialist with this background: {persona} @@ -9,13 +9,16 @@ system: | lives if hospitals depend on it. Assess the ACTUAL described impact, not the surface-level vocabulary. + Severity definitions: + - LOW: No injuries, no infrastructure at risk, routine response adequate. + - MODERATE: Minor injuries or limited disruption, single-agency response sufficient. + - HIGH: Significant injuries, infrastructure at risk, or time-sensitive escalation potential. + - CRITICAL: Life-threatening, multi-agency coordination needed, cascading failures, or large population affected. + Classify severity as exactly one of: LOW, MODERATE, HIGH, CRITICAL. template: | Incident: {incident} Impact: {impact} Based on the described impact (not keywords), classify this incident severity. - - Return exactly two lines: - SEVERITY: LOW|MODERATE|HIGH|CRITICAL - JUSTIFICATION: one sentence explaining your assessment based on actual impact + Reply with the severity level first (LOW, MODERATE, HIGH, or CRITICAL), then a brief reason.