diff --git a/projects/templates/template_active_inference/figures.yaml b/projects/templates/template_active_inference/figures.yaml index 39d6a6c56..6b7316d2e 100644 --- a/projects/templates/template_active_inference/figures.yaml +++ b/projects/templates/template_active_inference/figures.yaml @@ -233,6 +233,7 @@ section_figures: results_invariants: - id: invariant_dashboard results_si_tmaze: + - id: efe_decomposition - id: si_belief_entropy_curve - id: si_obs_action_trace - id: si_tmaze_actions diff --git a/projects/templates/template_active_inference/manuscript/12_results_si_tmaze.md b/projects/templates/template_active_inference/manuscript/12_results_si_tmaze.md index f37476613..52eaa9bbc 100644 --- a/projects/templates/template_active_inference/manuscript/12_results_si_tmaze.md +++ b/projects/templates/template_active_inference/manuscript/12_results_si_tmaze.md @@ -8,12 +8,16 @@ Steps recorded: {{si_tmaze_steps}}. Mean belief entropy: {{si_tmaze_mean_belief_ Policy-comparison rows: {{si_policy_comparison_run_count}} across state-inference and policy-inference modes; goal-reaching rows: {{si_policy_comparison_goal_reached_count}}. Graph-world extension rows: {{si_graph_world_steps}} over {{si_graph_world_node_count}} nodes, with goal-reached flag {{si_graph_world_goal_reached}}. +The expected free energy that scores those policies decomposes in closed form ([@fig:efe_decomposition]). Across the {{efe_policy_count}} length-{{si_tmaze_policy_len}} policies on the T-maze generative model, the expected-free-energy-minimising policy is `{{efe_minimizing_policy}}` with $G$ = {{efe_minimizing_total_formatted}} nats, splitting into risk {{efe_risk_at_min_formatted}} (the pragmatic deviation of predicted outcomes from preferences) and ambiguity {{efe_ambiguity_at_min_formatted}} (the expected likelihood entropy) nats. 
The same $G$ equals, with opposite sign, the sum of pragmatic value {{efe_pragmatic_at_min_formatted}} (expected log-preference) and epistemic value {{efe_epistemic_at_min_formatted}} (state-outcome mutual information, the term that drives information-seeking), both in nats. The two forms are exactly equal: risk + ambiguity + pragmatic + epistemic vanishes to within {{efe_max_identity_residual_formatted}} across every policy, the action-selection twin of the analytical free-energy decomposition identity ([@sec:results_free_energy]). + Rollout trace: `output/data/si_tmaze_trace.json`. JSONL run log: `output/logs/pymdp_runs.jsonl`. +![Closed-form Expected Free Energy decomposition over the finite T-maze policies. Left: $G(\pi)$ = risk + ambiguity (stacked), with the goal-seeking minimiser marked. Right: the pragmatic and epistemic values, which sum to $-G(\pi)$. Both forms are computed in closed form (no sampling) and satisfy risk + ambiguity + pragmatic + epistemic = 0 to machine precision.](../output/figures/efe_decomposition.png){#fig:efe_decomposition width=95% fig-alt="Two-panel bar chart of the Expected Free Energy term decomposition across the four length-two T-maze policies (action sequences 00, 01, 10, 11). The left panel stacks risk (pragmatic deviation, the KL of policy-predicted outcomes from preferences) below ambiguity (epistemic, the expected likelihood entropy) so each bar's height is the Expected Free Energy G(pi); the goal-seeking policy with minimum G is marked. 
The right panel shows the pragmatic value (expected log-preference) and epistemic value (state-outcome mutual information) per policy, whose sum is equal and opposite to G(pi), with a zero reference line, illustrating the exact identity G(pi) = -(pragmatic + epistemic)."} + ![Belief entropy over time for the T-maze rollout (mean {{si_tmaze_mean_belief_entropy_formatted}} nats).](../output/figures/si_belief_entropy_curve.png){#fig:si_belief_entropy_curve width=90% fig-alt="Line plot of belief entropy in nats versus timestep for the pymdp T-maze rollout. Entropy ranges from {{si_entropy_min}} to {{si_entropy_max}} nats across {{si_tmaze_steps}} steps in {{pymdp_mode}} mode."} ![Observation and action traces for the T-maze rollout (action diversity {{si_action_diversity}}).](../output/figures/si_obs_action_trace.png){#fig:si_obs_action_trace width=90% fig-alt="Dual-panel plot of observation index and action index versus timestep for the pymdp T-maze rollout. The upper panel shows discrete observations; the lower panel shows actions. Goal reached flag is {{si_goal_reached}}."} diff --git a/projects/templates/template_active_inference/manuscript/sections/imrad/results_si_tmaze/prose.md b/projects/templates/template_active_inference/manuscript/sections/imrad/results_si_tmaze/prose.md index 8eceb643e..2b57078a7 100644 --- a/projects/templates/template_active_inference/manuscript/sections/imrad/results_si_tmaze/prose.md +++ b/projects/templates/template_active_inference/manuscript/sections/imrad/results_si_tmaze/prose.md @@ -3,3 +3,5 @@ The pymdp harness rolls out a T-maze active-inference agent in `{{pymdp_mode}}` Steps recorded: {{si_tmaze_steps}}. Mean belief entropy: {{si_tmaze_mean_belief_entropy}}. Belief entropy over the rollout is traced in [@fig:si_belief_entropy_curve]; the paired observation and action indices are in [@fig:si_obs_action_trace]. 
The default `state_inference` mode runs pymdp `infer_states` and **reports** the resulting posterior (belief entropy and the state-1 marginal), but the action is chosen by an open-loop scripted rule on the observation index — not by the posterior — so the inferred belief here is observed, not acted on. Under the toy transition model, expected-free-energy policy inference reaches the goal in {{si_policy_comparison_policy_goal_count}} of its rows versus {{si_policy_comparison_state_goal_count}} for the scripted state-inference rule: no behavioral advantage on this two-state, horizon-{{si_tmaze_policy_len}} maze, which is the measured content of the deliberately-too-small claim. Policy-comparison rows: {{si_policy_comparison_run_count}} across state-inference and policy-inference modes; goal-reaching rows: {{si_policy_comparison_goal_reached_count}}. Graph-world extension rows: {{si_graph_world_steps}} over {{si_graph_world_node_count}} nodes, with goal-reached flag {{si_graph_world_goal_reached}}. + +The expected free energy that scores those policies decomposes in closed form ([@fig:efe_decomposition]). Across the {{efe_policy_count}} length-{{si_tmaze_policy_len}} policies on the T-maze generative model, the expected-free-energy-minimising policy is `{{efe_minimizing_policy}}` with $G$ = {{efe_minimizing_total_formatted}} nats, splitting into risk {{efe_risk_at_min_formatted}} (the pragmatic deviation of predicted outcomes from preferences) and ambiguity {{efe_ambiguity_at_min_formatted}} (the expected likelihood entropy) nats. The same $G$ equals, with opposite sign, the sum of pragmatic value {{efe_pragmatic_at_min_formatted}} (expected log-preference) and epistemic value {{efe_epistemic_at_min_formatted}} (state-outcome mutual information, the term that drives information-seeking), both in nats. 
The two forms are exactly equal: risk + ambiguity + pragmatic + epistemic vanishes to within {{efe_max_identity_residual_formatted}} across every policy, the action-selection twin of the analytical free-energy decomposition identity ([@sec:results_free_energy]). diff --git a/projects/templates/template_active_inference/src/manuscript/variables.py b/projects/templates/template_active_inference/src/manuscript/variables.py index f6c396cc1..ca47eabd8 100644 --- a/projects/templates/template_active_inference/src/manuscript/variables.py +++ b/projects/templates/template_active_inference/src/manuscript/variables.py @@ -99,6 +99,30 @@ def _gnn_spec_version(project_root: Path) -> str: return "" +def _efe_token_values() -> dict[str, Any]: + """Manuscript tokens for the closed-form Expected Free Energy decomposition. + + Computed directly from the finite T-maze generative model (deterministic, no + sampling), so the hydrated numbers are reproducible and back the Results prose. + """ + from simulation.efe_decomposition import decompose_all_policies + from simulation.tmaze_model import build_tmaze_generative_model + + result = decompose_all_policies(build_tmaze_generative_model()) + best_policy = result["efe_minimizing_policy"] + best_row = next(row for row in result["rows"] if row["policy"] == best_policy) + return { + "efe_policy_count": int(result["policy_count"]), + "efe_minimizing_policy": "".join(str(a) for a in best_policy), + "efe_minimizing_total_formatted": f"{result['efe_minimizing_total']:.4f}", + "efe_risk_at_min_formatted": f"{best_row['risk']:.4f}", + "efe_ambiguity_at_min_formatted": f"{best_row['ambiguity']:.4f}", + "efe_pragmatic_at_min_formatted": f"{best_row['pragmatic_value']:.4f}", + "efe_epistemic_at_min_formatted": f"{best_row['epistemic_value']:.4f}", + "efe_max_identity_residual_formatted": f"{result['max_identity_residual']:.1e}", + } + + def generate_variables(project_root: Path, *, require_analysis_outputs: bool = True) -> dict[str, Any]: root = 
project_root.resolve() hp = load_hyperparameters() @@ -201,6 +225,7 @@ def generate_variables(project_root: Path, *, require_analysis_outputs: bool = T "si_policy_comparison_policy_goal_count": policy_goal_by_mode["policy_inference"], "si_policy_comparison_complete_grid": int(bool(policy_summary.get("complete_grid", False))), "si_policy_efe_rows_explained": int(bool(policy_summary.get("all_efe_rows_explained", False))), + **_efe_token_values(), "pymdp_policy_posterior_row_count": posterior_data.get("row_count", 0), "pymdp_policy_posterior_available_count": posterior_data.get("available_row_count", 0), "pymdp_policy_posteriors_normalized": int(