From 7105f3ff259488889d4ff88aaba1cc21f4eef17f Mon Sep 17 00:00:00 2001
From: Chad Kunde <snokun509@gmail.com>
Date: Thu, 25 Jun 2026 00:49:34 -0500
Subject: [PATCH] =?UTF-8?q?test:=20reproduce=20large=E2=86=92small=20model?=
 =?UTF-8?q?-switch=20overflow=20(GLM-5.2=20512k=20=E2=86=92=20GPT-5.5=2027?=
 =?UTF-8?q?2k)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When switching from a large-context model to a smaller one mid-session, the
model-change branch in transform.ts clears lastContextPercentage /
lastInputTokens to 0. This suppresses every reduction path on the same
pass: the historian trigger (0% < proactive floor), the 95% emergency
block, and the overflow-recovery bump (needsEmergencyRecovery was just
cleared). The oversized prompt — sized for the old model's window — is
sent to the new smaller model and rejected with 'Input exceeds context
window'. Recovery only arms on the SECOND pass, after the real overflow
error fires the event handler.

The test asserts the expected post-fix behavior: the model-change branch
should detect oldInputTokens > newContextLimit and arm emergency recovery
(recordOverflowDetected) so the existing 95% bump path runs the historian
and emergency drops BEFORE the prompt leaves. The test currently fails:
this patch adds only the test, and the fix is expected to land in
transform.ts:502-547.
---
 .../src/hooks/magic-context/transform.test.ts | 124 ++++++++++++++++++
 1 file changed, 124 insertions(+)

diff --git a/packages/plugin/src/hooks/magic-context/transform.test.ts b/packages/plugin/src/hooks/magic-context/transform.test.ts
index 63637ec7..63cf25a3 100644
--- a/packages/plugin/src/hooks/magic-context/transform.test.ts
+++ b/packages/plugin/src/hooks/magic-context/transform.test.ts
@@ -23,6 +23,7 @@ import {
     getLastNudgeUndropped,
     getOrCreateSessionMeta,
     getPendingOps,
+    getOverflowState,
     getTagById,
     getTagsBySession,
     incrementHistorianFailure,
@@ -2546,3 +2547,126 @@ describe("createTransform historian failure handling", () => {
         expect(createSession).toHaveBeenCalledTimes(1);
     });
 });
+
+describe("createTransform model switch large→small context", () => {
+    it("arms emergency recovery when switching from a large-context to a small-context model that would overflow", async () => {
+        // Scenario: the user switches from GLM-5.2 (512k window) to GPT-5.5
+        // (272k window) mid-session. The session carried ~300k input tokens
+        // on the old model (~58% of 512k, under any trigger threshold); the
+        // same 300k is ~110% of the new 272k limit, a guaranteed overflow.
+        //
+        // Pre-fix behavior: the model-change branch (transform.ts:502) detects
+        // the switch and clears lastContextPercentage / lastInputTokens to 0,
+        // which suppresses every reduction path on this pass:
+        //   - historian trigger: 0% < proactive floor → shouldFire=false
+        //   - 95% emergency block: 0% < 95% → no block
+        //   - overflow recovery bump: needsEmergencyRecovery was just cleared
+        // The oversized prompt is sent to the small model → "Input exceeds
+        // context window" error. Recovery only fires on the SECOND pass, after
+        // the real overflow error arms needsEmergencyRecovery via the event
+        // handler, so the user sees the error on the first request.
+        //
+        // Expected (post-fix) behavior: when the model-change branch sees that
+        // the old model's lastInputTokens exceed the NEW model's context limit,
+        // it arms emergency recovery (recordOverflowDetected with the new
+        // limit) so the existing bump-to-95% path fires on the SAME pass and
+        // historian + emergency drops run BEFORE the prompt is sent.
+        //#given
+        useTempDataHome("transform-model-switch-large-small-");
+        const sessionId = "ses-model-shrink";
+        createOpenCodeDbForTransform(sessionId, [
+            { id: "m-raw-1", role: "user", text: "earlier work" },
+            {
+                id: "m-raw-assistant-old",
+                role: "assistant",
+                text: "old model response",
+                providerID: "openrouter",
+                modelID: "z-ai/glm-5.2",
+            },
+            { id: "m-raw-2", role: "user", text: "continue" },
+        ]);
+        const db = openDatabase();
+        // Persist the old model's usage: 300k input tokens at 58% of 512k.
+        // This is a realistic mid-session state — high token mass, moderate
+        // pressure, no historian fire yet.
+        updateSessionMeta(db, sessionId, {
+            lastContextPercentage: 58,
+            lastInputTokens: 300_000,
+            lastObservedModelKey: "openrouter/z-ai/glm-5.2",
+            lastUsageContextLimit: 512_000,
+        });
+
+        // The NEW model (GPT-5.5, 272k) is already in liveModelBySession —
+        // simulating that hook-handlers.ts set it on the chat.message event
+        // before this transform pass.
+        const liveModelBySession = new Map<string, { providerID: string; modelID: string }>([
+            [sessionId, { providerID: "openrouter", modelID: "openai/gpt-5.5" }],
+        ]);
+
+        const scheduler: Scheduler = { shouldExecute: mock(() => "defer" as const) };
+        const transform = createTransform({
+            tagger: createTagger(),
+            scheduler,
+            contextUsageMap: new Map<string, { usage: ContextUsage; updatedAt: number }>([
+                [
+                    sessionId,
+                    {
+                        usage: { percentage: 58, inputTokens: 300_000 },
+                        updatedAt: Date.now(),
+                    },
+                ],
+            ]),
+            db,
+            historyRefreshSessions: new Set<string>(),
+            pendingMaterializationSessions: new Set<string>(),
+            lastHeuristicsTurnId: new Map<string, string>(),
+            clearReasoningAge: 50,
+            protectedTags: 0,
+            liveModelBySession,
+        });
+
+        // The messages array: the last assistant message is from the OLD model
+        // (GLM-5.2). The transform's findLastAssistantModel will return GLM-5.2,
+        // which mismatches liveModelBySession (GPT-5.5) → model-change branch
+        // fires. The large tool output (3400 lines, ~200k chars, ~50k tokens)
+        // stands in for content that, with the rest of the 300k-token context,
+        // would overflow the 272k model — but is NOT individually droppable at 0%.
+        const bigOutput = "const value = compute(input, options); // step output line\n".repeat(
+            3400,
+        );
+        const messages: TestMessage[] = [
+            {
+                info: { id: "m-user", role: "user", sessionID: sessionId },
+                parts: [{ type: "text", text: "continue" }],
+            },
+            {
+                info: {
+                    id: "m-assistant-old",
+                    role: "assistant",
+                    providerID: "openrouter",
+                    modelID: "z-ai/glm-5.2",
+                },
+                parts: [
+                    { type: "text", text: "ok" },
+                    { type: "tool", callID: "call-1", state: { output: bigOutput } },
+                ],
+            },
+        ];
+
+        //#when
+        await transform({}, { messages });
+
+        //#then — post-fix expectation: the model-change branch arms emergency
+        // recovery on this pass (oldInputTokens 300k > new 272k limit), so the
+        // 95% bump path and historian/emergency-drop machinery run pre-send.
+        const overflow = getOverflowState(db, sessionId);
+        expect(overflow.needsEmergencyRecovery).toBe(true);
+
+        // The oversized tool output should be dropped by the emergency drop
+        // path that the 95% bump arms, so it is no longer on the wire that
+        // would overflow the new model.
+        const tags = getTagsBySession(db, sessionId);
+        const toolTag = tags.find((tag) => tag.type === "tool");
+        expect(toolTag?.status).toBe("dropped");
+    });
+});