From d5cbda8300d1e09f13412848239dde9bc6f290d1 Mon Sep 17 00:00:00 2001 From: "dmadisetti@coreweave.com" Date: Wed, 24 Jun 2026 15:57:17 -0700 Subject: [PATCH] feat(export): bundle session caches into html-wasm --execute output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When `marimo export html-wasm --execute` runs, the executed session's LazyLoader writes an export manifest at kernel teardown listing the cache keys it produced. The export step copies exactly those files into `<out_dir>/public/cache/`, where the WASM store's HTTP fallback fetches them — so a cached notebook ships its caches and skips recomputation in the browser. Stacked on the LazyStore dual-mode backend (#9898), which provides `LazyLoader.flush_all()`, the `_ACTIVE_LAZY_LOADERS` registry, and `WasmExportableStore.export_manifest()`. --- marimo/_cli/export/commands.py | 47 ++++++++++++- marimo/_runtime/runtime.py | 31 +++++++++ marimo/_session/managers/kernel.py | 11 ++- tests/_cli/test_export_cache_bundle.py | 92 ++++++++++++++++++++++++++ 4 files changed, 176 insertions(+), 5 deletions(-) create mode 100644 tests/_cli/test_export_cache_bundle.py diff --git a/marimo/_cli/export/commands.py b/marimo/_cli/export/commands.py index 8e3a2b01cd5..3af7c2be98e 100644 --- a/marimo/_cli/export/commands.py +++ b/marimo/_cli/export/commands.py @@ -779,6 +779,45 @@ def export_callback_impl(file_path: MarimoPath) -> ExportResult: ) +def _copy_lazy_caches_to_export( + marimo_file: MarimoPath, out_dir: Path +) -> None: + """Bundle lazy caches written by an executed export into the out dir. + + The default LazyStore writes caches to __marimo__/cache/ and kernel + teardown records an export manifest of exactly the keys this session + produced. After `--execute` runs the app, copy those files straight + into `<out_dir>/public/cache/`, where the WASM store's HTTP fallback + fetches them. 
Copying into the export (rather than the notebook's + public/ folder) avoids both polluting the source tree and the + ordering constraint with `export_public_folder`, which runs before + the app executes. + """ + import json + import shutil + + from marimo._utils.paths import notebook_output_dir + + notebook_path = Path(marimo_file.absolute_name) + cache_src = notebook_output_dir(notebook_path.parent) / "cache" + manifest_file = cache_src / ".lazy_export_manifest.json" + if not manifest_file.exists(): + echo("No lazy caches to bundle.") + return + keys: list[str] = json.loads(manifest_file.read_text()) + cache_dst = out_dir / "public" / "cache" + copied = 0 + for key in keys: + src_file = cache_src / key + if src_file.exists(): + dst_file = cache_dst / key + dst_file.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(src_file, dst_file) + copied += 1 + manifest_file.unlink() + echo(f"Bundled {copied} lazy cache files into {cache_dst}.") + + @click.command( cls=ColoredCommand, help="""Export a notebook as a WASM-powered standalone HTML file. @@ -987,9 +1026,11 @@ def export_callback(file_path: MarimoPath) -> ExportResult: create_cloudflare_files(parse_title(name), out_dir) outfile = out_dir / filename - return watch_and_export( - MarimoPath(name), outfile, watch, export_callback, force - ) + watch_and_export(MarimoPath(name), outfile, watch, export_callback, force) + if execute and not watch: + # The executed session's LazyLoader manifest (written at kernel + # teardown) lists the cache files to ship with the export. + _copy_lazy_caches_to_export(marimo_file, out_dir) export.add_command(html) diff --git a/marimo/_runtime/runtime.py b/marimo/_runtime/runtime.py index 5c4e54f7ced..472a4eccbf9 100644 --- a/marimo/_runtime/runtime.py +++ b/marimo/_runtime/runtime.py @@ -2588,4 +2588,35 @@ def launch_kernel( else: asyncio.run(coro) + # Flush pending LazyLoader writes and dump export manifests + # before the session tears down (so the store is still usable). 
+ try: + import json as _json + + from marimo._save.loaders.lazy import ( + _ACTIVE_LAZY_LOADERS, + LazyLoader, + ) + from marimo._save.stores.store import WasmExportableStore + + LazyLoader.flush_all() + for _loader in list(_ACTIVE_LAZY_LOADERS.values()): + _store = _loader.store + if isinstance(_store, WasmExportableStore): + _manifest = _store.export_manifest() + LOGGER.debug( + "Export manifest for %s: %d keys", + _loader.name, + len(_manifest), + ) + if _manifest: + _store.put( + ".lazy_export_manifest.json", + _json.dumps(_manifest).encode(), + ) + except Exception: + LOGGER.debug( + "Failed to flush lazy caches on teardown", exc_info=True + ) + streams.close(use_fd_redirect) diff --git a/marimo/_session/managers/kernel.py b/marimo/_session/managers/kernel.py index 7da4397c3f8..b00e272a4ed 100644 --- a/marimo/_session/managers/kernel.py +++ b/marimo/_session/managers/kernel.py @@ -256,11 +256,15 @@ def close_kernel(self) -> None: ) return - # Otherwise, we have something that is `ProcessLike` - if self.profile_path is not None and self.kernel_task.is_alive(): + # Otherwise, we have something that is `ProcessLike`. + # Request a clean shutdown first so the kernel can flush pending + # work (e.g. LazyLoader cache writes) before we kill it. + if self.kernel_task.is_alive(): self.queue_manager.put_control_request( commands.StopKernelCommand() ) + + if self.profile_path is not None and self.kernel_task.is_alive(): # Hack: Wait for kernel to exit and write out profile; # joining the process hangs, but not sure why. print_( @@ -273,6 +277,9 @@ def close_kernel(self) -> None: time.sleep(1) self.queue_manager.close_queues() + # Give the kernel time for a clean shutdown (flush caches, etc.) 
+ if self.kernel_task.is_alive(): + self.kernel_task.join(timeout=5) try: try_kill_process_and_group(self.kernel_task) except ProcessLookupError: diff --git a/tests/_cli/test_export_cache_bundle.py b/tests/_cli/test_export_cache_bundle.py new file mode 100644 index 00000000000..1ff1232699d --- /dev/null +++ b/tests/_cli/test_export_cache_bundle.py @@ -0,0 +1,92 @@ +# Copyright 2026 Marimo. All rights reserved. +"""`_copy_lazy_caches_to_export` — bundling executed-export caches. + +The function is filesystem-driven: kernel teardown records an export +manifest of the cache keys a session produced, and the export step +copies exactly those files into `<out_dir>/public/cache/` where the +WASM store's HTTP fallback fetches them. These tests exercise it +against a synthetic `__marimo__/cache` tree; no kernel run required. +""" + +from __future__ import annotations + +import json +from typing import TYPE_CHECKING + +from marimo._cli.export.commands import _copy_lazy_caches_to_export +from marimo._utils.marimo_path import MarimoPath +from marimo._utils.paths import notebook_output_dir + +if TYPE_CHECKING: + from pathlib import Path + + +def _notebook(tmp_path: Path) -> MarimoPath: + nb = tmp_path / "nb.py" + nb.write_text("import marimo\napp = marimo.App()\n") + return MarimoPath(str(nb)) + + +def _cache_dir(tmp_path: Path) -> Path: + cache = notebook_output_dir(tmp_path) / "cache" + cache.mkdir(parents=True) + return cache + + +def test_no_manifest_is_a_noop(tmp_path: Path) -> None: + out_dir = tmp_path / "dist" + out_dir.mkdir() + _copy_lazy_caches_to_export(_notebook(tmp_path), out_dir) + assert not (out_dir / "public" / "cache").exists() + + +def test_manifest_keys_copied_and_manifest_consumed(tmp_path: Path) -> None: + cache = _cache_dir(tmp_path) + (cache / "lazy").mkdir() + (cache / "lazy" / "E_abc.jsonl").write_bytes(b"manifest-line\n") + (cache / "lazy" / "blob.npy").write_bytes(b"\x93NUMPY") + manifest = cache / ".lazy_export_manifest.json" + 
manifest.write_text(json.dumps(["lazy/E_abc.jsonl", "lazy/blob.npy"])) + + out_dir = tmp_path / "dist" + out_dir.mkdir() + _copy_lazy_caches_to_export(_notebook(tmp_path), out_dir) + + dst = out_dir / "public" / "cache" + assert (dst / "lazy" / "E_abc.jsonl").read_bytes() == b"manifest-line\n" + assert (dst / "lazy" / "blob.npy").read_bytes() == b"\x93NUMPY" + # One-shot: the manifest is consumed so a later re-export of an + # unexecuted notebook doesn't bundle stale caches. + assert not manifest.exists() + + +def test_missing_listed_file_skipped(tmp_path: Path) -> None: + cache = _cache_dir(tmp_path) + (cache / "present.bin").write_bytes(b"ok") + (cache / ".lazy_export_manifest.json").write_text( + json.dumps(["present.bin", "evicted.bin"]) + ) + + out_dir = tmp_path / "dist" + out_dir.mkdir() + _copy_lazy_caches_to_export(_notebook(tmp_path), out_dir) + + dst = out_dir / "public" / "cache" + assert (dst / "present.bin").exists() + assert not (dst / "evicted.bin").exists() + + +def test_keys_outside_session_not_bundled(tmp_path: Path) -> None: + """Only manifest-listed keys ship — other cache files on disk stay.""" + cache = _cache_dir(tmp_path) + (cache / "mine.bin").write_bytes(b"mine") + (cache / "other.bin").write_bytes(b"other-session") + (cache / ".lazy_export_manifest.json").write_text(json.dumps(["mine.bin"])) + + out_dir = tmp_path / "dist" + out_dir.mkdir() + _copy_lazy_caches_to_export(_notebook(tmp_path), out_dir) + + dst = out_dir / "public" / "cache" + assert (dst / "mine.bin").exists() + assert not (dst / "other.bin").exists()