Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 44 additions & 3 deletions marimo/_cli/export/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,45 @@ def export_callback_impl(file_path: MarimoPath) -> ExportResult:
)


def _copy_lazy_caches_to_export(
    marimo_file: MarimoPath, out_dir: Path
) -> None:
    """Bundle lazy caches written by an executed export into the out dir.

    The default LazyStore writes caches to __marimo__/cache/ and kernel
    teardown records an export manifest of exactly the keys this session
    produced. After `--execute` runs the app, copy those files straight
    into `<out_dir>/public/cache/`, where the WASM store's HTTP fallback
    fetches them. Copying into the export (rather than the notebook's
    public/ folder) avoids both polluting the source tree and the
    ordering constraint with `export_public_folder`, which runs before
    the app executes.

    The manifest is a JSON list of cache keys, each a path relative to
    the cache directory. Keys that are absolute or contain `..` are
    ignored so a bad manifest can never read or write outside the cache
    and export trees. Keys that do not name a regular file (missing,
    evicted, or a directory) are skipped. The manifest is deleted once
    it has been processed so a later export cannot bundle stale caches.

    Args:
        marimo_file: The notebook that was exported; its parent directory
            locates the cache directory.
        out_dir: The export output directory that receives
            `public/cache/`.
    """
    import json
    import shutil

    from marimo._utils.paths import notebook_output_dir

    notebook_path = Path(marimo_file.absolute_name)
    cache_src = notebook_output_dir(notebook_path.parent) / "cache"
    manifest_file = cache_src / ".lazy_export_manifest.json"
    if not manifest_file.exists():
        echo("No lazy caches to bundle.")
        return
    # The manifest is written as encoded JSON bytes; decode explicitly
    # instead of relying on the platform's default text encoding.
    keys: list[str] = json.loads(manifest_file.read_text(encoding="utf-8"))
    cache_dst = out_dir / "public" / "cache"
    copied = 0
    for key in keys:
        relative_key = Path(key)
        # Joining an absolute path discards the left-hand side, and ".."
        # walks out of the tree; either would escape the cache directory
        # (and the export directory on the destination side).
        if relative_key.is_absolute() or ".." in relative_key.parts:
            continue
        src_file = cache_src / relative_key
        # `is_file` (not `exists`): a key that resolves to a directory,
        # such as an empty string, would make `copy2` raise.
        if not src_file.is_file():
            continue
        dst_file = cache_dst / relative_key
        dst_file.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src_file, dst_file)
        copied += 1
    manifest_file.unlink()
    echo(f"Bundled {copied} lazy cache files into {cache_dst}.")


@click.command(
cls=ColoredCommand,
help="""Export a notebook as a WASM-powered standalone HTML file.
Expand Down Expand Up @@ -987,9 +1026,11 @@ def export_callback(file_path: MarimoPath) -> ExportResult:
create_cloudflare_files(parse_title(name), out_dir)

outfile = out_dir / filename
return watch_and_export(
MarimoPath(name), outfile, watch, export_callback, force
)
watch_and_export(MarimoPath(name), outfile, watch, export_callback, force)
if execute and not watch:
# The executed session's LazyLoader manifest (written at kernel
# teardown) lists the cache files to ship with the export.
_copy_lazy_caches_to_export(marimo_file, out_dir)


export.add_command(html)
Expand Down
31 changes: 31 additions & 0 deletions marimo/_runtime/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -2588,4 +2588,35 @@ def launch_kernel(
else:
asyncio.run(coro)

# Flush pending LazyLoader writes and dump export manifests
# before the session tears down (so the store is still usable).
try:
import json as _json

from marimo._save.loaders.lazy import (
_ACTIVE_LAZY_LOADERS,
LazyLoader,
)
from marimo._save.stores.store import WasmExportableStore

LazyLoader.flush_all()
for _loader in list(_ACTIVE_LAZY_LOADERS.values()):
_store = _loader.store
if isinstance(_store, WasmExportableStore):
_manifest = _store.export_manifest()
LOGGER.debug(
"Export manifest for %s: %d keys",
_loader.name,
len(_manifest),
)
if _manifest:
_store.put(
".lazy_export_manifest.json",
_json.dumps(_manifest).encode(),
)
except Exception:
LOGGER.debug(
"Failed to flush lazy caches on teardown", exc_info=True
)

streams.close(use_fd_redirect)
11 changes: 9 additions & 2 deletions marimo/_session/managers/kernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,11 +256,15 @@ def close_kernel(self) -> None:
)
return

# Otherwise, we have something that is `ProcessLike`
if self.profile_path is not None and self.kernel_task.is_alive():
# Otherwise, we have something that is `ProcessLike`.
# Request a clean shutdown first so the kernel can flush pending
# work (e.g. LazyLoader cache writes) before we kill it.
if self.kernel_task.is_alive():
self.queue_manager.put_control_request(
commands.StopKernelCommand()
)

if self.profile_path is not None and self.kernel_task.is_alive():
# Hack: Wait for kernel to exit and write out profile;
# joining the process hangs, but not sure why.
print_(
Expand All @@ -273,6 +277,9 @@ def close_kernel(self) -> None:
time.sleep(1)

self.queue_manager.close_queues()
# Give the kernel time for a clean shutdown (flush caches, etc.)
if self.kernel_task.is_alive():
self.kernel_task.join(timeout=5)
try:
try_kill_process_and_group(self.kernel_task)
except ProcessLookupError:
Expand Down
92 changes: 92 additions & 0 deletions tests/_cli/test_export_cache_bundle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Copyright 2026 Marimo. All rights reserved.
"""`_copy_lazy_caches_to_export` — bundling executed-export caches.

The function is filesystem-driven: kernel teardown records an export
manifest of the cache keys a session produced, and the export step
copies exactly those files into `<out_dir>/public/cache/` where the
WASM store's HTTP fallback fetches them. These tests exercise it
against a synthetic `__marimo__/cache` tree; no kernel run required.
"""

from __future__ import annotations

import json
from typing import TYPE_CHECKING

from marimo._cli.export.commands import _copy_lazy_caches_to_export
from marimo._utils.marimo_path import MarimoPath
from marimo._utils.paths import notebook_output_dir

if TYPE_CHECKING:
from pathlib import Path


def _notebook(tmp_path: Path) -> MarimoPath:
    """Write a minimal notebook file into `tmp_path` and wrap its path."""
    notebook_file = tmp_path / "nb.py"
    source = "import marimo\napp = marimo.App()\n"
    notebook_file.write_text(source)
    return MarimoPath(str(notebook_file))


def _cache_dir(tmp_path: Path) -> Path:
    """Create and return the `cache` folder under the notebook output dir."""
    cache_root = notebook_output_dir(tmp_path).joinpath("cache")
    cache_root.mkdir(parents=True)
    return cache_root


def test_no_manifest_is_a_noop(tmp_path: Path) -> None:
    """Without a manifest on disk, nothing is created in the export dir."""
    export_dir = tmp_path / "dist"
    export_dir.mkdir()
    notebook = _notebook(tmp_path)

    _copy_lazy_caches_to_export(notebook, export_dir)

    bundled_root = export_dir.joinpath("public", "cache")
    assert not bundled_root.exists()


def test_manifest_keys_copied_and_manifest_consumed(tmp_path: Path) -> None:
    """Every manifest-listed file is bundled and the manifest is removed."""
    payloads = {
        "lazy/E_abc.jsonl": b"manifest-line\n",
        "lazy/blob.npy": b"\x93NUMPY",
    }
    cache_root = _cache_dir(tmp_path)
    cache_root.joinpath("lazy").mkdir()
    for key, blob in payloads.items():
        (cache_root / key).write_bytes(blob)
    manifest_path = cache_root / ".lazy_export_manifest.json"
    manifest_path.write_text(json.dumps(list(payloads)))

    export_dir = tmp_path / "dist"
    export_dir.mkdir()
    _copy_lazy_caches_to_export(_notebook(tmp_path), export_dir)

    bundled_root = export_dir / "public" / "cache"
    for key, blob in payloads.items():
        assert (bundled_root / key).read_bytes() == blob
    # One-shot: the manifest is consumed so a later re-export of an
    # unexecuted notebook doesn't bundle stale caches.
    assert not manifest_path.exists()


def test_missing_listed_file_skipped(tmp_path: Path) -> None:
    """A key listed in the manifest but absent on disk is not bundled."""
    cache_root = _cache_dir(tmp_path)
    cache_root.joinpath("present.bin").write_bytes(b"ok")
    listed_keys = ["present.bin", "evicted.bin"]
    manifest_path = cache_root / ".lazy_export_manifest.json"
    manifest_path.write_text(json.dumps(listed_keys))

    export_dir = tmp_path / "dist"
    export_dir.mkdir()
    _copy_lazy_caches_to_export(_notebook(tmp_path), export_dir)

    bundled_root = export_dir / "public" / "cache"
    present_copy = bundled_root / "present.bin"
    evicted_copy = bundled_root / "evicted.bin"
    assert present_copy.exists()
    assert not evicted_copy.exists()


def test_keys_outside_session_not_bundled(tmp_path: Path) -> None:
    """Only manifest-listed keys ship — other cache files on disk stay."""
    cache_root = _cache_dir(tmp_path)
    on_disk = {"mine.bin": b"mine", "other.bin": b"other-session"}
    for file_name, blob in on_disk.items():
        cache_root.joinpath(file_name).write_bytes(blob)
    manifest_path = cache_root / ".lazy_export_manifest.json"
    manifest_path.write_text(json.dumps(["mine.bin"]))

    export_dir = tmp_path / "dist"
    export_dir.mkdir()
    _copy_lazy_caches_to_export(_notebook(tmp_path), export_dir)

    bundled_root = export_dir / "public" / "cache"
    assert bundled_root.joinpath("mine.bin").exists()
    assert not bundled_root.joinpath("other.bin").exists()
Loading