Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ dependencies = [
"sqlite-vec>=0.1.6",
"openai>=1.100.2",
"logfire>=4.19.0",
"psutil>=5.9.0",
]

[project.urls]
Expand Down
123 changes: 122 additions & 1 deletion src/basic_memory/cli/commands/db.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""Database management commands."""

import os
from dataclasses import dataclass
from pathlib import Path
from pathlib import Path, PurePosixPath, PureWindowsPath

import psutil
import typer
from loguru import logger
from rich.console import Console
Expand All @@ -21,6 +23,107 @@
console = Console()


def _is_basic_memory_mcp(cmdline: list[str]) -> bool:
"""Heuristic: does this argv represent a `basic-memory mcp` server?

The MCP server can be launched any of:
basic-memory mcp
bm mcp # entrypoint alias from pyproject.toml
python -m basic_memory.cli.main mcp # module form
uv run basic-memory mcp / uv run bm mcp # uv wrappers
/abs/path/to/{bm,basic-memory}[.exe] mcp

A reliable match needs both signals:
1. "mcp" appears as an exact argv token (not "mcp-foo").
2. Some argv token names the basic-memory entrypoint — either by
hyphen/underscore form, or as a `bm` script (covers `/usr/local/bin/bm`,
`bm.exe`, etc. via Path.stem).
"""
if "mcp" not in cmdline:
return False
for arg in cmdline:
if "basic-memory" in arg or "basic_memory" in arg:
return True
# Try both POSIX and Windows path interpretations so a test on
# macOS still recognizes `C:\\...\\bm.exe`, and a real Windows
# run still recognizes `/usr/local/bin/bm`. Path() alone uses
# the host OS, which gives wrong stems for foreign separators.
if PurePosixPath(arg).stem == "bm" or PureWindowsPath(arg).stem == "bm":
return True
return False


def _find_live_mcp_processes() -> list[tuple[int, str]]:
"""Return (pid, joined_cmdline) for live `basic-memory mcp` processes.

Why this exists (issue #765):
On POSIX, `Path.unlink()` removes the directory entry but the inode
survives as long as any process holds the file open. A `bm reset`
run while Claude Desktop (or another MCP client) is alive will
therefore "succeed" — but the still-running MCP keeps reading the
old, now-invisible memory.db inode and returns phantom rows. On
Windows the OS naturally raises PermissionError on `unlink()`, so
the bug is POSIX-specific. We detect proactively to give the same
error experience on every platform before doing damage.

The current process is excluded so this can be called from inside a
`bm reset` invocation. NoSuchProcess / AccessDenied are swallowed
because process tables race with the scan and we don't want a
transient permission error to mask a real zombie.
"""
me = os.getpid()
matches: list[tuple[int, str]] = []
for proc in psutil.process_iter(["pid", "cmdline"]):
try:
pid = proc.info.get("pid")
if pid is None or pid == me:
continue
cmdline = proc.info.get("cmdline") or []
if not cmdline:
continue
if _is_basic_memory_mcp(cmdline):
matches.append((pid, " ".join(cmdline)))
except (psutil.NoSuchProcess, psutil.AccessDenied):
continue
return matches


def _abort_if_mcp_processes_alive() -> None:
"""Refuse `bm reset` while basic-memory MCP processes are still running.

See _find_live_mcp_processes for the underlying POSIX-vs-Windows
rationale. Prints a per-PID list and platform-appropriate cleanup
instructions, then exits non-zero so destructive work never starts.
"""
zombies = _find_live_mcp_processes()
if not zombies:
return

console.print(
"[red]Refusing to reset:[/red] basic-memory MCP processes are still running."
)
console.print(
"[yellow]On macOS/Linux these would keep reading the deleted memory.db inode "
"and return phantom search results (see #765).[/yellow]"
)
for pid, cmd in zombies:
console.print(f" PID {pid}: {cmd}")
console.print("\n[bold]How to clean up:[/bold]")
console.print(" 1. Quit Claude Desktop and any other MCP clients.")
if os.name == "nt":
console.print(
" 2. Verify nothing remains: "
"[green]Get-CimInstance Win32_Process | "
"Where-Object {$_.CommandLine -like '*basic-memory*mcp*'}[/green]"
)
else:
console.print(
" 2. Verify nothing remains: [green]pgrep -fa 'basic-memory mcp'[/green]"
)
console.print(" 3. Re-run [green]bm reset[/green].")
raise typer.Exit(1)


@dataclass(slots=True)
class EmbeddingProgress:
"""Typed CLI progress payload for embedding backfills."""
Expand Down Expand Up @@ -86,6 +189,16 @@ async def _reindex_projects(app_config):
@app.command()
def reset(
reindex: bool = typer.Option(False, "--reindex", help="Rebuild db index from filesystem"),
force: bool = typer.Option(
False,
"--force",
help=(
"Skip the pre-flight check that refuses to reset while "
"basic-memory MCP processes are running. Use only in "
"automated workflows where you've already ensured no MCP "
"clients are attached to the database."
),
),
): # pragma: no cover
"""Reset database (drop all tables and recreate)."""
console.print(
Expand All @@ -94,6 +207,14 @@ def reset(
"Use [green]bm reset --reindex[/green] to automatically rebuild the index afterward."
)
if typer.confirm("Reset the database index?"):
# Pre-flight: refuse to proceed if MCP processes still hold the DB
# file open. POSIX would silently let us unlink the inode while
# they keep reading it; Windows would error here anyway. See
# _find_live_mcp_processes for the full story. --force is the
# documented escape hatch for scripted/CI runs.
if not force:
_abort_if_mcp_processes_alive()

logger.info("Resetting database...")
config_manager = ConfigManager()
app_config = config_manager.config
Expand Down
11 changes: 8 additions & 3 deletions tests/cli/test_db_reset_exit.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,14 @@ def _isolated_env(tmp_path: Path) -> dict[str, str]:

@skip_on_windows
def test_bm_reset_exits_cleanly(tmp_path: Path):
"""bm reset should finish and exit cleanly with non-interactive confirmation."""
"""bm reset should finish and exit cleanly with non-interactive confirmation.

Uses --force to skip the live-MCP pre-flight (#765); we're verifying
process exit semantics here, not the pre-flight, which has dedicated
coverage in test_db_reset_zombie_check.py.
"""
result = subprocess.run(
["uv", "run", "bm", "reset"],
["uv", "run", "bm", "reset", "--force"],
input="y\n",
capture_output=True,
text=True,
Expand All @@ -44,7 +49,7 @@ def test_bm_reset_exits_cleanly(tmp_path: Path):
def test_bm_reset_reindex_exits_cleanly(tmp_path: Path):
"""bm reset --reindex should finish and exit cleanly with non-interactive confirmation."""
result = subprocess.run(
["uv", "run", "bm", "reset", "--reindex"],
["uv", "run", "bm", "reset", "--reindex", "--force"],
input="y\n",
capture_output=True,
text=True,
Expand Down
157 changes: 157 additions & 0 deletions tests/cli/test_db_reset_zombie_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
"""Regression tests for the live-MCP-process pre-flight in `bm reset` (#765)."""

from __future__ import annotations

import os

import psutil
import pytest
import typer

from basic_memory.cli.commands import db as db_cmd


class _FakeProc:
"""Minimal stand-in for psutil.Process; only exposes .info."""

def __init__(self, pid: int, cmdline: list[str] | None):
self.info = {"pid": pid, "cmdline": cmdline}


def _patch_iter(monkeypatch: pytest.MonkeyPatch, procs) -> None:
"""Replace psutil.process_iter with a fixed iterator.

Procs is intentionally untyped: tests pass a mix of _FakeProc and
error-raising stand-ins to exercise the per-process exception path.
"""
monkeypatch.setattr(
psutil,
"process_iter",
lambda attrs=None: iter(procs),
)


class TestFindLiveMcpProcesses:
def test_returns_empty_when_no_mcp_processes(self, monkeypatch):
_patch_iter(
monkeypatch,
[
_FakeProc(pid=11, cmdline=["python", "-m", "http.server"]),
_FakeProc(pid=22, cmdline=["bm", "sync"]),
],
)
assert db_cmd._find_live_mcp_processes() == []

def test_matches_basic_memory_mcp_invocations(self, monkeypatch):
_patch_iter(
monkeypatch,
[
# Direct `basic-memory mcp`.
_FakeProc(pid=101, cmdline=["/usr/bin/python", "basic-memory", "mcp"]),
# `bm mcp` alias entrypoint — must also match (#765 P1).
_FakeProc(pid=202, cmdline=["bm", "mcp"]),
# Python module form, underscore name.
_FakeProc(
pid=303,
cmdline=["python", "-m", "basic_memory.cli.main", "mcp"],
),
# Absolute path to the bm script.
_FakeProc(pid=404, cmdline=["/usr/local/bin/bm", "mcp"]),
# Windows-style bm.exe.
_FakeProc(pid=505, cmdline=["C:\\Users\\me\\.venv\\Scripts\\bm.exe", "mcp"]),
# Should NOT match — `mcp` is a substring of another arg, not a token.
_FakeProc(pid=606, cmdline=["python", "basic-memory", "mcp-helper"]),
# Should NOT match — has `mcp` but no basic-memory/bm signature.
_FakeProc(pid=707, cmdline=["python", "/some/other/server.py", "mcp"]),
],
)
result = db_cmd._find_live_mcp_processes()
pids = sorted(pid for pid, _ in result)
assert pids == [101, 202, 303, 404, 505]

def test_skips_current_process(self, monkeypatch):
me = os.getpid()
_patch_iter(
monkeypatch,
[
_FakeProc(pid=me, cmdline=["python", "basic-memory", "mcp"]),
],
)
# Self-match is suppressed so the helper can be called from inside
# `bm reset` without flagging the running process.
assert db_cmd._find_live_mcp_processes() == []

def test_skips_processes_with_no_cmdline(self, monkeypatch):
_patch_iter(
monkeypatch,
[
_FakeProc(pid=1, cmdline=None), # kernel-style process
_FakeProc(pid=2, cmdline=[]),
],
)
assert db_cmd._find_live_mcp_processes() == []

def test_swallows_per_process_errors(self, monkeypatch):
"""A NoSuchProcess race during iteration must not abort the scan."""

class _Raising:
@property
def info(self):
raise psutil.NoSuchProcess(pid=999)

_patch_iter(
monkeypatch,
[
_Raising(),
_FakeProc(pid=42, cmdline=["python", "basic-memory", "mcp"]),
],
)
result = db_cmd._find_live_mcp_processes()
assert [pid for pid, _ in result] == [42]


class TestAbortIfMcpProcessesAlive:
def test_no_op_when_no_processes(self, monkeypatch):
monkeypatch.setattr(db_cmd, "_find_live_mcp_processes", lambda: [])
# Must not raise — destructive work should proceed.
db_cmd._abort_if_mcp_processes_alive()

def test_exits_with_pids_when_processes_alive(self, monkeypatch, capsys):
monkeypatch.setattr(
db_cmd,
"_find_live_mcp_processes",
lambda: [(123, "python basic-memory mcp"), (456, "uv run bm mcp wrapper")],
)
with pytest.raises(typer.Exit) as exc_info:
db_cmd._abort_if_mcp_processes_alive()
assert exc_info.value.exit_code == 1

captured = capsys.readouterr()
# PIDs surface so the user can target the cleanup themselves.
assert "123" in captured.out
assert "456" in captured.out
assert "MCP processes" in captured.out

def test_prints_platform_specific_cleanup_hint_posix(self, monkeypatch, capsys):
monkeypatch.setattr(os, "name", "posix")
monkeypatch.setattr(
db_cmd,
"_find_live_mcp_processes",
lambda: [(7, "python basic-memory mcp")],
)
with pytest.raises(typer.Exit):
db_cmd._abort_if_mcp_processes_alive()
out = capsys.readouterr().out
assert "pgrep -fa 'basic-memory mcp'" in out

def test_prints_platform_specific_cleanup_hint_windows(self, monkeypatch, capsys):
monkeypatch.setattr(os, "name", "nt")
monkeypatch.setattr(
db_cmd,
"_find_live_mcp_processes",
lambda: [(7, "python basic-memory mcp")],
)
with pytest.raises(typer.Exit):
db_cmd._abort_if_mcp_processes_alive()
out = capsys.readouterr().out
assert "Get-CimInstance" in out
Loading
Loading