From e9f864eda452f34530c415d78115c59d85667ba8 Mon Sep 17 00:00:00 2001 From: phernandez Date: Tue, 28 Apr 2026 22:00:52 -0500 Subject: [PATCH 1/2] test(core): regression guard for vector-row cleanup on entity delete (#764) The delete-path cleanup the reporter asked for already shipped in PR #733 ("clean up delete vectors and cloud sync", merged 2026-04-10): handle_delete calls _clear_entity_vectors which delegates to repository.delete_entity_vector_rows. Issue #764 was filed two weeks later without referencing #733, so this PR locks the wiring in with a regression test. The test spies on the repository's delete_entity_vector_rows and asserts handle_delete invokes it with the entity id. monkeypatch flips _semantic_enabled=True so we exercise the real delegation rather than the no-op short-circuit that fires when semantic is disabled. The two follow-up comments on #764 describe distinct issues that are out of scope here: - pavelasm: project info doesn't surface orphaned vector embeddings. Reporting gap for pre-existing drift, not a delete-path bug. - natedev: the watch service single-file sync path doesn't refresh vectors on update. Different code path (sync_service.sync_file), not delete-related. Both should land as separate issues. Closes #764 Signed-off-by: phernandez --- tests/services/test_search_service.py | 37 +++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tests/services/test_search_service.py b/tests/services/test_search_service.py index d71f0d5f..bdf5e356 100644 --- a/tests/services/test_search_service.py +++ b/tests/services/test_search_service.py @@ -258,6 +258,43 @@ async def test_delete_entity_without_permalink(search_service, sample_entity): await search_service.handle_delete(sample_entity) +@pytest.mark.asyncio +async def test_handle_delete_clears_entity_vectors( + search_service, sample_entity, monkeypatch +): + """Regression guard for #764: handle_delete must drive vector-row cleanup + so deleting an entity doesn't leave orphaned rows in `search_vector_chunks` + or `search_vector_embeddings`. + + Verified by spying on the repository's `delete_entity_vector_rows`. The + short-circuit path inside `_clear_entity_vectors` (semantic disabled) is + bypassed by forcing `_semantic_enabled=True` so we exercise the real + delegation, not the no-op branch. + """ + calls: list[int] = [] + + async def spy_delete_entity_vector_rows(entity_id: int) -> None: + calls.append(entity_id) + + # Force the cleanup path even if the test repo is configured without + # semantic enabled — we're asserting the wiring, not embedding behavior. + monkeypatch.setattr( + search_service.repository, "_semantic_enabled", True, raising=False + ) + monkeypatch.setattr( + search_service.repository, + "delete_entity_vector_rows", + spy_delete_entity_vector_rows, + ) + + await search_service.handle_delete(sample_entity) + + assert calls == [sample_entity.id], ( + f"handle_delete must call delete_entity_vector_rows({sample_entity.id}); " + f"got calls={calls}" + ) + + @pytest.mark.asyncio async def test_no_criteria(search_service, test_graph): """Test search with no criteria returns empty list.""" From 7386929bebe9743eeddb120b3ef0940afcea6c6a Mon Sep 17 00:00:00 2001 From: phernandez Date: Tue, 28 Apr 2026 22:28:54 -0500 Subject: [PATCH 2/2] test(core): drop raising=False on _semantic_enabled monkeypatch The repository instance under test always exposes `_semantic_enabled`, so `raising=False` was masking a hypothetical rename rather than guarding against a real condition. Use the default raising=True so a future rename or removal fails loudly at the patch site instead of mutating drift downstream. Signed-off-by: phernandez --- tests/services/test_search_service.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/services/test_search_service.py b/tests/services/test_search_service.py index bdf5e356..609c68ae 100644 --- a/tests/services/test_search_service.py +++ b/tests/services/test_search_service.py @@ -278,9 +278,7 @@ async def spy_delete_entity_vector_rows(entity_id: int) -> None: # Force the cleanup path even if the test repo is configured without # semantic enabled — we're asserting the wiring, not embedding behavior. - monkeypatch.setattr( - search_service.repository, "_semantic_enabled", True, raising=False - ) + monkeypatch.setattr(search_service.repository, "_semantic_enabled", True) monkeypatch.setattr( search_service.repository, "delete_entity_vector_rows",