cocoindex-io · dudegladiator · Jun 7, 2026 · Jun 7, 2026 · Jun 7, 2026 · Jun 7, 2026
diff --git a/README.md b/README.md
@@ -166,7 +166,7 @@ The background daemon starts automatically on first use.
 
 | Command | Description |
 |---------|-------------|
-| `ccc init` | Initialize a project — creates settings files, adds `.cocoindex_code/` to `.gitignore` |
+| `ccc init` | Initialize a project — creates settings files, adds `.cocoindex_code/` to `.gitignore`. Use `--backend turbo-quant` (optionally with `--tq-bits`) to pick the compressed backend; see [Vector Backends](#vector-backends) |
 | `ccc index` | Build or update the index (auto-inits if needed). Shows streaming progress. |
 | `ccc search <query>` | Semantic search across the codebase |
 | `ccc status` | Show index stats (chunk count, file count, language breakdown) |
@@ -189,6 +189,34 @@ ccc search --refresh database schema                 # update index first, then
 
 By default, `ccc search` scopes results to your current working directory (relative to the project root). Use `--path` to override.
 
+## Vector Backends
+
+`ccc` supports two vector-search backends, chosen at `ccc init` and baked into the index:
+
+| Backend | Index size | Search | Best for |
+|---------|-----------|--------|----------|
+| `sqlite-vec` (default) | full `float32` | exact KNN ([sqlite-vec](https://github.com/asg017/sqlite-vec)) | most projects — fastest, exact results |
+| `turbo-quant` | ~4–8× smaller | approximate, unbiased inner-product | large codebases where index size matters |
+
+**TurboQuant** is a data-oblivious vector quantizer ([Zandieh et al., 2025](https://arxiv.org/abs/2504.19874)): it randomly rotates each embedding, quantizes per coordinate with optimal scalar codebooks, and adds a 1-bit QJL residual for an unbiased inner-product estimate. At 4-bit it compresses the index ~8× on disk at recall@10 ≈ 0.9, and it needs no training or calibration.
+
+```bash
+ccc init                                  # interactive — prompts for backend
+ccc init --backend turbo-quant            # 4-bit (default bit-width)
+ccc init --backend turbo-quant --tq-bits 2  # 2-bit — ~16× smaller, lower recall
+ccc init --backend sqlite-vec             # explicit default
+```
+
+Switching backends requires re-initializing and re-indexing:
+
+```bash
+ccc reset --all -f
+ccc init --backend turbo-quant
+ccc index
+```
+
+> Higher `--tq-bits` (1–4) means better recall and a larger index. `sqlite-vec` stays the default for exact, low-latency search.
+
 ## Docker
 
 A Docker image is available for teams who want a reproducible, dependency-free
@@ -438,6 +466,9 @@ OpenAI embeddings (`text-embedding-3-*`, `text-embedding-ada-002`) are intention
 Per-project. Controls which files to index.
 
 ```yaml
+backend: sqlite-vec        # or "turbo-quant" — see Vector Backends
+tq_bits: 4                 # TurboQuant bit-width (1–4); only used when backend is turbo-quant
+
 include_patterns:
   - "**/*.py"
   - "**/*.js"
@@ -462,6 +493,8 @@ chunkers:
     module: example_toml_chunker:toml_chunker
 ```
 
+> `backend` is set at `ccc init` and baked into the index — changing it requires re-initializing and re-indexing (see [Vector Backends](#vector-backends)).
+
 > `.cocoindex_code/` is automatically added to `.gitignore` during init.
 
 Use `chunkers` when you want to control how a file type is split into chunks before indexing.

diff --git a/pyproject.toml b/pyproject.toml
@@ -107,10 +107,11 @@ files = ["src"]
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
-python_files = ["test_*.py"]
+python_files = ["test_*.py", "benchmark_*.py"]
 python_functions = ["test_*"]
-addopts = "-v --tb=short -m 'not docker_e2e'"
+addopts = "-v --tb=short -m 'not docker_e2e and not benchmark'"
 asyncio_mode = "auto"
 markers = [
     "docker_e2e: requires Docker; builds the image and runs containerized E2E tests. Run with: pytest -m docker_e2e",
+    "benchmark: TurboQuant vs sqlite-vec benchmark; prints a metrics table. Run with: pytest -m benchmark -s",
 ]
diff --git a/src/cocoindex_code/cli.py b/src/cocoindex_code/cli.py
@@ -21,6 +21,9 @@
 
 from .settings import (
     DEFAULT_ST_MODEL,
+    DEFAULT_TQ_BITS,
+    SUPPORTED_TQ_BITS,
+    Backend,
     EmbeddingSettings,
     cocoindex_db_path,
     default_project_settings,
@@ -34,6 +37,8 @@
     save_project_settings,
     target_sqlite_db_path,
     user_settings_path,
+    validate_backend,
+    validate_tq_bits,
 )
 
 app = _typer.Typer(
@@ -383,6 +388,53 @@ def _resolve_embedding_choice(
     return EmbeddingSettings(provider=provider, model=model.strip())
 
 
+def _resolve_backend(backend_flag: str | None, tq_bits_flag: int | None) -> tuple[Backend, int]:
+    """Resolve ``(backend, tq_bits)`` from CLI flags, an interactive prompt, or defaults.
+
+    Backend: ``--backend`` if given; else a prompt when stdin is a TTY; else sqlite-vec.
+    Bits: ``--tq-bits`` if given; else the turbo-quant prompt's answer; else ``DEFAULT_TQ_BITS``.
+    """
+    bits = validate_tq_bits(tq_bits_flag) if tq_bits_flag is not None else DEFAULT_TQ_BITS
+
+    if backend_flag is not None:  # explicit flag wins; never prompt
+        return validate_backend(backend_flag), bits
+
+    if not sys.stdin.isatty():  # non-interactive: use the default backend
+        return "sqlite-vec", bits
+
+    import questionary  # deferred: only reached on the interactive path
+
+    backend = questionary.select(
+        "Vector backend",
+        choices=[
+            questionary.Choice(
+                title="sqlite-vec (default, exact nearest-neighbor)",
+                value="sqlite-vec",
+            ),
+            questionary.Choice(
+                title="turbo-quant (compressed, ~4-8x smaller index)",
+                value="turbo-quant",
+            ),
+        ],
+    ).ask()
+    if backend is None:  # cancelled
+        raise _typer.Exit(code=1)
+
+    if backend == "turbo-quant" and tq_bits_flag is None:  # ask only if --tq-bits was not given
+        answer = questionary.select(
+            "TurboQuant bit-width (higher = better recall, larger index)",
+            # Choice titles are strings; values are ints. The default must match a
+            # choice *value* (int), not its title (str).
+            choices=[questionary.Choice(title=str(b), value=b) for b in SUPPORTED_TQ_BITS],
+            default=DEFAULT_TQ_BITS,  # type: ignore[arg-type]
+        ).ask()
+        if answer is None:  # cancelled
+            raise _typer.Exit(code=1)
+        bits = validate_tq_bits(answer)
+
+    return validate_backend(backend), bits
+
+
 def _ok_fail_tag(ok: bool) -> str:
     """Return a colored `[OK]` or `[FAIL]` tag string."""
     import click as _click
@@ -484,9 +536,33 @@ def init(
         "--litellm-model",
         help="Use the given LiteLLM model and skip provider/model prompts.",
     ),
+    backend: str | None = _typer.Option(
+        None,
+        "--backend",
+        help="Vector backend: 'sqlite-vec' (default, exact) or 'turbo-quant' (compressed).",
+    ),
+    tq_bits: int | None = _typer.Option(
+        None,
+        "--tq-bits",
+        help=f"TurboQuant bit-width {list(SUPPORTED_TQ_BITS)} (only for --backend turbo-quant).",
+    ),
     force: bool = _typer.Option(False, "-f", "--force", help="Skip parent directory warning"),
 ) -> None:
     """Initialize a project for cocoindex-code."""
+    # Validate backend flags early so bad input fails before any side effects.
+    if backend is not None:
+        try:
+            validate_backend(backend)
+        except ValueError as e:
+            _typer.echo(f"Error: {e}", err=True)
+            raise _typer.Exit(code=1) from e
+    if tq_bits is not None:
+        try:
+            validate_tq_bits(tq_bits)
+        except ValueError as e:
+            _typer.echo(f"Error: {e}", err=True)
+            raise _typer.Exit(code=1) from e
+
     cwd = Path.cwd().resolve()
     settings_file = project_settings_path(cwd)
 
@@ -520,9 +596,19 @@ def init(
             )
             raise _typer.Exit(code=1)
 
+    # Resolve the vector backend: explicit flag wins; otherwise prompt when
+    # interactive; otherwise fall back to the default (sqlite-vec).
+    resolved_backend, resolved_bits = _resolve_backend(backend, tq_bits)
+
     # Create project settings
-    save_project_settings(cwd, default_project_settings())
+    project_settings = default_project_settings()
+    project_settings.backend = resolved_backend
+    project_settings.tq_bits = resolved_bits
+    save_project_settings(cwd, project_settings)
     _typer.echo(f"Created project settings: {format_path_for_display(settings_file)}")
+    _typer.echo(f"Vector backend: {resolved_backend}")
+    if resolved_backend == "turbo-quant":
+        _typer.echo(f"TurboQuant bit-width: {resolved_bits}")
 
     # Add to .gitignore
     add_to_gitignore(cwd)

diff --git a/src/cocoindex_code/daemon.py b/src/cocoindex_code/daemon.py
@@ -437,14 +437,22 @@ async def _check_index_status(project_root_str: str) -> DoctorCheckResult:
         return DoctorCheckResult(name="Index Status", ok=True, details=details, errors=[])
 
     try:
+        from .tq_store import index_table_name
+
         conn = coco_sqlite.connect(str(db_path), load_vec=True)
         try:
             with conn.readonly() as db:
-                total_chunks = db.execute("SELECT COUNT(*) FROM code_chunks_vec").fetchone()[0]
-                file_rows = db.execute("SELECT DISTINCT file_path FROM code_chunks_vec").fetchall()
+                table = index_table_name(db)
+                if table is None:
+                    details.append("Index not created yet.")
+                    return DoctorCheckResult(
+                        name="Index Status", ok=True, details=details, errors=[]
+                    )
+                total_chunks = db.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0]
+                file_rows = db.execute(f"SELECT DISTINCT file_path FROM {table}").fetchall()
                 total_files = len(file_rows)
                 lang_rows = db.execute(
-                    "SELECT language, COUNT(*) FROM code_chunks_vec GROUP BY language"
+                    f"SELECT language, COUNT(*) FROM {table} GROUP BY language"
                 ).fetchall()
                 languages = {row[0]: row[1] for row in lang_rows}
         finally:

diff --git a/src/cocoindex_code/indexer.py b/src/cocoindex_code/indexer.py
@@ -4,6 +4,7 @@
 
 from collections.abc import Iterable
 from pathlib import Path, PurePath
+from typing import Any
 
 import cocoindex as coco
 from cocoindex.connectors import localfs, sqlite
@@ -15,14 +16,17 @@
 from pathspec import GitIgnoreSpec
 
 from .chunking import CHUNKER_REGISTRY
+from .schema import TqChunkRow
 from .settings import load_gitignore_spec, load_project_settings
 from .shared import (
     CODEBASE_DIR,
     EMBEDDER,
     INDEXING_EMBED_PARAMS,
     SQLITE_DB,
+    TURBO_QUANT,
     CodeChunk,
 )
+from .tq_store import TQ_TABLE, quantize_row
 
 # Chunking configuration
 CHUNK_SIZE = 1000
@@ -137,9 +141,14 @@ def is_file_included(self, path: PurePath) -> bool:
 @coco.fn(memo=True)
 async def process_file(
     file: localfs.File,
-    table: sqlite.TableTarget[CodeChunk],
+    table: sqlite.TableTarget[Any],
 ) -> None:
-    """Process a single file: chunk, embed, and store."""
+    """Process a single file: chunk, embed, and store.
+
+    The stored row type depends on the project backend: ``CodeChunk`` (raw
+    float32 in vec0) for sqlite-vec, or ``TqChunkRow`` (quantized) for
+    turbo-quant. ``table`` is the matching target built by ``indexer_main``.
+    """
     embedder = coco.use_context(EMBEDDER)
     indexing_params = coco.use_context(INDEXING_EMBED_PARAMS)
 
@@ -177,19 +186,37 @@ async def process_file(
         )
 
     id_gen = IdGenerator()
+    backend = ps.backend
+    tq = coco.use_context(TURBO_QUANT) if backend == "turbo-quant" else None
 
     async def process(chunk: Chunk) -> None:
-        table.declare_row(
-            row=CodeChunk(
-                id=await id_gen.next_id(chunk.text),
-                file_path=file.file_path.path.as_posix(),
-                language=language,
-                content=chunk.text,
-                start_line=chunk.start.line,
-                end_line=chunk.end.line,
-                embedding=await embedder.embed(chunk.text, **indexing_params),
+        chunk_id = await id_gen.next_id(chunk.text)
+        embedding = await embedder.embed(chunk.text, **indexing_params)
+        if tq is not None:
+            table.declare_row(
+                row=quantize_row(
+                    tq,
+                    chunk_id=chunk_id,
+                    file_path=file.file_path.path.as_posix(),
+                    language=language,
+                    content=chunk.text,
+                    start_line=chunk.start.line,
+                    end_line=chunk.end.line,
+                    embedding=embedding,
+                )
+            )
+        else:
+            table.declare_row(
+                row=CodeChunk(
+                    id=chunk_id,
+                    file_path=file.file_path.path.as_posix(),
+                    language=language,
+                    content=chunk.text,
+                    start_line=chunk.start.line,
+                    end_line=chunk.end.line,
+                    embedding=embedding,
+                )
             )
-        )
 
     await coco.map(process, chunks)
 
@@ -201,18 +228,40 @@ async def indexer_main() -> None:
     ps = load_project_settings(project_root)
     gitignore_spec = load_gitignore_spec(project_root)
 
-    table = await sqlite.mount_table_target(
-        db=SQLITE_DB,
-        table_name="code_chunks_vec",
-        table_schema=await sqlite.TableSchema.from_class(
-            CodeChunk,
-            primary_key=["id"],
-        ),
-        virtual_table_def=Vec0TableDef(
-            partition_key_columns=["language"],
-            auxiliary_columns=["file_path", "content", "start_line", "end_line"],
-        ),
-    )
+    table: sqlite.TableTarget[Any]
+    if ps.backend == "turbo-quant":
+        tq = coco.use_context(TURBO_QUANT)
+        # Persist index metadata (bits/dim/seed) so the store can regenerate the
+        # rotation/QJL matrices at query time.
+        db = coco.use_context(SQLITE_DB)
+        from .tq_store import create_metadata_table, write_metadata
+
+        # The chunk table itself is created by mount_table_target below; here we
+        # only own the side metadata table.
+        with db.transaction() as conn:
+            create_metadata_table(conn)
+            write_metadata(conn, bits=tq.bits, dim=tq.dim, seed=tq.seed)
+        table = await sqlite.mount_table_target(
+            db=SQLITE_DB,
+            table_name=TQ_TABLE,
+            table_schema=await sqlite.TableSchema.from_class(
+                TqChunkRow,
+                primary_key=["id"],
+            ),
+        )
+    else:
+        table = await sqlite.mount_table_target(
+            db=SQLITE_DB,
+            table_name="code_chunks_vec",
+            table_schema=await sqlite.TableSchema.from_class(
+                CodeChunk,
+                primary_key=["id"],
+            ),
+            virtual_table_def=Vec0TableDef(
+                partition_key_columns=["language"],
+                auxiliary_columns=["file_path", "content", "start_line", "end_line"],
+            ),
+        )
 
     base_matcher = PatternFilePathMatcher(
         included_patterns=ps.include_patterns,