Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .env.template
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ LOG_LEVEL=INFO
# EMBEDDING_MODEL_CONFIG__TRANSPORT=openai
# EMBEDDING_MODEL_CONFIG__MODEL=text-embedding-3-small
# EMBEDDING_MODEL_CONFIG__OVERRIDES__BASE_URL=
# Asymmetric-encoder providers (e.g. some NVIDIA NIM embedding models such as
# `nvidia/llama-nemotron-embed-1b-v2`) require an `input_type` parameter and
# will fail without it. Set this to the value the provider expects (usually
# `query`; NIM also accepts `passage`). Note that this single value is sent
# with every embedding request, for indexing and search alike. Leave unset
# for plain OpenAI / Gemini, which do not accept this parameter.
# EMBEDDING_MODEL_CONFIG__INPUT_TYPE=query
# EMBEDDING_MODEL_CONFIG__OVERRIDES__API_KEY_ENV=

# LANGFUSE_HOST=
Expand Down
9 changes: 9 additions & 0 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,13 @@ class ConfiguredEmbeddingModelSettings(BaseModel):
transport: EmbeddingTransport = "openai"
overrides: ModelOverrideSettings = Field(default_factory=ModelOverrideSettings)
dimensions_mode: EmbeddingDimensionsMode = "auto"
# Optional `input_type` to forward to the embedding endpoint. Required by
# some asymmetric-encoder providers — notably NVIDIA NIM models such as
# `nvidia/llama-nemotron-embed-1b-v2` which fail with HTTP 400 when this
# field is missing. Common values: "query", "passage" (NIM), or
# provider-specific equivalents. Default `None` preserves the historical
# behaviour for plain OpenAI / Gemini, which don't accept this parameter.
input_type: str | None = None

@model_validator(mode="before")
@classmethod
Expand Down Expand Up @@ -337,6 +344,7 @@ class EmbeddingModelConfig(BaseModel):
transport: EmbeddingTransport = "openai"
api_key: str | None = None
base_url: str | None = None
input_type: str | None = None

@model_validator(mode="before")
@classmethod
Expand Down Expand Up @@ -459,6 +467,7 @@ def resolve_embedding_model_config(
transport=configured.transport,
api_key=api_key,
base_url=configured.overrides.base_url,
input_type=configured.input_type,
)


Expand Down
9 changes: 9 additions & 0 deletions src/embedding_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,9 @@ def __init__(
self.model: str = config.model
self.vector_dimensions: int = vector_dimensions
self.send_dimensions: bool = send_dimensions
# Optional `input_type` to forward to the embedding endpoint. Required
# by some asymmetric-encoder providers (notably NVIDIA NIM models).
self.input_type: str | None = config.input_type

if self.transport == "gemini":
if not config.api_key:
Expand Down Expand Up @@ -237,6 +240,8 @@ async def _call_openai() -> list[float]:
openai_kwargs: dict[str, Any] = {"model": self.model, "input": [query]}
if self.send_dimensions:
openai_kwargs["dimensions"] = self.vector_dimensions
if self.input_type is not None:
openai_kwargs["input_type"] = self.input_type
response = await openai_client.embeddings.create(**openai_kwargs)
return self._validate_embedding_dimensions(response.data[0].embedding)

Expand Down Expand Up @@ -290,6 +295,8 @@ async def _embed_batch(batch: list[str] = batch) -> list[list[float]]:
}
if self.send_dimensions:
openai_kwargs["dimensions"] = self.vector_dimensions
if self.input_type is not None:
openai_kwargs["input_type"] = self.input_type
response = await self.client.embeddings.create(**openai_kwargs)
batch_embeddings.extend(
[
Expand Down Expand Up @@ -455,6 +462,8 @@ async def _call_provider() -> dict[str, dict[int, list[float]]]:
}
if self.send_dimensions:
openai_kwargs["dimensions"] = self.vector_dimensions
if self.input_type is not None:
openai_kwargs["input_type"] = self.input_type
response = await self.client.embeddings.create(**openai_kwargs)
for item, embedding_data in zip(batch, response.data, strict=True):
result[item.text_id][item.chunk_index] = (
Expand Down
21 changes: 21 additions & 0 deletions tests/llm/test_embedding_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,27 @@ def test_resolve_send_dimensions_always_overrides_ada_rejecting_allowlist(
assert s.resolve_send_dimensions() is True


def test_input_type_default_is_none(monkeypatch: pytest.MonkeyPatch) -> None:
    """With no overrides supplied, the embedding model config has no input_type."""
    settings = _build_embedding_settings({}, monkeypatch)
    model_config = settings.MODEL_CONFIG
    assert model_config.input_type is None


def test_input_type_propagates_through_resolved_config(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A configured input_type survives both settings parsing and resolution."""
    from src.config import resolve_embedding_model_config

    expected = "query"
    settings = _build_embedding_settings(
        {"EMBEDDING_MODEL_CONFIG__INPUT_TYPE": expected},
        monkeypatch,
    )

    # First hop: the value must land on the configured (pre-resolution) model.
    configured = settings.MODEL_CONFIG
    assert configured.input_type == expected

    # Second hop: resolution must carry the value over unchanged.
    resolved = resolve_embedding_model_config(configured)
    assert resolved.input_type == expected


def test_resolve_send_dimensions_never_returns_false_regardless(
monkeypatch: pytest.MonkeyPatch,
) -> None:
Expand Down