diff --git a/.env.template b/.env.template index f27c81f57..1a5bf058e 100644 --- a/.env.template +++ b/.env.template @@ -21,6 +21,11 @@ LOG_LEVEL=INFO # EMBEDDING_MODEL_CONFIG__TRANSPORT=openai # EMBEDDING_MODEL_CONFIG__MODEL=text-embedding-3-small # EMBEDDING_MODEL_CONFIG__OVERRIDES__BASE_URL= +# Asymmetric-encoder providers (e.g. some NVIDIA NIM embedding models like +# `nvidia/llama-nemotron-embed-1b-v2`) require an `input_type` parameter and +# will fail without it. Set it to a value the provider accepts (e.g. `query` or +# `passage`); it is sent on query and batch calls alike. Leave unset for plain OpenAI / Gemini. +# EMBEDDING_MODEL_CONFIG__INPUT_TYPE=query # EMBEDDING_MODEL_CONFIG__OVERRIDES__API_KEY_ENV= # LANGFUSE_HOST= diff --git a/src/config.py b/src/config.py index b435daa02..c806b3690 100644 --- a/src/config.py +++ b/src/config.py @@ -301,6 +301,13 @@ class ConfiguredEmbeddingModelSettings(BaseModel): transport: EmbeddingTransport = "openai" overrides: ModelOverrideSettings = Field(default_factory=ModelOverrideSettings) dimensions_mode: EmbeddingDimensionsMode = "auto" + # Optional `input_type` to forward to the embedding endpoint. Required by + # some asymmetric-encoder providers — notably NVIDIA NIM models such as + # `nvidia/llama-nemotron-embed-1b-v2` which fail with HTTP 400 when this + # field is missing. Common values: "query", "passage" (NIM), or + # provider-specific equivalents. Default `None` preserves the historical + # behaviour for plain OpenAI / Gemini, which don't accept this parameter. 
+ input_type: str | None = None @model_validator(mode="before") @classmethod @@ -337,6 +344,7 @@ class EmbeddingModelConfig(BaseModel): transport: EmbeddingTransport = "openai" api_key: str | None = None base_url: str | None = None + input_type: str | None = None @model_validator(mode="before") @classmethod @@ -459,6 +467,7 @@ def resolve_embedding_model_config( transport=configured.transport, api_key=api_key, base_url=configured.overrides.base_url, + input_type=configured.input_type, ) diff --git a/src/embedding_client.py b/src/embedding_client.py index 60516bc55..e7b5230da 100644 --- a/src/embedding_client.py +++ b/src/embedding_client.py @@ -152,6 +152,9 @@ def __init__( self.model: str = config.model self.vector_dimensions: int = vector_dimensions self.send_dimensions: bool = send_dimensions + # Optional `input_type` to forward to the embedding endpoint. Required + # by some asymmetric-encoder providers (notably NVIDIA NIM models). + self.input_type: str | None = config.input_type if self.transport == "gemini": if not config.api_key: @@ -237,6 +240,8 @@ async def _call_openai() -> list[float]: openai_kwargs: dict[str, Any] = {"model": self.model, "input": [query]} if self.send_dimensions: openai_kwargs["dimensions"] = self.vector_dimensions + if self.input_type is not None: + openai_kwargs["input_type"] = self.input_type response = await openai_client.embeddings.create(**openai_kwargs) return self._validate_embedding_dimensions(response.data[0].embedding) @@ -290,6 +295,8 @@ async def _embed_batch(batch: list[str] = batch) -> list[list[float]]: } if self.send_dimensions: openai_kwargs["dimensions"] = self.vector_dimensions + if self.input_type is not None: + openai_kwargs["input_type"] = self.input_type response = await self.client.embeddings.create(**openai_kwargs) batch_embeddings.extend( [ @@ -455,6 +462,8 @@ async def _call_provider() -> dict[str, dict[int, list[float]]]: } if self.send_dimensions: openai_kwargs["dimensions"] = self.vector_dimensions + if 
self.input_type is not None: + openai_kwargs["input_type"] = self.input_type response = await self.client.embeddings.create(**openai_kwargs) for item, embedding_data in zip(batch, response.data, strict=True): result[item.text_id][item.chunk_index] = ( diff --git a/tests/llm/test_embedding_client.py b/tests/llm/test_embedding_client.py index fcee66d9f..0049068e1 100644 --- a/tests/llm/test_embedding_client.py +++ b/tests/llm/test_embedding_client.py @@ -328,6 +328,27 @@ def test_resolve_send_dimensions_always_overrides_ada_rejecting_allowlist( assert s.resolve_send_dimensions() is True +def test_input_type_default_is_none(monkeypatch: pytest.MonkeyPatch) -> None: + """By default no input_type is sent — preserves OpenAI / Gemini behaviour.""" + s = _build_embedding_settings({}, monkeypatch) + assert s.MODEL_CONFIG.input_type is None + + +def test_input_type_propagates_through_resolved_config( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """When configured, input_type is preserved end-to-end through resolution.""" + from src.config import resolve_embedding_model_config + + s = _build_embedding_settings( + {"EMBEDDING_MODEL_CONFIG__INPUT_TYPE": "query"}, + monkeypatch, + ) + assert s.MODEL_CONFIG.input_type == "query" + resolved = resolve_embedding_model_config(s.MODEL_CONFIG) + assert resolved.input_type == "query" + + def test_resolve_send_dimensions_never_returns_false_regardless( monkeypatch: pytest.MonkeyPatch, ) -> None: