From f9638475dcc102c7d16f895e1be67bcb0b1d2640 Mon Sep 17 00:00:00 2001
From: Jen Hamon
Date: Wed, 28 Jan 2026 14:48:24 -0500
Subject: [PATCH] feat: add TextQuery and VectorQuery classes

Add query model classes for the document search API:
- TextQuery: for full-text search with field, query, boost, slop
- VectorQuery: for vector search with field, values, sparse_values

Both classes support dict-like access and serialize to API format.

Closes SDK-108
---
 pinecone/__init__.py                         |   2 +
 pinecone/db_data/dataclasses/__init__.py     |   4 +
 pinecone/db_data/dataclasses/text_query.py   |  49 +++++++++
 pinecone/db_data/dataclasses/vector_query.py |  58 ++++++++++
 tests/unit/data/test_query_classes.py        | 108 +++++++++++++++++++
 5 files changed, 221 insertions(+)
 create mode 100644 pinecone/db_data/dataclasses/text_query.py
 create mode 100644 pinecone/db_data/dataclasses/vector_query.py
 create mode 100644 tests/unit/data/test_query_classes.py

diff --git a/pinecone/__init__.py b/pinecone/__init__.py
index c1b0f6acb..037b5bc14 100644
--- a/pinecone/__init__.py
+++ b/pinecone/__init__.py
@@ -45,6 +45,8 @@
     "SearchQuery": ("pinecone.db_data.dataclasses", "SearchQuery"),
     "SearchQueryVector": ("pinecone.db_data.dataclasses", "SearchQueryVector"),
     "SearchRerank": ("pinecone.db_data.dataclasses", "SearchRerank"),
+    "TextQuery": ("pinecone.db_data.dataclasses", "TextQuery"),
+    "VectorQuery": ("pinecone.db_data.dataclasses", "VectorQuery"),
     "FetchResponse": ("pinecone.db_data.dataclasses", "FetchResponse"),
     "FetchByMetadataResponse": ("pinecone.db_data.dataclasses", "FetchByMetadataResponse"),
     "DeleteRequest": ("pinecone.db_data.models", "DeleteRequest"),
diff --git a/pinecone/db_data/dataclasses/__init__.py b/pinecone/db_data/dataclasses/__init__.py
index d6709e8ab..dbf79ba46 100644
--- a/pinecone/db_data/dataclasses/__init__.py
+++ b/pinecone/db_data/dataclasses/__init__.py
@@ -8,6 +8,8 @@
 from .query_response import QueryResponse
 from .upsert_response import UpsertResponse
 from .update_response import UpdateResponse
+from .text_query import TextQuery
+from .vector_query import VectorQuery
 
 __all__ = [
     "SparseValues",
@@ -21,4 +23,6 @@
     "QueryResponse",
     "UpsertResponse",
     "UpdateResponse",
+    "TextQuery",
+    "VectorQuery",
 ]
diff --git a/pinecone/db_data/dataclasses/text_query.py b/pinecone/db_data/dataclasses/text_query.py
new file mode 100644
index 000000000..d3b7997ae
--- /dev/null
+++ b/pinecone/db_data/dataclasses/text_query.py
@@ -0,0 +1,49 @@
+"""TextQuery class for full-text search queries."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from .utils import DictLike
+
+
+@dataclass
+class TextQuery(DictLike):
+    """A text query for full-text search.
+
+    Used as the ``score_by`` parameter in ``search_documents()`` to perform
+    full-text search on a specified field.
+
+    :param field: The name of the field to search.
+    :param query: The search query string.
+    :param boost: Optional boost factor for this query's score contribution.
+    :param slop: Optional slop parameter for phrase queries, controlling
+        how many positions apart terms can be.
+
+    Example usage::
+
+        from pinecone import TextQuery
+
+        results = index.search_documents(
+            namespace="movies",
+            score_by=TextQuery(field="title", query='return "pink panther"'),
+            top_k=10,
+        )
+    """
+
+    field: str
+    query: str
+    boost: float | None = None
+    slop: int | None = None
+
+    def to_dict(self) -> dict:
+        """Serialize to API format.
+
+        :returns: Dictionary representation for the API.
+        """
+        result: dict = {"field": self.field, "query": self.query}
+        if self.boost is not None:
+            result["boost"] = self.boost
+        if self.slop is not None:
+            result["slop"] = self.slop
+        return result
diff --git a/pinecone/db_data/dataclasses/vector_query.py b/pinecone/db_data/dataclasses/vector_query.py
new file mode 100644
index 000000000..6b76bc81e
--- /dev/null
+++ b/pinecone/db_data/dataclasses/vector_query.py
@@ -0,0 +1,58 @@
+"""VectorQuery class for vector similarity search queries."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from .utils import DictLike
+from .sparse_values import SparseValues
+
+
+@dataclass
+class VectorQuery(DictLike):
+    """A vector query for similarity search.
+
+    Used as the ``score_by`` parameter in ``search_documents()`` to perform
+    vector similarity search on a specified field.
+
+    :param field: The name of the vector field to search.
+    :param values: Dense vector values for similarity search.
+    :param sparse_values: Sparse vector values for hybrid search.
+
+    Example usage::
+
+        from pinecone import VectorQuery
+
+        # Dense vector query
+        results = index.search_documents(
+            namespace="movies",
+            score_by=VectorQuery(field="embedding", values=[0.1, 0.2, 0.3, ...]),
+            top_k=10,
+        )
+
+        # Sparse vector query
+        results = index.search_documents(
+            namespace="movies",
+            score_by=VectorQuery(
+                field="sparse_embedding",
+                sparse_values=SparseValues(indices=[1, 5, 10], values=[0.5, 0.3, 0.2]),
+            ),
+            top_k=10,
+        )
+    """
+
+    field: str
+    values: list[float] | None = None
+    sparse_values: SparseValues | None = None
+
+    def to_dict(self) -> dict:
+        """Serialize to API format.
+
+        :returns: Dictionary representation for the API.
+        """
+        result: dict = {"field": self.field}
+        if self.values is not None:
+            result["values"] = self.values
+        if self.sparse_values is not None:
+            result["sparse_values"] = self.sparse_values.to_dict()
+        return result
diff --git a/tests/unit/data/test_query_classes.py b/tests/unit/data/test_query_classes.py
new file mode 100644
index 000000000..3c456d756
--- /dev/null
+++ b/tests/unit/data/test_query_classes.py
@@ -0,0 +1,108 @@
+"""Tests for TextQuery and VectorQuery classes."""
+
+from pinecone.db_data.dataclasses import TextQuery, VectorQuery, SparseValues
+
+
+class TestTextQuery:
+    def test_required_params(self):
+        query = TextQuery(field="title", query="pink panther")
+        assert query.field == "title"
+        assert query.query == "pink panther"
+        assert query.boost is None
+        assert query.slop is None
+
+    def test_to_dict_minimal(self):
+        query = TextQuery(field="title", query="pink panther")
+        result = query.to_dict()
+        assert result == {"field": "title", "query": "pink panther"}
+
+    def test_to_dict_with_boost(self):
+        query = TextQuery(field="title", query="pink panther", boost=2.0)
+        result = query.to_dict()
+        assert result == {"field": "title", "query": "pink panther", "boost": 2.0}
+
+    def test_to_dict_with_slop(self):
+        query = TextQuery(field="title", query="pink panther", slop=2)
+        result = query.to_dict()
+        assert result == {"field": "title", "query": "pink panther", "slop": 2}
+
+    def test_to_dict_with_all_options(self):
+        query = TextQuery(field="title", query="pink panther", boost=1.5, slop=3)
+        result = query.to_dict()
+        assert result == {"field": "title", "query": "pink panther", "boost": 1.5, "slop": 3}
+
+    def test_dict_like_access(self):
+        query = TextQuery(field="title", query="pink panther", boost=2.0)
+        assert query["field"] == "title"
+        assert query["query"] == "pink panther"
+        assert query["boost"] == 2.0
+
+    def test_dict_like_get(self):
+        query = TextQuery(field="title", query="pink panther")
+        assert query.get("field") == "title"
+        assert query.get("boost") is None
+        assert query.get("nonexistent", "default") == "default"
+
+
+class TestVectorQuery:
+    def test_required_params(self):
+        query = VectorQuery(field="embedding")
+        assert query.field == "embedding"
+        assert query.values is None
+        assert query.sparse_values is None
+
+    def test_to_dict_minimal(self):
+        query = VectorQuery(field="embedding")
+        result = query.to_dict()
+        assert result == {"field": "embedding"}
+
+    def test_to_dict_with_values(self):
+        query = VectorQuery(field="embedding", values=[0.1, 0.2, 0.3])
+        result = query.to_dict()
+        assert result == {"field": "embedding", "values": [0.1, 0.2, 0.3]}
+
+    def test_to_dict_with_sparse_values(self):
+        sparse = SparseValues(indices=[1, 5, 10], values=[0.5, 0.3, 0.2])
+        query = VectorQuery(field="sparse_embedding", sparse_values=sparse)
+        result = query.to_dict()
+        assert result == {
+            "field": "sparse_embedding",
+            "sparse_values": {"indices": [1, 5, 10], "values": [0.5, 0.3, 0.2]},
+        }
+
+    def test_to_dict_with_both_values(self):
+        sparse = SparseValues(indices=[1, 2], values=[0.5, 0.5])
+        query = VectorQuery(field="hybrid", values=[0.1, 0.2, 0.3], sparse_values=sparse)
+        result = query.to_dict()
+        assert result == {
+            "field": "hybrid",
+            "values": [0.1, 0.2, 0.3],
+            "sparse_values": {"indices": [1, 2], "values": [0.5, 0.5]},
+        }
+
+    def test_dict_like_access(self):
+        query = VectorQuery(field="embedding", values=[0.1, 0.2])
+        assert query["field"] == "embedding"
+        assert query["values"] == [0.1, 0.2]
+
+    def test_dict_like_get(self):
+        query = VectorQuery(field="embedding")
+        assert query.get("field") == "embedding"
+        assert query.get("values") is None
+        assert query.get("nonexistent", "default") == "default"
+
+
+class TestQueryUsageExamples:
+    """Test the usage examples from the ticket."""
+
+    def test_text_query_example(self):
+        query = TextQuery(field="title", query='return "pink panther"')
+        result = query.to_dict()
+        assert result["field"] == "title"
+        assert result["query"] == 'return "pink panther"'
+
+    def test_vector_query_example(self):
+        query = VectorQuery(field="embedding", values=[0.1, 0.2, 0.3])
+        result = query.to_dict()
+        assert result["field"] == "embedding"
+        assert result["values"] == [0.1, 0.2, 0.3]