-
Notifications
You must be signed in to change notification settings - Fork 115
feat: add TextQuery and VectorQuery classes #586
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,49 @@ | ||
| """TextQuery class for full-text search queries.""" | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| from dataclasses import dataclass | ||
|
|
||
| from .utils import DictLike | ||
|
|
||
|
|
||
@dataclass
class TextQuery(DictLike):
    """Full-text search query against a single field.

    Pass an instance as the ``score_by`` argument of ``search_documents()``
    to run a full-text search on ``field``.

    :param field: Name of the field to search.
    :param query: Search query string.
    :param boost: Optional boost factor for this query's score contribution.
        Omitted from the serialized form when ``None``.
    :param slop: Optional slop for phrase queries, controlling how many
        positions apart terms can be. Omitted from the serialized form
        when ``None``.

    Example usage::

        from pinecone import TextQuery

        results = index.search_documents(
            namespace="movies",
            score_by=TextQuery(field="title", query='return "pink panther"'),
            top_k=10,
        )
    """

    field: str
    query: str
    boost: float | None = None
    slop: int | None = None

    def to_dict(self) -> dict:
        """Build the API payload for this query.

        ``field`` and ``query`` are always present; ``boost`` and ``slop``
        are included only when they are not ``None``.

        :returns: Dictionary representation for the API.
        """
        # Insertion order here fixes the key order of the optional entries.
        optional = {"boost": self.boost, "slop": self.slop}
        payload: dict = {"field": self.field, "query": self.query}
        payload.update({key: value for key, value in optional.items() if value is not None})
        return payload
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,58 @@ | ||
| """VectorQuery class for vector similarity search queries.""" | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| from dataclasses import dataclass | ||
|
|
||
| from .utils import DictLike | ||
| from .sparse_values import SparseValues | ||
|
|
||
|
|
||
@dataclass
class VectorQuery(DictLike):
    """Vector similarity search query against a single field.

    Pass an instance as the ``score_by`` argument of ``search_documents()``
    to run a vector similarity search on ``field``.

    :param field: Name of the vector field to search.
    :param values: Dense vector values for similarity search. Omitted from
        the serialized form when ``None``.
    :param sparse_values: Sparse vector values for hybrid search. Omitted
        from the serialized form when ``None``.

    Example usage::

        from pinecone import VectorQuery

        # Dense vector query
        results = index.search_documents(
            namespace="movies",
            score_by=VectorQuery(field="embedding", values=[0.1, 0.2, 0.3, ...]),
            top_k=10,
        )

        # Sparse vector query
        results = index.search_documents(
            namespace="movies",
            score_by=VectorQuery(
                field="sparse_embedding",
                sparse_values=SparseValues(indices=[1, 5, 10], values=[0.5, 0.3, 0.2]),
            ),
            top_k=10,
        )
    """

    field: str
    values: list[float] | None = None
    sparse_values: SparseValues | None = None

    def to_dict(self) -> dict:
        """Build the API payload for this query.

        ``field`` is always present. ``values`` is included as-is when set,
        and ``sparse_values`` is included via its own ``to_dict()`` when set.

        :returns: Dictionary representation for the API.
        """
        dense_part = {} if self.values is None else {"values": self.values}
        sparse_part = (
            {}
            if self.sparse_values is None
            else {"sparse_values": self.sparse_values.to_dict()}
        )
        # Unpacking order keeps the keys as: field, values, sparse_values.
        return {"field": self.field, **dense_part, **sparse_part}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,108 @@ | ||
| """Tests for TextQuery and VectorQuery classes.""" | ||
|
|
||
| from pinecone.db_data.dataclasses import TextQuery, VectorQuery, SparseValues | ||
|
|
||
|
|
||
class TestTextQuery:
    """Tests for ``TextQuery`` construction, serialization, and dict-style access."""

    def test_required_params(self):
        # Only the required arguments are given; optional ones default to None.
        tq = TextQuery(field="title", query="pink panther")
        assert tq.field == "title"
        assert tq.query == "pink panther"
        assert tq.boost is None
        assert tq.slop is None

    def test_to_dict_minimal(self):
        # Unset optional fields must not appear in the payload.
        payload = TextQuery(field="title", query="pink panther").to_dict()
        assert payload == {"field": "title", "query": "pink panther"}

    def test_to_dict_with_boost(self):
        payload = TextQuery(field="title", query="pink panther", boost=2.0).to_dict()
        expected = {"field": "title", "query": "pink panther", "boost": 2.0}
        assert payload == expected

    def test_to_dict_with_slop(self):
        payload = TextQuery(field="title", query="pink panther", slop=2).to_dict()
        expected = {"field": "title", "query": "pink panther", "slop": 2}
        assert payload == expected

    def test_to_dict_with_all_options(self):
        payload = TextQuery(field="title", query="pink panther", boost=1.5, slop=3).to_dict()
        expected = {"field": "title", "query": "pink panther", "boost": 1.5, "slop": 3}
        assert payload == expected

    def test_dict_like_access(self):
        # Subscript access should mirror the attribute values.
        tq = TextQuery(field="title", query="pink panther", boost=2.0)
        assert tq["field"] == "title"
        assert tq["query"] == "pink panther"
        assert tq["boost"] == 2.0

    def test_dict_like_get(self):
        # ``get`` returns the stored value, None for an unset field, and the
        # supplied default for an unknown key.
        tq = TextQuery(field="title", query="pink panther")
        assert tq.get("field") == "title"
        assert tq.get("boost") is None
        assert tq.get("nonexistent", "default") == "default"
|
|
||
|
|
||
class TestVectorQuery:
    """Tests for ``VectorQuery`` construction, serialization, and dict-style access."""

    def test_required_params(self):
        # Only ``field`` is required; both vector inputs default to None.
        vq = VectorQuery(field="embedding")
        assert vq.field == "embedding"
        assert vq.values is None
        assert vq.sparse_values is None

    def test_to_dict_minimal(self):
        # With no vectors set, only the field name is serialized.
        payload = VectorQuery(field="embedding").to_dict()
        assert payload == {"field": "embedding"}

    def test_to_dict_with_values(self):
        payload = VectorQuery(field="embedding", values=[0.1, 0.2, 0.3]).to_dict()
        assert payload == {"field": "embedding", "values": [0.1, 0.2, 0.3]}

    def test_to_dict_with_sparse_values(self):
        # The sparse vector is expected to be serialized as a nested dict.
        sparse_vec = SparseValues(indices=[1, 5, 10], values=[0.5, 0.3, 0.2])
        payload = VectorQuery(field="sparse_embedding", sparse_values=sparse_vec).to_dict()
        expected = {
            "field": "sparse_embedding",
            "sparse_values": {"indices": [1, 5, 10], "values": [0.5, 0.3, 0.2]},
        }
        assert payload == expected

    def test_to_dict_with_both_values(self):
        # Dense and sparse inputs may be supplied together.
        sparse_vec = SparseValues(indices=[1, 2], values=[0.5, 0.5])
        vq = VectorQuery(field="hybrid", values=[0.1, 0.2, 0.3], sparse_values=sparse_vec)
        expected = {
            "field": "hybrid",
            "values": [0.1, 0.2, 0.3],
            "sparse_values": {"indices": [1, 2], "values": [0.5, 0.5]},
        }
        assert vq.to_dict() == expected

    def test_dict_like_access(self):
        # Subscript access should mirror the attribute values.
        vq = VectorQuery(field="embedding", values=[0.1, 0.2])
        assert vq["field"] == "embedding"
        assert vq["values"] == [0.1, 0.2]

    def test_dict_like_get(self):
        # ``get`` returns the stored value, None for an unset field, and the
        # supplied default for an unknown key.
        vq = VectorQuery(field="embedding")
        assert vq.get("field") == "embedding"
        assert vq.get("values") is None
        assert vq.get("nonexistent", "default") == "default"
|
|
||
|
|
||
class TestQueryUsageExamples:
    """Test the usage examples from the ticket."""

    def test_text_query_example(self):
        # The query string contains embedded double quotes; check that it
        # comes back unchanged in the serialized payload.
        payload = TextQuery(field="title", query='return "pink panther"').to_dict()
        assert payload["field"] == "title"
        assert payload["query"] == 'return "pink panther"'

    def test_vector_query_example(self):
        payload = VectorQuery(field="embedding", values=[0.1, 0.2, 0.3]).to_dict()
        assert payload["field"] == "embedding"
        assert payload["values"] == [0.1, 0.2, 0.3]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Inconsistent method naming with similar query classes
Low Severity
The new `TextQuery` and `VectorQuery` classes use `to_dict()` for serialization, while semantically similar query classes in the same module (`SearchQuery`, `SearchQueryVector`, `SearchRerank`) all use `as_dict()`. This naming inconsistency within the query-related classes could confuse developers expecting a uniform API.
Additional Locations (1)
pinecone/db_data/dataclasses/vector_query.py#L47-L48