class Document:
    """A document returned from a document search operation.

    Every document exposes two standard fields, ``id`` and ``score``, as
    read-only properties. Any additional keyword arguments given to the
    constructor are stored as dynamic fields and can be read through
    attribute access, dict-style access, or :meth:`get`.

    Attribute access to dynamic fields is only a fallback: Python calls
    ``__getattr__`` after normal attribute lookup has failed, so a dynamic
    field whose name matches a real attribute of the class (for example
    ``keys``, ``get`` or ``items``) must be read with ``doc["name"]`` or
    ``doc.get("name")`` instead.

    The class defines ``__eq__`` without ``__hash__``, so instances are
    unhashable.

    :param id: The unique identifier for the document.
    :param score: The relevance score for the document.
    :param fields: Additional dynamic fields from the document.

    Example usage::

        # Assuming results from index.search_documents()
        for doc in results.documents:
            print(doc.id)                   # Standard field
            print(doc.score)                # Standard field
            print(doc.title)                # Attribute access to dynamic field
            print(doc["title"])             # Dict access to dynamic field
            print(doc.get("title"))         # Safe access (None if missing)
            print(doc.get("title", "N/A"))  # Safe access with default
    """

    __slots__ = ("_id", "_score", "_fields")

    _id: str
    _score: float
    _fields: dict[str, Any]

    def __init__(self, id: str, score: float, **fields: Any) -> None:
        """Initialize a Document.

        :param id: The unique identifier for the document.
        :param score: The relevance score for the document.
        :param fields: Additional dynamic fields from the document.
        """
        object.__setattr__(self, "_id", id)
        object.__setattr__(self, "_score", score)
        object.__setattr__(self, "_fields", fields)

    @property
    def id(self) -> str:
        """The unique identifier for the document."""
        return self._id

    @property
    def score(self) -> float:
        """The relevance score for the document."""
        return self._score

    def __getattr__(self, name: str) -> Any:
        """Allow attribute access to dynamic fields.

        Only called when normal attribute lookup fails.

        :param name: The field name to access.
        :returns: The field value.
        :raises AttributeError: If the field does not exist.
        """
        # Read the slot through object.__getattribute__ so that a missing
        # ``_fields`` slot cannot re-enter this method. The slot is unset on
        # instances created via ``__new__`` (as ``copy`` and ``pickle`` do
        # before probing for ``__setstate__``); using ``self._fields`` here
        # would then recurse until RecursionError.
        try:
            fields = object.__getattribute__(self, "_fields")
        except AttributeError:
            fields = None
        if fields is not None and name in fields:
            return fields[name]
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")

    def __getitem__(self, key: str) -> Any:
        """Allow dict-style access to fields.

        :param key: The field name to access.
        :returns: The field value.
        :raises KeyError: If the field does not exist.
        """
        if key == "id":
            return self._id
        if key == "score":
            return self._score
        if key in self._fields:
            return self._fields[key]
        raise KeyError(key)

    def get(self, key: str, default: Any = None) -> Any:
        """Safe access with default value.

        :param key: The field name to access.
        :param default: The default value if the field does not exist.
        :returns: The field value or the default.
        """
        try:
            return self[key]
        except KeyError:
            return default

    def keys(self) -> list[str]:
        """Return all field names.

        :returns: List of all field names, ``id`` and ``score`` first.
        """
        return ["id", "score", *self._fields]

    def values(self) -> list[Any]:
        """Return all field values.

        :returns: List of all field values, in the same order as :meth:`keys`.
        """
        return [self._id, self._score, *self._fields.values()]

    def items(self) -> list[tuple[str, Any]]:
        """Return all field name-value pairs.

        :returns: List of (name, value) tuples, in the same order as :meth:`keys`.
        """
        return [("id", self._id), ("score", self._score), *self._fields.items()]

    def __contains__(self, key: str) -> bool:
        """Check if a field exists.

        :param key: The field name to check.
        :returns: True if the field exists, False otherwise.
        """
        return key in ("id", "score") or key in self._fields

    def __iter__(self) -> Iterator[str]:
        """Iterate over field names.

        :returns: Iterator over field names, ``id`` and ``score`` first.
        """
        yield "id"
        yield "score"
        yield from self._fields

    def __len__(self) -> int:
        """Return the number of fields.

        :returns: Total number of fields including id and score.
        """
        return 2 + len(self._fields)

    def __repr__(self) -> str:
        """Return a string representation.

        :returns: ``ClassName(id=..., score=..., field=value, ...)``.
        """
        parts = [f"id={self._id!r}", f"score={self._score!r}"]
        parts.extend(f"{name}={value!r}" for name, value in self._fields.items())
        # Use the runtime class name so subclasses are reported correctly,
        # matching the AttributeError message raised by __getattr__.
        return f"{type(self).__name__}({', '.join(parts)})"

    def __eq__(self, other: object) -> bool:
        """Check equality with another Document.

        :param other: The object to compare with.
        :returns: True if id, score and all dynamic fields are equal;
            ``NotImplemented`` when ``other`` is not a Document.
        """
        if not isinstance(other, Document):
            return NotImplemented
        return (
            self._id == other._id and self._score == other._score and self._fields == other._fields
        )

    def to_dict(self) -> dict[str, Any]:
        """Convert to a dictionary.

        :returns: A new dictionary with ``id``, ``score`` and every dynamic field.
        """
        return {"id": self._id, "score": self._score, **self._fields}
def _empty_response_info() -> ResponseInfo:
    """Build the placeholder response info used when none is supplied."""
    return cast(ResponseInfo, {"raw_headers": {}})


@dataclass
class DocumentSearchResponse(DictLike):
    """Result container for a document search request.

    :param documents: The documents matching the search query.
    :param usage: Usage information for the request; ``None`` when not given.

    The private ``_response_info`` field defaults to a mapping holding an
    empty ``raw_headers`` dict. It appears in the repr but is excluded from
    equality comparison.

    Example usage::

        results = index.search_documents(
            namespace="movies",
            score_by=text_query("title", "pink panther"),
            top_k=10,
        )

        print(f"Found {len(results.documents)} documents")
        print(f"Read units: {results.usage.read_units}")

        for doc in results.documents:
            print(f"{doc.id}: {doc.score}")
    """

    documents: list[Document]
    usage: Usage | None = None
    _response_info: ResponseInfo = field(
        default_factory=_empty_response_info, repr=True, compare=False
    )
class TestDocument:
    """Unit tests covering the behaviour of the Document class."""

    def test_basic_construction(self):
        """A document built from id and score exposes both."""
        document = Document(id="doc1", score=0.95)
        assert document.id == "doc1"
        assert document.score == 0.95

    def test_construction_with_fields(self):
        """Extra keyword arguments become dynamic fields."""
        document = Document(id="doc1", score=0.95, title="Test Title", year=2024)
        assert document.id == "doc1"
        assert document.score == 0.95
        assert document.title == "Test Title"
        assert document.year == 2024

    def test_attribute_access_dynamic_fields(self):
        """Dynamic fields are readable as attributes."""
        document = Document(id="doc1", score=0.8, title="Pink Panther", genre="Comedy")
        assert document.title == "Pink Panther"
        assert document.genre == "Comedy"

    def test_attribute_access_missing_field(self):
        """Reading an unknown attribute raises AttributeError."""
        document = Document(id="doc1", score=0.8)
        with pytest.raises(AttributeError, match="has no attribute 'nonexistent'"):
            getattr(document, "nonexistent")

    def test_dict_access_standard_fields(self):
        """Standard fields are readable with subscript syntax."""
        document = Document(id="doc1", score=0.75)
        assert document["id"] == "doc1"
        assert document["score"] == 0.75

    def test_dict_access_dynamic_fields(self):
        """Dynamic fields are readable with subscript syntax."""
        document = Document(id="doc1", score=0.8, title="Test")
        assert document["title"] == "Test"

    def test_dict_access_missing_field(self):
        """Subscripting an unknown field raises KeyError."""
        document = Document(id="doc1", score=0.8)
        with pytest.raises(KeyError):
            document["nonexistent"]

    def test_get_standard_fields(self):
        """get() returns the standard fields."""
        document = Document(id="doc1", score=0.9)
        assert document.get("id") == "doc1"
        assert document.get("score") == 0.9

    def test_get_dynamic_fields(self):
        """get() returns dynamic fields."""
        document = Document(id="doc1", score=0.9, title="Test")
        assert document.get("title") == "Test"

    def test_get_missing_field_returns_none(self):
        """get() yields None for an unknown field when no default is given."""
        document = Document(id="doc1", score=0.9)
        assert document.get("nonexistent") is None

    def test_get_missing_field_returns_default(self):
        """get() yields the supplied default for an unknown field."""
        document = Document(id="doc1", score=0.9)
        assert document.get("nonexistent", "N/A") == "N/A"
        assert document.get("nonexistent", 0) == 0

    def test_keys(self):
        """keys() lists standard and dynamic field names."""
        names = Document(id="doc1", score=0.9, title="Test", year=2024).keys()
        for expected in ("id", "score", "title", "year"):
            assert expected in names
        assert len(names) == 4

    def test_values(self):
        """values() lists standard and dynamic field values."""
        contents = Document(id="doc1", score=0.9, title="Test").values()
        for expected in ("doc1", 0.9, "Test"):
            assert expected in contents

    def test_items(self):
        """items() lists (name, value) pairs for every field."""
        pairs = Document(id="doc1", score=0.9, title="Test").items()
        for expected in (("id", "doc1"), ("score", 0.9), ("title", "Test")):
            assert expected in pairs

    def test_contains_standard_fields(self):
        """The ``in`` operator recognises the standard fields."""
        document = Document(id="doc1", score=0.9)
        assert "id" in document
        assert "score" in document

    def test_contains_dynamic_fields(self):
        """The ``in`` operator recognises dynamic fields and rejects unknown ones."""
        document = Document(id="doc1", score=0.9, title="Test")
        assert "title" in document
        assert "nonexistent" not in document

    def test_iter(self):
        """Iterating a document yields its field names."""
        document = Document(id="doc1", score=0.9, title="Test", year=2024)
        seen = list(document)
        for expected in ("id", "score", "title", "year"):
            assert expected in seen

    def test_len(self):
        """len() counts standard plus dynamic fields."""
        bare = Document(id="doc1", score=0.9)
        assert len(bare) == 2

        enriched = Document(id="doc1", score=0.9, title="Test", year=2024)
        assert len(enriched) == 4

    def test_repr_minimal(self):
        """repr() of a bare document mentions the class, id and score."""
        text = repr(Document(id="doc1", score=0.9))
        for fragment in ("Document", "doc1", "0.9"):
            assert fragment in text

    def test_repr_with_fields(self):
        """repr() includes dynamic field names and values."""
        text = repr(Document(id="doc1", score=0.9, title="Test"))
        for fragment in ("Document", "doc1", "title", "Test"):
            assert fragment in text

    def test_equality(self):
        """Documents compare equal only when all their data matches."""
        first = Document(id="doc1", score=0.9, title="Test")
        twin = Document(id="doc1", score=0.9, title="Test")
        different = Document(id="doc2", score=0.9, title="Test")

        assert first == twin
        assert first != different

    def test_equality_different_types(self):
        """A document never equals a non-Document object."""
        document = Document(id="doc1", score=0.9)
        assert document != "not a document"
        assert document != {"id": "doc1", "score": 0.9}

    def test_to_dict(self):
        """to_dict() returns every field as a plain dict."""
        document = Document(id="doc1", score=0.9, title="Test", year=2024)
        expected = {"id": "doc1", "score": 0.9, "title": "Test", "year": 2024}
        assert document.to_dict() == expected

    def test_to_dict_minimal(self):
        """to_dict() on a bare document holds only id and score."""
        document = Document(id="doc1", score=0.9)
        assert document.to_dict() == {"id": "doc1", "score": 0.9}
class TestDocumentSearchResponse:
    """Unit tests covering the DocumentSearchResponse dataclass."""

    def test_basic_construction(self):
        """A response keeps the documents it was given, in order."""
        response = DocumentSearchResponse(
            documents=[
                Document(id="doc1", score=0.95, title="First"),
                Document(id="doc2", score=0.85, title="Second"),
            ]
        )
        assert len(response.documents) == 2
        first, second = response.documents[0], response.documents[1]
        assert first.id == "doc1"
        assert second.id == "doc2"

    def test_empty_documents(self):
        """A response may hold an empty document list."""
        empty = DocumentSearchResponse(documents=[])
        assert len(empty.documents) == 0

    def test_usage_none_by_default(self):
        """usage defaults to None when not supplied."""
        empty = DocumentSearchResponse(documents=[])
        assert empty.usage is None

    def test_dict_like_access(self):
        """Fields are reachable through subscript and get()."""
        hits = [Document(id="doc1", score=0.9)]
        response = DocumentSearchResponse(documents=hits)
        assert response["documents"] == hits
        assert response.get("usage") is None

    def test_dict_like_get_with_default(self):
        """get() falls back to the supplied default for unknown keys."""
        empty = DocumentSearchResponse(documents=[])
        assert empty.get("nonexistent", "default") == "default"
class TestDocumentUsageExamples:
    """Checks that the documented usage examples behave as described."""

    def test_ticket_example(self):
        """Walk through the access patterns shown in the ticket description."""
        # Stand-in for a single hit from index.search_documents().
        movie = Document(id="movie-123", score=0.95, title="The Pink Panther", year=1963)

        # Standard fields.
        assert movie.id == "movie-123"
        assert movie.score == 0.95

        # Dynamic field through attribute access.
        assert movie.title == "The Pink Panther"

        # Dynamic field through subscript access.
        assert movie["title"] == "The Pink Panther"

        # get() with a default, for a present and a missing field.
        assert movie.get("title", "N/A") == "The Pink Panther"
        assert movie.get("director", "Unknown") == "Unknown"

    def test_iterate_over_results(self):
        """Iterating response.documents preserves order of ids and scores."""
        response = DocumentSearchResponse(
            documents=[
                Document(id="doc1", score=0.95, title="First"),
                Document(id="doc2", score=0.85, title="Second"),
                Document(id="doc3", score=0.75, title="Third"),
            ]
        )

        collected_ids = []
        for hit in response.documents:
            collected_ids.append(hit.id)
        assert collected_ids == ["doc1", "doc2", "doc3"]

        collected_scores = []
        for hit in response.documents:
            collected_scores.append(hit.score)
        assert collected_scores == [0.95, 0.85, 0.75]