diff --git a/pinecone/__init__.pyi b/pinecone/__init__.pyi index 45ca8caf3..dd36cfbb4 100644 --- a/pinecone/__init__.pyi +++ b/pinecone/__init__.pyi @@ -37,7 +37,12 @@ from pinecone.db_data.dataclasses import ( QueryResponse, UpsertResponse, UpdateResponse, + TextQuery, + VectorQuery, + Document, + DocumentSearchResponse, ) +from pinecone.db_data.query_helpers import text_query, vector_query from pinecone.db_data.models import ( DeleteRequest, DescribeIndexStatsRequest, @@ -84,6 +89,20 @@ from pinecone.db_control.models import ( BackupList, RestoreJobModel, RestoreJobList, + # Schema field types + TextField, + IntegerField, + FloatField, + DenseVectorField, + SparseVectorField, + SemanticTextField, + SchemaField, + SchemaBuilder, + # Deployment classes + ServerlessDeployment, + ByocDeployment, + PodDeployment, + Deployment, ) from pinecone.db_control.models.serverless_spec import ( ScalingConfigManualDict, @@ -165,6 +184,13 @@ __all__ = [ "SearchQuery", "SearchQueryVector", "SearchRerank", + "TextQuery", + "VectorQuery", + "Document", + "DocumentSearchResponse", + # Query helper functions + "text_query", + "vector_query", # Data response classes "FetchResponse", "FetchByMetadataResponse", @@ -215,6 +241,20 @@ __all__ = [ "BackupList", "RestoreJobModel", "RestoreJobList", + # Schema field types + "TextField", + "IntegerField", + "FloatField", + "DenseVectorField", + "SparseVectorField", + "SemanticTextField", + "SchemaField", + "SchemaBuilder", + # Deployment classes + "ServerlessDeployment", + "ByocDeployment", + "PodDeployment", + "Deployment", # Control plane types "ConfigureIndexEmbed", "CreateIndexForModelEmbedTypedDict", diff --git a/tests/integration/rest_sync/db/data/fts/__init__.py b/tests/integration/rest_sync/db/data/fts/__init__.py new file mode 100644 index 000000000..6c8c99922 --- /dev/null +++ b/tests/integration/rest_sync/db/data/fts/__init__.py @@ -0,0 +1 @@ +"""FTS data plane integration tests.""" diff --git 
a/tests/integration/rest_sync/db/data/fts/conftest.py b/tests/integration/rest_sync/db/data/fts/conftest.py new file mode 100644 index 000000000..2c073c47a --- /dev/null +++ b/tests/integration/rest_sync/db/data/fts/conftest.py @@ -0,0 +1,147 @@ +"""Fixtures for FTS data plane integration tests.""" + +import pytest +import uuid +import logging +import dotenv +from pinecone import Pinecone, TextField, IntegerField, DenseVectorField +from tests.integration.helpers import ( + delete_indexes_from_run, + index_tags, + embedding_values, + poll_until_lsn_reconciled, +) + +dotenv.load_dotenv() + +logger = logging.getLogger(__name__) + +RUN_ID = str(uuid.uuid4()) + +FTS_INDEX_DIMENSION = 8 + + +@pytest.fixture(scope="module") +def pc(): + """Create a Pinecone client.""" + return Pinecone() + + +@pytest.fixture(scope="module") +def fts_tags(request): + """Generate tags for FTS test indexes.""" + return index_tags(request, RUN_ID) + + +@pytest.fixture(scope="module") +def fts_index_name(): + """Generate a unique index name for FTS tests.""" + return f"fts-data-{str(uuid.uuid4())[:8]}" + + +@pytest.fixture(scope="module") +def fts_index_host(pc: Pinecone, fts_index_name: str, fts_tags: dict): + """Create an FTS-enabled index with schema and return its host. 
+ + This creates an index with: + - title: full-text searchable text field + - description: full-text searchable text field + - category: filterable text field + - year: filterable integer field + - embedding: dense vector field + """ + schema = { + "title": TextField(full_text_searchable=True), + "description": TextField(full_text_searchable=True), + "category": TextField(filterable=True), + "year": IntegerField(filterable=True), + "embedding": DenseVectorField(dimension=FTS_INDEX_DIMENSION, metric="cosine"), + } + + logger.info(f"Creating FTS index {fts_index_name} with schema") + pc.db.index.create(name=fts_index_name, schema=schema, tags=fts_tags) + + description = pc.db.index.describe(name=fts_index_name) + host = description.host + logger.info(f"FTS index {fts_index_name} created with host: {host}") + + yield host + + logger.info(f"Deleting FTS index {fts_index_name}") + try: + pc.db.index.delete(name=fts_index_name) + except Exception as e: + logger.warning(f"Failed to delete FTS index {fts_index_name}: {e}") + + +@pytest.fixture(scope="module") +def fts_index(pc: Pinecone, fts_index_name: str, fts_index_host: str): + """Get an Index client for the FTS index.""" + return pc.Index(name=fts_index_name, host=fts_index_host) + + +@pytest.fixture(scope="module") +def seeded_fts_namespace(fts_index, fts_index_name): + """Seed the FTS index with test documents and return the namespace. + + Returns the namespace that contains the seeded documents. 
+ """ + namespace = f"test-{str(uuid.uuid4())[:8]}" + + documents = [ + { + "_id": "movie-1", + "title": "Return of the Pink Panther", + "description": "Inspector Clouseau investigates a diamond heist.", + "category": "comedy", + "year": 1975, + "embedding": embedding_values(FTS_INDEX_DIMENSION), + }, + { + "_id": "movie-2", + "title": "The Pink Panther Strikes Again", + "description": "Clouseau's former boss tries to eliminate him.", + "category": "comedy", + "year": 1976, + "embedding": embedding_values(FTS_INDEX_DIMENSION), + }, + { + "_id": "movie-3", + "title": "Revenge of the Pink Panther", + "description": "Clouseau is believed dead and investigates his own murder.", + "category": "comedy", + "year": 1978, + "embedding": embedding_values(FTS_INDEX_DIMENSION), + }, + { + "_id": "movie-4", + "title": "The Matrix", + "description": "A hacker discovers the true nature of reality.", + "category": "scifi", + "year": 1999, + "embedding": embedding_values(FTS_INDEX_DIMENSION), + }, + { + "_id": "movie-5", + "title": "Blade Runner", + "description": "A blade runner must pursue and terminate rogue replicants.", + "category": "scifi", + "year": 1982, + "embedding": embedding_values(FTS_INDEX_DIMENSION), + }, + ] + + logger.info(f"Upserting {len(documents)} documents to namespace {namespace}") + upsert_response = fts_index.upsert_documents(namespace=namespace, documents=documents) + + poll_until_lsn_reconciled(fts_index, upsert_response._response_info, namespace=namespace) + + logger.info(f"Seeded namespace {namespace} with {len(documents)} documents") + return namespace + + +def pytest_sessionfinish(session, exitstatus): + """Clean up indexes created during the test session.""" + logger.info("Running final cleanup after FTS data plane tests...") + pc = Pinecone() + delete_indexes_from_run(pc, RUN_ID) diff --git a/tests/integration/rest_sync/db/data/fts/test_backwards_compatibility.py b/tests/integration/rest_sync/db/data/fts/test_backwards_compatibility.py new file 
mode 100644 index 000000000..ca8d89170 --- /dev/null +++ b/tests/integration/rest_sync/db/data/fts/test_backwards_compatibility.py @@ -0,0 +1,210 @@ +"""Integration tests for backwards compatibility. + +These tests verify that the existing query(), upsert(), fetch(), delete(), and +describe_index_stats() methods continue to work with schema-based indexes. +""" + +import pytest +import os +import uuid +from pinecone import Vector +from tests.integration.helpers import embedding_values, poll_until_lsn_reconciled + +FTS_INDEX_DIMENSION = 8 + + +@pytest.mark.skipif(os.getenv("USE_GRPC") != "false", reason="These tests are for REST client only") +class TestQueryMethodCompatibility: + """Test that existing query() method still works with FTS indexes.""" + + def test_query_with_vector(self, fts_index, seeded_fts_namespace): + """Test that query() works with a vector on FTS index.""" + query_vector = embedding_values(FTS_INDEX_DIMENSION) + + response = fts_index.query( + namespace=seeded_fts_namespace, vector=query_vector, top_k=5, include_metadata=True + ) + + assert response is not None + assert hasattr(response, "matches") + assert len(response.matches) >= 1 + + for match in response.matches: + assert match.id is not None + assert match.score is not None + + def test_query_with_filter(self, fts_index, seeded_fts_namespace): + """Test that query() with filter works on FTS index.""" + query_vector = embedding_values(FTS_INDEX_DIMENSION) + + response = fts_index.query( + namespace=seeded_fts_namespace, + vector=query_vector, + top_k=5, + filter={"category": {"$eq": "comedy"}}, + include_metadata=True, + ) + + assert response is not None + for match in response.matches: + if match.metadata: + assert match.metadata.get("category") == "comedy" + + def test_query_with_include_values(self, fts_index, seeded_fts_namespace): + """Test that query() with include_values works on FTS index.""" + query_vector = embedding_values(FTS_INDEX_DIMENSION) + + response = fts_index.query( + namespace=seeded_fts_namespace, 
vector=query_vector, top_k=3, include_values=True + ) + + assert response is not None + assert len(response.matches) >= 1 + + +@pytest.mark.skipif(os.getenv("USE_GRPC") != "false", reason="These tests are for REST client only") +class TestUpsertMethodCompatibility: + """Test that existing upsert() method still works with FTS indexes.""" + + def test_upsert_with_vector_object(self, fts_index): + """Test that upsert() works with Vector objects on FTS index.""" + namespace = f"compat-upsert-vec-{str(uuid.uuid4())[:8]}" + + vectors = [ + Vector( + id="compat-vec-1", + values=embedding_values(FTS_INDEX_DIMENSION), + metadata={"title": "Test Vector 1", "category": "test", "year": 2025}, + ), + Vector( + id="compat-vec-2", + values=embedding_values(FTS_INDEX_DIMENSION), + metadata={"title": "Test Vector 2", "category": "test", "year": 2025}, + ), + ] + + response = fts_index.upsert(namespace=namespace, vectors=vectors) + + assert response.upserted_count == 2 + + def test_upsert_with_tuple(self, fts_index): + """Test that upsert() works with tuple format on FTS index.""" + namespace = f"compat-upsert-tuple-{str(uuid.uuid4())[:8]}" + + vectors = [ + ("tuple-vec-1", embedding_values(FTS_INDEX_DIMENSION)), + ("tuple-vec-2", embedding_values(FTS_INDEX_DIMENSION)), + ] + + response = fts_index.upsert(namespace=namespace, vectors=vectors) + + assert response.upserted_count == 2 + + def test_upsert_with_tuple_and_metadata(self, fts_index): + """Test that upsert() works with tuple format including metadata.""" + namespace = f"compat-upsert-meta-{str(uuid.uuid4())[:8]}" + + vectors = [ + ( + "meta-vec-1", + embedding_values(FTS_INDEX_DIMENSION), + {"title": "Metadata Test", "category": "compat", "year": 2024}, + ) + ] + + response = fts_index.upsert(namespace=namespace, vectors=vectors) + + assert response.upserted_count == 1 + + def test_upsert_with_dict(self, fts_index): + """Test that upsert() works with dict format on FTS index.""" + namespace = 
f"compat-upsert-dict-{str(uuid.uuid4())[:8]}" + + vectors = [ + { + "id": "dict-vec-1", + "values": embedding_values(FTS_INDEX_DIMENSION), + "metadata": {"title": "Dict Vector", "category": "dict", "year": 2023}, + } + ] + + response = fts_index.upsert(namespace=namespace, vectors=vectors) + + assert response.upserted_count == 1 + + def test_upsert_and_query_roundtrip(self, fts_index): + """Test that upserted vectors can be queried.""" + namespace = f"compat-roundtrip-{str(uuid.uuid4())[:8]}" + test_values = embedding_values(FTS_INDEX_DIMENSION) + + vectors = [ + Vector( + id="roundtrip-vec", + values=test_values, + metadata={"title": "Roundtrip Test", "category": "roundtrip", "year": 2025}, + ) + ] + + upsert_response = fts_index.upsert(namespace=namespace, vectors=vectors) + assert upsert_response.upserted_count == 1 + + poll_until_lsn_reconciled(fts_index, upsert_response._response_info, namespace=namespace) + + query_response = fts_index.query( + namespace=namespace, vector=test_values, top_k=1, include_metadata=True + ) + + assert len(query_response.matches) == 1 + assert query_response.matches[0].id == "roundtrip-vec" + + +@pytest.mark.skipif(os.getenv("USE_GRPC") != "false", reason="These tests are for REST client only") +class TestFetchMethodCompatibility: + """Test that existing fetch() method still works with FTS indexes.""" + + def test_fetch_by_id(self, fts_index, seeded_fts_namespace): + """Test that fetch() works on FTS index.""" + response = fts_index.fetch(namespace=seeded_fts_namespace, ids=["movie-1", "movie-2"]) + + assert response is not None + assert hasattr(response, "vectors") + assert len(response.vectors) >= 1 + + +@pytest.mark.skipif(os.getenv("USE_GRPC") != "false", reason="These tests are for REST client only") +class TestDeleteMethodCompatibility: + """Test that existing delete() method still works with FTS indexes.""" + + def test_delete_by_id(self, fts_index): + """Test that delete() by id works on FTS index.""" + namespace = 
f"compat-delete-{str(uuid.uuid4())[:8]}" + + vectors = [ + Vector( + id="delete-me", + values=embedding_values(FTS_INDEX_DIMENSION), + metadata={"title": "To Delete", "category": "delete", "year": 2025}, + ) + ] + + upsert_response = fts_index.upsert(namespace=namespace, vectors=vectors) + poll_until_lsn_reconciled(fts_index, upsert_response._response_info, namespace=namespace) + + delete_response = fts_index.delete(namespace=namespace, ids=["delete-me"]) + + assert delete_response == {} + + +@pytest.mark.skipif(os.getenv("USE_GRPC") != "false", reason="These tests are for REST client only") +class TestDescribeIndexStatsCompatibility: + """Test that describe_index_stats() still works with FTS indexes.""" + + def test_describe_index_stats(self, fts_index): + """Test that describe_index_stats() returns expected structure.""" + stats = fts_index.describe_index_stats() + + assert stats is not None + assert hasattr(stats, "dimension") + assert hasattr(stats, "total_vector_count") + assert hasattr(stats, "namespaces") + assert stats.dimension == FTS_INDEX_DIMENSION diff --git a/tests/integration/rest_sync/db/data/fts/test_document_response.py b/tests/integration/rest_sync/db/data/fts/test_document_response.py new file mode 100644 index 000000000..313725017 --- /dev/null +++ b/tests/integration/rest_sync/db/data/fts/test_document_response.py @@ -0,0 +1,353 @@ +"""Integration tests for Document and DocumentSearchResponse. + +These tests verify the response structure and access patterns for document search results. 
+""" + +import pytest +import os +from pinecone import text_query + + +@pytest.mark.skipif( + os.getenv("USE_GRPC") != "false", reason="Document operations not supported in gRPC" +) +class TestDocumentAttributeAccess: + """Test Document attribute access patterns.""" + + def test_document_id_property(self, fts_index, seeded_fts_namespace): + """Test accessing document id via property.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, score_by=text_query("title", "panther"), top_k=1 + ) + + assert len(results.documents) >= 1 + doc = results.documents[0] + + # Access via property + assert doc.id is not None + assert isinstance(doc.id, str) + + def test_document_score_property(self, fts_index, seeded_fts_namespace): + """Test accessing document score via property.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, score_by=text_query("title", "panther"), top_k=1 + ) + + assert len(results.documents) >= 1 + doc = results.documents[0] + + # Access via property + assert doc.score is not None + assert isinstance(doc.score, (int, float)) + assert doc.score >= 0 + + def test_document_dynamic_field_attribute_access(self, fts_index, seeded_fts_namespace): + """Test accessing dynamic fields via attribute access.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, + score_by=text_query("title", "panther"), + include_fields=["*"], + top_k=1, + ) + + assert len(results.documents) >= 1 + doc = results.documents[0] + + # Access dynamic fields via attribute + assert doc.title is not None + assert doc.category is not None + assert doc.year is not None + + def test_document_missing_attribute_raises_error(self, fts_index, seeded_fts_namespace): + """Test that accessing a non-existent attribute raises AttributeError.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, score_by=text_query("title", "panther"), top_k=1 + ) + + assert len(results.documents) >= 1 + doc = results.documents[0] 
+ + with pytest.raises(AttributeError): + _ = doc.nonexistent_field + + +@pytest.mark.skipif( + os.getenv("USE_GRPC") != "false", reason="Document operations not supported in gRPC" +) +class TestDocumentDictAccess: + """Test Document dict-style access patterns.""" + + def test_document_dict_access_id(self, fts_index, seeded_fts_namespace): + """Test accessing id via dict-style access.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, score_by=text_query("title", "panther"), top_k=1 + ) + + assert len(results.documents) >= 1 + doc = results.documents[0] + + # Access via dict syntax + assert doc["id"] is not None + assert doc["id"] == doc.id + + def test_document_dict_access_score(self, fts_index, seeded_fts_namespace): + """Test accessing score via dict-style access.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, score_by=text_query("title", "panther"), top_k=1 + ) + + assert len(results.documents) >= 1 + doc = results.documents[0] + + # Access via dict syntax + assert doc["score"] is not None + assert doc["score"] == doc.score + + def test_document_dict_access_dynamic_fields(self, fts_index, seeded_fts_namespace): + """Test accessing dynamic fields via dict-style access.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, + score_by=text_query("title", "panther"), + include_fields=["*"], + top_k=1, + ) + + assert len(results.documents) >= 1 + doc = results.documents[0] + + # Access dynamic fields via dict syntax + assert doc["title"] is not None + assert doc["category"] is not None + assert doc["year"] is not None + + def test_document_missing_key_raises_error(self, fts_index, seeded_fts_namespace): + """Test that accessing a non-existent key raises KeyError.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, score_by=text_query("title", "panther"), top_k=1 + ) + + assert len(results.documents) >= 1 + doc = results.documents[0] + + with 
pytest.raises(KeyError): + _ = doc["nonexistent_key"] + + +@pytest.mark.skipif( + os.getenv("USE_GRPC") != "false", reason="Document operations not supported in gRPC" +) +class TestDocumentGetMethod: + """Test Document get() method with defaults.""" + + def test_document_get_existing_field(self, fts_index, seeded_fts_namespace): + """Test get() method for existing field returns value.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, + score_by=text_query("title", "panther"), + include_fields=["*"], + top_k=1, + ) + + assert len(results.documents) >= 1 + doc = results.documents[0] + + # get() returns value for existing field + assert doc.get("id") == doc.id + assert doc.get("score") == doc.score + assert doc.get("title") == doc.title + + def test_document_get_missing_field_returns_none(self, fts_index, seeded_fts_namespace): + """Test get() method for missing field returns None by default.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, score_by=text_query("title", "panther"), top_k=1 + ) + + assert len(results.documents) >= 1 + doc = results.documents[0] + + # get() returns None for missing field + assert doc.get("nonexistent_field") is None + + def test_document_get_missing_field_returns_default(self, fts_index, seeded_fts_namespace): + """Test get() method for missing field returns provided default.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, score_by=text_query("title", "panther"), top_k=1 + ) + + assert len(results.documents) >= 1 + doc = results.documents[0] + + # get() returns custom default for missing field + assert doc.get("nonexistent_field", "default_value") == "default_value" + assert doc.get("nonexistent_field", 42) == 42 + + +@pytest.mark.skipif( + os.getenv("USE_GRPC") != "false", reason="Document operations not supported in gRPC" +) +class TestDocumentContains: + """Test Document __contains__ method for 'in' operator.""" + + def 
test_document_contains_standard_fields(self, fts_index, seeded_fts_namespace): + """Test 'in' operator for standard fields.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, score_by=text_query("title", "panther"), top_k=1 + ) + + assert len(results.documents) >= 1 + doc = results.documents[0] + + assert "id" in doc + assert "score" in doc + + def test_document_contains_dynamic_fields(self, fts_index, seeded_fts_namespace): + """Test 'in' operator for dynamic fields.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, + score_by=text_query("title", "panther"), + include_fields=["*"], + top_k=1, + ) + + assert len(results.documents) >= 1 + doc = results.documents[0] + + assert "title" in doc + assert "category" in doc + assert "year" in doc + + def test_document_not_contains_missing_field(self, fts_index, seeded_fts_namespace): + """Test 'in' operator returns False for missing fields.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, score_by=text_query("title", "panther"), top_k=1 + ) + + assert len(results.documents) >= 1 + doc = results.documents[0] + + assert "nonexistent_field" not in doc + + +@pytest.mark.skipif( + os.getenv("USE_GRPC") != "false", reason="Document operations not supported in gRPC" +) +class TestDocumentIteration: + """Test Document iteration and dict-like methods.""" + + def test_document_keys(self, fts_index, seeded_fts_namespace): + """Test Document.keys() method.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, + score_by=text_query("title", "panther"), + include_fields=["*"], + top_k=1, + ) + + assert len(results.documents) >= 1 + doc = results.documents[0] + + keys = doc.keys() + assert "id" in keys + assert "score" in keys + assert "title" in keys + + def test_document_values(self, fts_index, seeded_fts_namespace): + """Test Document.values() method.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, + 
score_by=text_query("title", "panther"), + include_fields=["*"], + top_k=1, + ) + + assert len(results.documents) >= 1 + doc = results.documents[0] + + values = doc.values() + assert len(values) >= 2 # At least id and score + + def test_document_items(self, fts_index, seeded_fts_namespace): + """Test Document.items() method.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, + score_by=text_query("title", "panther"), + include_fields=["*"], + top_k=1, + ) + + assert len(results.documents) >= 1 + doc = results.documents[0] + + items = doc.items() + items_dict = dict(items) + assert "id" in items_dict + assert "score" in items_dict + + def test_document_iteration(self, fts_index, seeded_fts_namespace): + """Test iterating over Document yields field names.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, + score_by=text_query("title", "panther"), + include_fields=["*"], + top_k=1, + ) + + assert len(results.documents) >= 1 + doc = results.documents[0] + + field_names = list(doc) + assert "id" in field_names + assert "score" in field_names + + def test_document_to_dict(self, fts_index, seeded_fts_namespace): + """Test Document.to_dict() method.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, + score_by=text_query("title", "panther"), + include_fields=["*"], + top_k=1, + ) + + assert len(results.documents) >= 1 + doc = results.documents[0] + + doc_dict = doc.to_dict() + assert isinstance(doc_dict, dict) + assert "id" in doc_dict + assert "score" in doc_dict + assert doc_dict["id"] == doc.id + assert doc_dict["score"] == doc.score + + +@pytest.mark.skipif( + os.getenv("USE_GRPC") != "false", reason="Document operations not supported in gRPC" +) +class TestDocumentSearchResponseStructure: + """Test DocumentSearchResponse structure.""" + + def test_response_has_documents_list(self, fts_index, seeded_fts_namespace): + """Test that response has documents list.""" + results = 
fts_index.search_documents( + namespace=seeded_fts_namespace, score_by=text_query("title", "panther"), top_k=10 + ) + + assert hasattr(results, "documents") + assert isinstance(results.documents, list) + + def test_response_has_usage(self, fts_index, seeded_fts_namespace): + """Test that response has usage information.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, score_by=text_query("title", "panther"), top_k=10 + ) + + assert hasattr(results, "usage") + assert results.usage is not None + + def test_response_has_response_info(self, fts_index, seeded_fts_namespace): + """Test that response has _response_info with headers.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, score_by=text_query("title", "panther"), top_k=10 + ) + + assert hasattr(results, "_response_info") + assert results._response_info is not None diff --git a/tests/integration/rest_sync/db/data/fts/test_search_documents_text.py b/tests/integration/rest_sync/db/data/fts/test_search_documents_text.py new file mode 100644 index 000000000..e7d29e1e6 --- /dev/null +++ b/tests/integration/rest_sync/db/data/fts/test_search_documents_text.py @@ -0,0 +1,204 @@ +"""Integration tests for search_documents() with text queries. + +These tests verify full-text search functionality using TextQuery. 
+""" + +import pytest +import os +from pinecone import text_query, TextQuery + + +@pytest.mark.skipif( + os.getenv("USE_GRPC") != "false", reason="Document operations not supported in gRPC" +) +class TestSearchDocumentsWithTextQuery: + """Test search_documents() with various text query patterns.""" + + def test_simple_text_search(self, fts_index, seeded_fts_namespace): + """Test simple text search returns matching documents.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, score_by=text_query("title", "pink panther"), top_k=10 + ) + + assert len(results.documents) >= 1 + assert results.usage is not None + + # All results should contain "pink" or "panther" in the title + for doc in results.documents: + assert doc.id is not None + assert doc.score >= 0 + title_lower = doc.title.lower() + assert "pink" in title_lower or "panther" in title_lower + + def test_text_search_with_phrase_matching(self, fts_index, seeded_fts_namespace): + """Test phrase matching with quoted strings.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, score_by=text_query("title", '"Pink Panther"'), top_k=10 + ) + + assert len(results.documents) >= 1 + # Phrase match should find "Pink Panther" as an exact phrase + for doc in results.documents: + assert "Pink Panther" in doc.title + + def test_text_search_with_required_terms(self, fts_index, seeded_fts_namespace): + """Test required terms with +term syntax.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, score_by=text_query("title", "+Return +Pink"), top_k=10 + ) + + # Should find "Return of the Pink Panther" + assert len(results.documents) >= 1 + for doc in results.documents: + title_lower = doc.title.lower() + assert "return" in title_lower + assert "pink" in title_lower + + def test_text_search_with_boost(self, fts_index, seeded_fts_namespace): + """Test boost parameter for relevance scoring.""" + results = fts_index.search_documents( + 
namespace=seeded_fts_namespace, + score_by=text_query("title", "panther", boost=2.0), + top_k=10, + ) + + assert len(results.documents) >= 1 + # Verify boost doesn't break the query + for doc in results.documents: + assert doc.score >= 0 + + def test_text_search_with_slop(self, fts_index, seeded_fts_namespace): + """Test slop parameter for phrase proximity.""" + # The quoted phrase is sent with slop=2 (phrase-proximity tolerance); the + # seeded titles contain "Pink Panther" verbatim, so at least one must match + results = fts_index.search_documents( + namespace=seeded_fts_namespace, + score_by=text_query("title", '"Pink Panther"', slop=2), + top_k=10, + ) + + assert len(results.documents) >= 1 + + def test_text_search_using_class_directly(self, fts_index, seeded_fts_namespace): + """Test using TextQuery class directly instead of helper function.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, + score_by=TextQuery(field="title", query="Matrix"), + top_k=10, + ) + + assert len(results.documents) >= 1 + for doc in results.documents: + assert "Matrix" in doc.title + + def test_text_search_on_description_field(self, fts_index, seeded_fts_namespace): + """Test text search on a different full-text searchable field.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, + score_by=text_query("description", "investigates"), + top_k=10, + ) + + assert len(results.documents) >= 1 + for doc in results.documents: + assert "investigates" in doc.description.lower() + + +@pytest.mark.skipif( + os.getenv("USE_GRPC") != "false", reason="Document operations not supported in gRPC" +) +class TestSearchDocumentsWithTextFiltering: + """Test search_documents() with text queries and metadata filtering.""" + + def test_text_search_with_category_filter(self, fts_index, seeded_fts_namespace): + """Test text search combined with category filter.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, + score_by=text_query("title", "panther"), + 
filter={"category": {"$eq": "comedy"}}, + top_k=10, + ) + + assert len(results.documents) >= 1 + for doc in results.documents: + assert doc.category == "comedy" + + def test_text_search_with_year_filter(self, fts_index, seeded_fts_namespace): + """Test text search combined with year filter.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, + score_by=text_query("title", "panther"), + filter={"year": {"$gte": 1976}}, + top_k=10, + ) + + assert len(results.documents) >= 1 + for doc in results.documents: + assert doc.year >= 1976 + + def test_text_search_with_combined_filters(self, fts_index, seeded_fts_namespace): + """Test text search with multiple filter conditions.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, + score_by=text_query("title", "panther"), + filter={"category": {"$eq": "comedy"}, "year": {"$lte": 1976}}, + top_k=10, + ) + + assert len(results.documents) >= 1 + for doc in results.documents: + assert doc.category == "comedy" + assert doc.year <= 1976 + + def test_text_search_with_text_match_filter(self, fts_index, seeded_fts_namespace): + """Test using $text_match operator in filter.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, + score_by=text_query("title", "panther"), + filter={"description": {"$text_match": "investigates"}}, + top_k=10, + ) + + assert len(results.documents) >= 1 + for doc in results.documents: + assert "investigates" in doc.description.lower() + + +@pytest.mark.skipif( + os.getenv("USE_GRPC") != "false", reason="Document operations not supported in gRPC" +) +class TestSearchDocumentsWithIncludeFields: + """Test search_documents() with include_fields parameter.""" + + def test_include_specific_fields(self, fts_index, seeded_fts_namespace): + """Test including only specific fields in response.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, + score_by=text_query("title", "panther"), + include_fields=["title", "year"], 
+ top_k=10, + ) + + assert len(results.documents) >= 1 + for doc in results.documents: + assert "title" in doc + assert "year" in doc + # Other fields should not be present + assert "description" not in doc or doc.get("description") is None + assert "category" not in doc or doc.get("category") is None + + def test_include_all_fields(self, fts_index, seeded_fts_namespace): + """Test including all fields with wildcard.""" + results = fts_index.search_documents( + namespace=seeded_fts_namespace, + score_by=text_query("title", "panther"), + include_fields=["*"], + top_k=10, + ) + + assert len(results.documents) >= 1 + for doc in results.documents: + assert "title" in doc + assert "description" in doc + assert "category" in doc + assert "year" in doc diff --git a/tests/integration/rest_sync/db/data/fts/test_search_documents_vector.py b/tests/integration/rest_sync/db/data/fts/test_search_documents_vector.py new file mode 100644 index 000000000..fb940cb2e --- /dev/null +++ b/tests/integration/rest_sync/db/data/fts/test_search_documents_vector.py @@ -0,0 +1,165 @@ +"""Integration tests for search_documents() with vector queries. + +These tests verify vector similarity search functionality using VectorQuery. 
"""Integration tests for search_documents() scored by vector queries.

The tests below run vector similarity searches through VectorQuery and the
vector_query() helper.
"""

import os

import pytest

from pinecone import VectorQuery, vector_query
from tests.integration.helpers import embedding_values

FTS_INDEX_DIMENSION = 8

# Single definition of the skip condition that every test class here shares.
_rest_only = pytest.mark.skipif(
    os.getenv("USE_GRPC") != "false", reason="Document operations not supported in gRPC"
)


def _search_by_embedding(index, namespace, top_k, **search_options):
    """Run search_documents() scored by vector_query() on the "embedding" field.

    Args:
        index: Index object exposing search_documents().
        namespace: Namespace to search in.
        top_k: Value passed through as top_k.
        **search_options: Extra keyword arguments (for example filter or
            include_fields) forwarded unchanged to search_documents().

    Returns:
        Whatever search_documents() returns.
    """
    # Build the query values first, as each test did before issuing the search.
    query_values = embedding_values(FTS_INDEX_DIMENSION)
    return index.search_documents(
        namespace=namespace,
        score_by=vector_query("embedding", values=query_values),
        top_k=top_k,
        **search_options,
    )


@_rest_only
class TestSearchDocumentsWithVectorQuery:
    """Dense vector queries against search_documents()."""

    def test_dense_vector_search(self, fts_index, seeded_fts_namespace):
        """A dense vector search returns scored documents and usage data."""
        response = _search_by_embedding(fts_index, seeded_fts_namespace, top_k=10)

        assert len(response.documents) >= 1
        assert response.usage is not None

        for hit in response.documents:
            assert hit.id is not None
            assert hit.score >= 0

    def test_vector_search_using_class_directly(self, fts_index, seeded_fts_namespace):
        """Pass a VectorQuery instance rather than calling vector_query()."""
        query_values = embedding_values(FTS_INDEX_DIMENSION)
        scorer = VectorQuery(field="embedding", values=query_values)

        response = fts_index.search_documents(
            namespace=seeded_fts_namespace, score_by=scorer, top_k=5
        )

        assert len(response.documents) >= 1
        assert len(response.documents) <= 5

    def test_vector_search_with_top_k(self, fts_index, seeded_fts_namespace):
        """No more than top_k documents come back."""
        response = _search_by_embedding(fts_index, seeded_fts_namespace, top_k=2)

        assert len(response.documents) <= 2


@_rest_only
class TestVectorSearchWithFilters:
    """Vector queries combined with filter expressions."""

    def test_vector_search_with_category_filter(self, fts_index, seeded_fts_namespace):
        """Every hit carries the category named in the filter."""
        response = _search_by_embedding(
            fts_index,
            seeded_fts_namespace,
            top_k=10,
            filter={"category": {"$eq": "comedy"}},
        )

        assert all(hit.category == "comedy" for hit in response.documents)

    def test_vector_search_with_year_filter(self, fts_index, seeded_fts_namespace):
        """Every hit satisfies the year lower bound given in the filter."""
        response = _search_by_embedding(
            fts_index,
            seeded_fts_namespace,
            top_k=10,
            filter={"year": {"$gte": 1980}},
        )

        assert all(hit.year >= 1980 for hit in response.documents)

    def test_vector_search_with_text_match_filter(self, fts_index, seeded_fts_namespace):
        """A $text_match filter on title limits hits to matching titles."""
        response = _search_by_embedding(
            fts_index,
            seeded_fts_namespace,
            top_k=10,
            filter={"title": {"$text_match": "panther"}},
        )

        # Compare case-insensitively against the lowercased title.
        assert all("panther" in hit.title.lower() for hit in response.documents)

    def test_vector_search_with_combined_filters(self, fts_index, seeded_fts_namespace):
        """Both filter clauses hold for every hit."""
        response = _search_by_embedding(
            fts_index,
            seeded_fts_namespace,
            top_k=10,
            filter={"category": {"$eq": "scifi"}, "year": {"$gte": 1990}},
        )

        assert all(
            hit.category == "scifi" and hit.year >= 1990 for hit in response.documents
        )


@_rest_only
class TestVectorSearchWithIncludeFields:
    """Vector queries combined with the include_fields parameter."""

    def test_vector_search_include_specific_fields(self, fts_index, seeded_fts_namespace):
        """Requested fields are present on each hit."""
        wanted = ["title", "category"]
        response = _search_by_embedding(
            fts_index, seeded_fts_namespace, top_k=10, include_fields=wanted
        )

        assert len(response.documents) >= 1
        for hit in response.documents:
            for field_name in ("title", "category"):
                assert field_name in hit

    def test_vector_search_include_all_fields(self, fts_index, seeded_fts_namespace):
        """The "*" wildcard brings back all four document fields."""
        response = _search_by_embedding(
            fts_index, seeded_fts_namespace, top_k=10, include_fields=["*"]
        )

        assert len(response.documents) >= 1
        for hit in response.documents:
            for field_name in ("title", "description", "category", "year"):
                assert field_name in hit
"""Integration tests for upsert_documents().

They cover document upserts into schema-based indexes.
"""

import os
import uuid

import pytest

from pinecone import text_query
from tests.integration.helpers import embedding_values, poll_until_lsn_reconciled

FTS_INDEX_DIMENSION = 8

# Single definition of the skip condition that both test classes share.
_rest_only = pytest.mark.skipif(
    os.getenv("USE_GRPC") != "false", reason="Document operations not supported in gRPC"
)


def _fresh_namespace(prefix):
    """Return "<prefix>-" followed by the first 8 characters of a new UUID4."""
    return f"{prefix}-{str(uuid.uuid4())[:8]}"


def _document(doc_id, title, description, category, year):
    """Build one document dict in the field layout used by these tests."""
    return {
        "_id": doc_id,
        "title": title,
        "description": description,
        "category": category,
        "year": year,
        "embedding": embedding_values(FTS_INDEX_DIMENSION),
    }


@_rest_only
class TestUpsertDocuments:
    """Core behaviour of upsert_documents()."""

    def test_upsert_single_document(self, fts_index):
        """One document in, upserted_count of one out."""
        target_namespace = _fresh_namespace("upsert-single")
        payload = [
            _document("doc-1", "Test Document", "A test document for upserting.", "test", 2025)
        ]

        outcome = fts_index.upsert_documents(namespace=target_namespace, documents=payload)

        assert outcome.upserted_count == 1

    def test_upsert_multiple_documents(self, fts_index):
        """A batch of five documents reports upserted_count of five."""
        target_namespace = _fresh_namespace("upsert-batch")
        payload = [
            _document(
                f"doc-{n}",
                f"Document {n}",
                f"Description for document {n}.",
                "batch",
                2020 + n,
            )
            for n in range(5)
        ]

        outcome = fts_index.upsert_documents(namespace=target_namespace, documents=payload)

        assert outcome.upserted_count == 5

    def test_upsert_and_search(self, fts_index):
        """A freshly upserted document can be found by a text query on its title."""
        target_namespace = _fresh_namespace("upsert-search")
        payload = [
            _document(
                "searchable-doc",
                "Unique Searchable Title XYZ789",
                "A unique description for testing.",
                "searchable",
                2024,
            )
        ]

        outcome = fts_index.upsert_documents(namespace=target_namespace, documents=payload)
        assert outcome.upserted_count == 1

        # Poll on the write's response info before querying.
        poll_until_lsn_reconciled(
            fts_index, outcome._response_info, namespace=target_namespace
        )

        found = fts_index.search_documents(
            namespace=target_namespace, score_by=text_query("title", "XYZ789"), top_k=10
        )

        assert len(found.documents) == 1
        only_hit = found.documents[0]
        assert only_hit.id == "searchable-doc"
        assert "XYZ789" in only_hit.title

    def test_upsert_update_existing_document(self, fts_index):
        """Re-upserting the same _id replaces the stored document."""
        target_namespace = _fresh_namespace("upsert-update")

        first_version = [
            _document("update-doc", "Original Title", "Original description.", "original", 2020)
        ]
        first_outcome = fts_index.upsert_documents(
            namespace=target_namespace, documents=first_version
        )
        assert first_outcome.upserted_count == 1

        poll_until_lsn_reconciled(
            fts_index, first_outcome._response_info, namespace=target_namespace
        )

        second_version = [
            _document(
                "update-doc", "Updated Title QRS456", "Updated description.", "updated", 2025
            )
        ]
        second_outcome = fts_index.upsert_documents(
            namespace=target_namespace, documents=second_version
        )
        assert second_outcome.upserted_count == 1

        poll_until_lsn_reconciled(
            fts_index, second_outcome._response_info, namespace=target_namespace
        )

        found = fts_index.search_documents(
            namespace=target_namespace, score_by=text_query("title", "QRS456"), top_k=10
        )

        # Exactly one hit, carrying the second version's values.
        assert len(found.documents) == 1
        only_hit = found.documents[0]
        assert only_hit.title == "Updated Title QRS456"
        assert only_hit.year == 2025

    def test_upsert_response_has_response_info(self, fts_index):
        """The upsert response exposes a non-None _response_info attribute."""
        target_namespace = _fresh_namespace("upsert-info")
        payload = [_document("info-doc", "Info Test", "Testing response info.", "info", 2025)]

        outcome = fts_index.upsert_documents(namespace=target_namespace, documents=payload)

        assert hasattr(outcome, "_response_info")
        assert outcome._response_info is not None


@_rest_only
class TestUpsertDocumentsEdgeCases:
    """Boundary and error cases for upsert_documents()."""

    def test_upsert_empty_documents_raises_error(self, fts_index):
        """An empty document list is rejected with ValueError."""
        target_namespace = _fresh_namespace("upsert-empty")

        with pytest.raises(ValueError, match="At least one document is required"):
            fts_index.upsert_documents(namespace=target_namespace, documents=[])

    def test_upsert_without_namespace_raises_error(self, fts_index):
        """Passing namespace=None is rejected with ValueError."""
        payload = [_document("no-ns-doc", "No Namespace", "Should fail.", "error", 2025)]

        with pytest.raises(ValueError, match="Namespace is required"):
            fts_index.upsert_documents(namespace=None, documents=payload)

    def test_upsert_large_batch(self, fts_index):
        """Fifty documents in one call report upserted_count of fifty."""
        target_namespace = _fresh_namespace("upsert-large")
        payload = [
            _document(
                f"large-doc-{n}",
                f"Large Batch Document {n}",
                f"Description for large batch document {n}.",
                "large",
                2020 + (n % 10),
            )
            for n in range(50)
        ]

        outcome = fts_index.upsert_documents(namespace=target_namespace, documents=payload)

        assert outcome.upserted_count == 50