Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 114 additions & 0 deletions pinecone/db_data/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from pinecone.openapi_support import ApiClient
from pinecone.core.openapi.db_data.api.vector_operations_api import VectorOperationsApi
from pinecone.core.openapi.db_data.api.document_operations_api import DocumentOperationsApi
from pinecone.core.openapi.db_data import API_VERSION
from pinecone.core.openapi.db_data.models import (
QueryResponse as OpenAPIQueryResponse,
Expand All @@ -30,6 +31,10 @@
QueryResponse,
UpsertResponse,
UpdateResponse,
TextQuery,
VectorQuery,
DocumentSearchResponse,
Document,
)
from .interfaces import IndexInterface
from .request_factory import IndexRequestFactory
Expand Down Expand Up @@ -158,6 +163,9 @@ class Index(PluginAware, IndexInterface):
_namespace_resource: "NamespaceResource" | None
""" :meta private: """

_document_api: DocumentOperationsApi | None
""" :meta private: """

def __init__(
self,
api_key: str,
Expand Down Expand Up @@ -202,6 +210,9 @@ def __init__(
self._namespace_resource = None
""" :meta private: """

self._document_api = None
""" :meta private: """

# Pass the same api_client to the ImportFeatureMixin
super().__init__(api_client=self._api_client)

Expand Down Expand Up @@ -253,6 +264,13 @@ def namespace(self) -> "NamespaceResource":
)
return self._namespace_resource

@property
def document_api(self) -> DocumentOperationsApi:
    """Return the document-operations API client, creating it on first access.

    The client is built from this index's ``_api_client`` and stored on
    ``_document_api`` so later accesses reuse the same object.

    :meta private:
    """
    client = self._document_api
    if client is not None:
        return client

    # First access: build the client once and remember it on the instance.
    client = DocumentOperationsApi(api_client=self._api_client)
    self._document_api = client
    return client

def _openapi_kwargs(self, kwargs: dict[str, Any]) -> dict[str, Any]:
    """Pass ``kwargs`` through ``filter_dict`` with ``OPENAPI_ENDPOINT_PARAMS``.

    Presumably this keeps only the keyword arguments that the generated
    OpenAPI endpoints understand -- confirm against ``filter_dict``.
    """
    endpoint_params = OPENAPI_ENDPOINT_PARAMS
    return filter_dict(kwargs, endpoint_params)

Expand Down Expand Up @@ -800,6 +818,102 @@ def search_records(
"""
return self.search(namespace, query=query, rerank=rerank, fields=fields)

@validate_and_convert_errors
def search_documents(
    self,
    namespace: str,
    score_by: TextQuery | VectorQuery,
    filter: FilterTypedDict | None = None,
    include_fields: list[str] | None = None,
    top_k: int = 10,
) -> DocumentSearchResponse:
    """Run a document search against one namespace and return scored matches.

    The arguments are turned into a request by
    ``IndexRequestFactory.search_documents_request``, sent through the
    document-operations API, and the reply is repackaged as a
    ``DocumentSearchResponse``.

    Args:
        namespace: Name of the namespace to search.
        score_by: The :class:`~pinecone.TextQuery` or :class:`~pinecone.VectorQuery`
            object that determines how results are ranked.
        filter: Metadata filter applied to the search. ``$text_match`` is
            supported for FTS filtering. [optional]
        include_fields: Names of the fields to return with each document. Pass
            ``["*"]`` to return all fields. [optional]
        top_k: How many results to return. Defaults to 10.

    Returns:
        DocumentSearchResponse: The matching documents together with usage info.

    Raises:
        ValueError: If ``namespace`` is ``None``.

    Examples:

    .. code-block:: python

        from pinecone import Pinecone, text_query, vector_query

        pc = Pinecone()
        index = pc.Index(host="example-index-host")

        # Text search restricted by a metadata filter
        results = index.search_documents(
            namespace="movies",
            score_by=text_query("title", 'return "pink panther"'),
            filter={"genre": {"$eq": "comedy"}},
            top_k=10,
        )

        # Each document exposes its id, its score and the returned fields
        for doc in results.documents:
            print(f"{doc.id}: {doc.score}")
            print(f"Title: {doc.title}")

        # Vector search combined with a text-match filter
        results = index.search_documents(
            namespace="logs",
            score_by=vector_query("embedding", values=[0.1, 0.2, 0.3]),
            filter={
                "service": {"$eq": "payment-gateway"},
                "message": {"$text_match": '+error +"connection refused"'},
            },
            include_fields=["message", "timestamp"],
            top_k=10,
        )

    """
    if namespace is None:
        raise ValueError("Namespace is required when searching documents")

    search_request = IndexRequestFactory.search_documents_request(
        score_by=score_by, top_k=top_k, filter=filter, include_fields=include_fields
    )
    api_result = self.document_api.search_documents(namespace, search_request)

    # Repackage each OpenAPI document as a Document dataclass. A missing or
    # empty ``documents`` attribute simply yields no documents.
    documents: list[Document] = []
    for raw_doc in getattr(api_result, "documents", None) or ():
        fields = raw_doc.to_dict() if hasattr(raw_doc, "to_dict") else dict(raw_doc)
        # "_id" is always removed from the remaining fields; it is only used
        # as the id when no "id" key is present (empty string if neither is).
        fallback_id = fields.pop("_id", "")
        doc_id = fields.pop("id", fallback_id)
        doc_score = fields.pop("score", 0.0)
        # Whatever is left after removing id/score is passed on as fields.
        documents.append(Document(id=doc_id, score=doc_score, **fields))

    usage = getattr(api_result, "usage", None)

    from pinecone.utils.response_info import extract_response_info

    # Fall back to response info extracted from an empty mapping when the
    # API result does not carry any.
    response_info = getattr(api_result, "_response_info", None)
    if response_info is None:
        response_info = extract_response_info({})

    return DocumentSearchResponse(
        documents=documents, usage=usage, _response_info=response_info
    )

@validate_and_convert_errors
def delete(
self,
Expand Down
104 changes: 104 additions & 0 deletions pinecone/db_data/index_asyncio.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

from pinecone.openapi_support import AsyncioApiClient
from pinecone.core.openapi.db_data.api.vector_operations_api import AsyncioVectorOperationsApi
from pinecone.core.openapi.db_data.api.document_operations_api import AsyncioDocumentOperationsApi
from pinecone.core.openapi.db_data import API_VERSION
from pinecone.core.openapi.db_data.models import (
QueryResponse as OpenAPIQueryResponse,
Expand Down Expand Up @@ -56,6 +57,10 @@
QueryResponse,
UpsertResponse,
UpdateResponse,
TextQuery,
VectorQuery,
DocumentSearchResponse,
Document,
)

from pinecone.openapi_support import OPENAPI_ENDPOINT_PARAMS
Expand Down Expand Up @@ -187,6 +192,9 @@ async def main():
_namespace_resource: "NamespaceResourceAsyncio" | None
""" :meta private: """

_document_api: AsyncioDocumentOperationsApi | None
""" :meta private: """

def __init__(
self,
api_key: str,
Expand Down Expand Up @@ -224,6 +232,9 @@ def __init__(
self._namespace_resource = None
""" :meta private: """

self._document_api = None
""" :meta private: """

async def __aenter__(self) -> Self:
    """Enter the ``async with`` block and return this same instance."""
    return self

Expand Down Expand Up @@ -304,6 +315,13 @@ def namespace(self) -> "NamespaceResourceAsyncio":
self._namespace_resource = NamespaceResourceAsyncio(api_client=self._api_client)
return self._namespace_resource

@property
def document_api(self) -> AsyncioDocumentOperationsApi:
    """Return the asyncio document-operations API client, creating it on first access.

    The client is built from this index's ``_api_client`` and stored on
    ``_document_api`` so later accesses reuse the same object.

    :meta private:
    """
    client = self._document_api
    if client is not None:
        return client

    # First access: build the client once and remember it on the instance.
    client = AsyncioDocumentOperationsApi(api_client=self._api_client)
    self._document_api = client
    return client

@validate_and_convert_errors
async def upsert(
self,
Expand Down Expand Up @@ -795,6 +813,92 @@ async def search_records(
) -> SearchRecordsResponse:
return await self.search(namespace, query=query, rerank=rerank, fields=fields)

@validate_and_convert_errors
async def search_documents(
    self,
    namespace: str,
    score_by: TextQuery | VectorQuery,
    filter: Dict | None = None,
    include_fields: List[str] | None = None,
    top_k: int = 10,
) -> DocumentSearchResponse:
    """Run a document search against one namespace and return scored matches.

    The arguments are turned into a request by
    ``IndexRequestFactory.search_documents_request``, sent through the
    asyncio document-operations API, and the reply is repackaged as a
    ``DocumentSearchResponse``.

    Args:
        namespace: Name of the namespace to search.
        score_by: The :class:`~pinecone.TextQuery` or :class:`~pinecone.VectorQuery`
            object that determines how results are ranked.
        filter: Metadata filter applied to the search. ``$text_match`` is
            supported for FTS filtering. [optional]
        include_fields: Names of the fields to return with each document. Pass
            ``["*"]`` to return all fields. [optional]
        top_k: How many results to return. Defaults to 10.

    Returns:
        DocumentSearchResponse: The matching documents together with usage info.

    Raises:
        ValueError: If ``namespace`` is ``None``.

    Examples:

    .. code-block:: python

        import asyncio
        from pinecone import Pinecone, text_query, vector_query

        async def main():
            pc = Pinecone()
            async with pc.IndexAsyncio(host="example-index-host") as index:
                # Text search restricted by a metadata filter
                results = await index.search_documents(
                    namespace="movies",
                    score_by=text_query("title", 'return "pink panther"'),
                    filter={"genre": {"$eq": "comedy"}},
                    top_k=10,
                )

                # Each document exposes its id and its score
                for doc in results.documents:
                    print(f"{doc.id}: {doc.score}")

        asyncio.run(main())

    """
    if namespace is None:
        raise ValueError("Namespace is required when searching documents")

    search_request = IndexRequestFactory.search_documents_request(
        score_by=score_by, top_k=top_k, filter=filter, include_fields=include_fields
    )
    api_result = await self.document_api.search_documents(namespace, search_request)

    # Repackage each OpenAPI document as a Document dataclass. A missing or
    # empty ``documents`` attribute simply yields no documents.
    documents: List[Document] = []
    for raw_doc in getattr(api_result, "documents", None) or ():
        fields = raw_doc.to_dict() if hasattr(raw_doc, "to_dict") else dict(raw_doc)
        # "_id" is always removed from the remaining fields; it is only used
        # as the id when no "id" key is present (empty string if neither is).
        fallback_id = fields.pop("_id", "")
        doc_id = fields.pop("id", fallback_id)
        doc_score = fields.pop("score", 0.0)
        # Whatever is left after removing id/score is passed on as fields.
        documents.append(Document(id=doc_id, score=doc_score, **fields))

    usage = getattr(api_result, "usage", None)

    from pinecone.utils.response_info import extract_response_info

    # Fall back to response info extracted from an empty mapping when the
    # API result does not carry any.
    response_info = getattr(api_result, "_response_info", None)
    if response_info is None:
        response_info = extract_response_info({})

    return DocumentSearchResponse(
        documents=documents, usage=usage, _response_info=response_info
    )

def _openapi_kwargs(self, kwargs: dict[str, Any]) -> dict[str, Any]:
    """Pass ``kwargs`` through ``filter_dict`` with ``OPENAPI_ENDPOINT_PARAMS``.

    Presumably this keeps only the keyword arguments that the generated
    OpenAPI endpoints understand -- confirm against ``filter_dict``.
    """
    endpoint_params = OPENAPI_ENDPOINT_PARAMS
    return filter_dict(kwargs, endpoint_params)

Expand Down
54 changes: 54 additions & 0 deletions pinecone/db_data/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@
UpdateResponse,
SparseValues,
Vector,
TextQuery,
VectorQuery,
DocumentSearchResponse,
)
from pinecone.utils import require_kwargs

Expand Down Expand Up @@ -453,6 +456,57 @@ def search_records(
"""
pass

@abstractmethod
def search_documents(
    self,
    namespace: str,
    score_by: TextQuery | VectorQuery,
    filter: FilterTypedDict | None = None,
    include_fields: list[str] | None = None,
    top_k: int = 10,
) -> DocumentSearchResponse:
    """Search for documents in a namespace.

    This operation searches a namespace using text or vector queries and returns
    matching documents with their scores.

    This is an abstract declaration: it fixes the signature and documents the
    contract, while the actual search is supplied by the implementing class.

    Args:
        namespace: The namespace to search in.
        score_by: A :class:`~pinecone.TextQuery` or :class:`~pinecone.VectorQuery`
            object defining how to rank results.
        filter: Metadata filter to apply. Supports ``$text_match`` for FTS
            filtering. [optional]
        include_fields: List of fields to include in results. Use ``["*"]``
            to return all fields. [optional]
        top_k: Number of results to return. Defaults to 10.

    Returns:
        DocumentSearchResponse: Response containing matching documents and usage info.

    Examples:

    .. code-block:: python

        from pinecone import Pinecone, text_query, vector_query

        pc = Pinecone()
        index = pc.Index(host="example-index-host")

        # Simple text search
        results = index.search_documents(
            namespace="movies",
            score_by=text_query("title", 'return "pink panther"'),
            filter={"genre": {"$eq": "comedy"}},
            top_k=10,
        )

        # Access results
        for doc in results.documents:
            print(f"{doc.id}: {doc.score}")
            print(f"Title: {doc.title}")

    """
    pass

@abstractmethod
def delete(
self,
Expand Down
Loading