Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
*.userosscache
*.sln.docstates
*.env
*.exe

# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ Azure Cosmos DB provides integrated vector search capabilities for AI-powered se
- DiskANN, Flat, and QuantizedFlat indexing algorithms
- Managed identity authentication
- Comprehensive documentation and examples
- **[nosql-create-index-dotnet](./nosql-create-index-dotnet/)** - .NET sample for existing Cosmos DB NoSQL vector containers
- Data-plane only operations against pre-provisioned DiskANN and QuantizedFlat containers
- `DefaultAzureCredential` authentication and Azure OpenAI embeddings
- Bulk-friendly ingestion and `VectorDistance()` query examples

## 🚀 Features

Expand Down
111 changes: 111 additions & 0 deletions infra/cosmos-db/nosql/vector-containers.bicep
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
metadata description = 'Create DiskANN and QuantizedFlat Azure Cosmos DB for NoSQL containers.'

@description('Name of the Azure Cosmos DB account that hosts both containers.')
param parentAccountName string

@description('Name of the database, inside the parent account, in which both containers are created.')
param parentDatabaseName string

@description('Tags forwarded to each container deployment. Defaults to an empty object.')
param tags object = {}

@description('Enables throughput setting at this resource level. Defaults to false.')
param setThroughput bool = false

@description('Enables autoscale. If setThroughput is enabled, defaults to false.')
param autoscale bool = false

@description('The amount of throughput set. If setThroughput is enabled, defaults to 400.')
param throughput int = 400

// At least one path is required: a container cannot be created without a partition key.
@description('List of hierarchical partition key paths applied to both containers.')
@minLength(1)
param partitionKeyPaths string[]

@description('Vector field path applied to both containers.')
param vectorPath string

// Reject zero or negative dimensions at template validation instead of at the resource provider.
@description('Embedding dimensions applied to both containers.')
@minValue(1)
param vectorDimensions int = 1536

// Restrict to the distance functions accepted by the Cosmos DB vector embedding policy
// so a typo is reported before any resource is deployed.
@description('Distance function applied to both containers.')
@allowed([
  'cosine'
  'dotproduct'
  'euclidean'
])
param distanceFunction string = 'cosine'

@description('Name of the DiskANN container.')
param diskAnnContainerName string = 'hotels_diskann'

@description('Name of the QuantizedFlat container.')
param quantizedFlatContainerName string = 'hotels_quantizedflat'

@description('Prefix for nested deployment names to avoid conflicts when this module is called multiple times.')
param deploymentNamePrefix string = 'vector-container'

// Index exclusions that every container definition starts from.
var sharedExcludedPaths = [
  {
    path: '/_etag/?'
  }
]

// One entry per container to create. Each entry carries the container name, the
// vector index type, and the excluded paths for that container's indexing policy.
var containers = [
  {
    name: diskAnnContainerName
    vectorIndexType: 'diskANN'
    excludedPaths: sharedExcludedPaths
  }
  {
    name: quantizedFlatContainerName
    vectorIndexType: 'quantizedFlat'
    // In addition to the shared exclusions, this entry excludes everything under the vector path.
    excludedPaths: concat(sharedExcludedPaths, [
      {
        path: '${vectorPath}/*'
      }
    ])
  }
]

// Deploys one container per entry of `containers` through the shared container module.
// The loop position is appended to the deployment name prefix so each nested deployment
// gets a distinct name.
module vectorContainers './container.bicep' = [
  for (spec, position) in containers: {
    name: '${deploymentNamePrefix}-${position}'
    params: {
      name: spec.name
      parentAccountName: parentAccountName
      parentDatabaseName: parentDatabaseName
      tags: tags
      setThroughput: setThroughput
      autoscale: autoscale
      throughput: throughput
      partitionKeyPaths: partitionKeyPaths
      // Index every path by default, minus the per-container exclusions, and register
      // a vector index of the entry's type on the vector path.
      indexingPolicy: {
        indexingMode: 'consistent'
        automatic: true
        includedPaths: [
          {
            path: '/*'
          }
        ]
        excludedPaths: spec.excludedPaths
        vectorIndexes: [
          {
            path: vectorPath
            type: spec.vectorIndexType
          }
        ]
      }
      // The embedding definition is identical for every container: same path, float32
      // data type, and the dimensions and distance function given as parameters.
      vectorEmbeddingPolicy: {
        vectorEmbeddings: [
          {
            path: vectorPath
            dataType: 'float32'
            dimensions: vectorDimensions
            distanceFunction: distanceFunction
          }
        ]
      }
    }
  }
]

// Summary of the deployed containers, in the same order as the `containers` variable.
// The name comes from the matching module deployment's output; the remaining fields
// echo the settings that were applied.
output containers array = [
  for (spec, position) in containers: {
    name: vectorContainers[position].outputs.name
    vectorIndexType: spec.vectorIndexType
    partitionKeyPaths: partitionKeyPaths
    vectorPath: vectorPath
    dimensions: vectorDimensions
  }
]
153 changes: 53 additions & 100 deletions infra/database.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -6,89 +6,12 @@ param tags object = {}
param managedIdentityPrincipalId string
param deploymentUserPrincipalId string = ''
param databaseName string
param createIndexDatabaseName string = ''

var database = {
name: databaseName // Database for application
name: databaseName // Database for existing vector-search samples
}

var containers = [
{
name: 'hotels_diskann'
partitionKeyPaths: [
'/HotelId'
]
indexingPolicy: {
indexingMode: 'consistent'
automatic: true
includedPaths: [
{
path: '/*'
}
]
excludedPaths: [
{
path: '/_etag/?'
}
]
vectorIndexes: [
{
path: '/DescriptionVector'
type: 'diskANN'
}
]
}
vectorEmbeddingPolicy: {
vectorEmbeddings: [
{
path: '/DescriptionVector'
dataType: 'float32'
dimensions: 1536
distanceFunction: 'cosine'
}
]
}
}
{
name: 'hotels_quantizedflat'
partitionKeyPaths: [
'/HotelId'
]
indexingPolicy: {
indexingMode: 'consistent'
automatic: true
includedPaths: [
{
path: '/*'
}
]
excludedPaths: [
{
path: '/_etag/?'
}
{
path: '/DescriptionVector/*'
}
]
vectorIndexes: [
{
path: '/DescriptionVector'
type: 'quantizedFlat'
}
]
}
vectorEmbeddingPolicy: {
vectorEmbeddings: [
{
path: '/DescriptionVector'
dataType: 'float32'
dimensions: 1536
distanceFunction: 'cosine'
}
]
}
}
]



module cosmosDbAccount './cosmos-db/nosql/account.bicep' = {
Expand All @@ -106,30 +29,56 @@ module cosmosDbAccount './cosmos-db/nosql/account.bicep' = {
}

module cosmosDbDatabase './cosmos-db/nosql/database.bicep' = {
name: 'cosmos-db-database'
name: 'cosmos-db-database'
params: {
name: database.name
name: database.name
parentAccountName: cosmosDbAccount.outputs.name
tags: tags
setThroughput: false
}
}

module cosmosDbContainers './cosmos-db/nosql/container.bicep' = [
for (container, index) in containers: {
name: 'cosmos-db-container-${index}'
params: {
name: container.name
parentAccountName: cosmosDbAccount.outputs.name
parentDatabaseName: cosmosDbDatabase.outputs.name
tags: tags
setThroughput: false
partitionKeyPaths: container.partitionKeyPaths
indexingPolicy: container.indexingPolicy
vectorEmbeddingPolicy: container.vectorEmbeddingPolicy
}
module vectorSearchContainers './cosmos-db/nosql/vector-containers.bicep' = {
name: 'cosmos-db-vector-search-containers'
params: {
parentAccountName: cosmosDbAccount.outputs.name
parentDatabaseName: cosmosDbDatabase.outputs.name
tags: tags
setThroughput: false
partitionKeyPaths: [
'/HotelId'
]
vectorPath: '/DescriptionVector'
vectorDimensions: 1536
deploymentNamePrefix: 'vector-search-container'
}
}

module createIndexDatabase './cosmos-db/nosql/database.bicep' = if (!empty(createIndexDatabaseName)) {
name: 'cosmos-db-create-index-database'
params: {
name: createIndexDatabaseName
parentAccountName: cosmosDbAccount.outputs.name
tags: tags
setThroughput: false
}
}

module createIndexContainers './cosmos-db/nosql/vector-containers.bicep' = if (!empty(createIndexDatabaseName)) {
name: 'cosmos-db-create-index-containers'
params: {
parentAccountName: cosmosDbAccount.outputs.name
parentDatabaseName: createIndexDatabase!.outputs.name
tags: tags
setThroughput: false
partitionKeyPaths: [
'/PartitionKey'
]
vectorPath: '/DescriptionVector'
vectorDimensions: 1536
deploymentNamePrefix: 'create-index-container'
}
]
}

// Access to data plane only
// no access to control plane (e.g. creating databases, containers, etc.)
Expand Down Expand Up @@ -174,9 +123,13 @@ output accountName string = cosmosDbAccount.outputs.name
output database object = {
name: cosmosDbDatabase.outputs.name
}
output containers array = [
for (_, index) in containers: {
name: cosmosDbContainers[index].outputs.name
}
]
output containers array = vectorSearchContainers.outputs.containers
output createIndexDatabase object = !empty(createIndexDatabaseName)
? {
name: createIndexDatabase!.outputs.name
}
: {}
output createIndexContainers array = !empty(createIndexDatabaseName)
? createIndexContainers!.outputs.containers
: []

8 changes: 8 additions & 0 deletions infra/main.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ module managedIdentity 'br/public:avm/res/managed-identity/user-assigned-identit
var dataFileWithVectors = '../data/HotelsData_toCosmosDB_Vector.json'
var dataFileWithoutVectors = '../data/HotelsData_toCosmosDB.JSON'
var databaseName = 'Hotels'
var createIndexDatabaseName = 'HotelsCreateIndex'
var fieldToEmbed = 'Description'
var embeddedFieldName = 'DescriptionVector'
var embeddingDimensions = '1536'
Expand Down Expand Up @@ -150,6 +151,7 @@ module database './database.bicep' = {
managedIdentityPrincipalId: managedIdentity.outputs.principalId
deploymentUserPrincipalId: deploymentUserPrincipalId
databaseName: databaseName
createIndexDatabaseName: createIndexDatabaseName
}
}

Expand All @@ -175,6 +177,12 @@ output AZURE_OPENAI_EMBEDDING_API_VERSION string = embeddingModelApiVersion
// Environment variables needed by utils.ts
output AZURE_COSMOSDB_ENDPOINT string = database.outputs.endpoint
output AZURE_COSMOSDB_DATABASENAME string = databaseName
// Outputs for the create-index database and its containers.
// Each name output resolves to '' when createIndexDatabaseName is empty.
output AZURE_COSMOSDB_CREATE_INDEX_DATABASENAME string = !empty(createIndexDatabaseName) ? createIndexDatabaseName : ''
// Container names are read by position from the database module's createIndexContainers output:
// index 0 is used for the DiskANN container and index 1 for the QuantizedFlat container.
output AZURE_COSMOSDB_CREATE_INDEX_DISKANN_CONTAINER_NAME string = !empty(createIndexDatabaseName) ? database.outputs.createIndexContainers[0].name : ''
output AZURE_COSMOSDB_CREATE_INDEX_QUANTIZEDFLAT_CONTAINER_NAME string = !empty(createIndexDatabaseName) ? database.outputs.createIndexContainers[1].name : ''
// The three values below are literals, not derived from the deployment.
// NOTE(review): database.bicep passes '/DescriptionVector', '/PartitionKey' and 1536 to the
// create-index containers module; these literals appear intended to mirror those — keep them in sync.
output AZURE_COSMOSDB_CREATE_INDEX_EMBEDDED_FIELD string = 'DescriptionVector'
output AZURE_COSMOSDB_CREATE_INDEX_PARTITION_KEY_PATH string = '/PartitionKey'
output AZURE_COSMOSDB_CREATE_INDEX_EMBEDDING_DIMENSIONS string = '1536'

// Configuration for embedding creation and vector search
output DATA_FILE_WITH_VECTORS string = dataFileWithVectors
Expand Down
Loading