Skip to content

Add CI validation workflow, copilot instructions, and collection lifecycle #14

Add CI validation workflow, copilot instructions, and collection lifecycle

Add CI validation workflow, copilot instructions, and collection lifecycle #14

# =============================================================================
# Validate Samples — End-to-end validation for all DocumentDB AI samples
# =============================================================================
#
# PURPOSE:
# Validates that every sample in this repo compiles and (optionally) runs
# correctly against a live Azure DocumentDB + Azure OpenAI deployment.
#
# TWO MODES:
# 1. BUILD-ONLY (automatic) — Triggered on pull requests, and on pushes to main,
#    that change ai/** or this workflow file.
#    Builds all 5 languages (TypeScript, Python, Go, Java, .NET) to catch
#    syntax errors, missing imports, and type issues. No secrets needed.
#
# 2. FULL RUN (manual) — Triggered via workflow_dispatch ("Run workflow" button).
# Builds AND executes every sample against real Azure resources.
# Requires the SAMPLES_ENV_FILE repo secret (see setup below).
# Captures all stdout/stderr as downloadable artifacts.
#
# SETUP — Creating the SAMPLES_ENV_FILE secret:
# 1. Go to repo Settings > Secrets and variables > Actions
# 2. Click "New repository secret"
# 3. Name: SAMPLES_ENV_FILE
# 4. Value: paste your entire .env file contents, e.g.:
# AZURE_DOCUMENTDB_CONNECTION_STRING=mongodb+srv://...
# AZURE_DOCUMENTDB_DATABASENAME=quickstart_db
# AZURE_OPENAI_EMBEDDING_ENDPOINT=https://...openai.azure.com
# AZURE_OPENAI_EMBEDDING_MODEL=text-embedding-3-small
# AZURE_OPENAI_EMBEDDING_KEY=abc123...
# AZURE_OPENAI_EMBEDDING_API_VERSION=2024-06-01
# TOP_K=3
# LOAD_SIZE_BATCH=25
# 5. Click "Add secret"
#
# ARTIFACTS:
# Full-run jobs upload output-*.log files as workflow artifacts (7-day retention).
# Download them from the workflow run's "Artifacts" section to inspect sample output.
#
# =============================================================================
# Workflow identity, triggers, token permissions, and run de-duplication.
name: Validate Samples

on:
  # Automatic validation for pull requests that touch the samples or this
  # workflow file.
  pull_request:
    paths:
      - 'ai/**'
      - '.github/workflows/validate-samples.yml'
  # Same path filter for pushes, limited to the main branch.
  push:
    branches:
      - main
    paths:
      - 'ai/**'
      - '.github/workflows/validate-samples.yml'
  # Manual trigger; run_mode chooses between compile + execute and compile only.
  workflow_dispatch:
    inputs:
      run_mode:
        type: choice
        description: 'build-only = compile check only; full = compile + execute against Azure'
        required: true
        options:
          - full
          - build-only
        default: 'full'

# The workflow only needs to read repository contents.
permissions:
  contents: read

concurrency:
  # The group key combines workflow name, event name, ref, and run_mode
  # ('auto' when no run_mode input is present, i.e. PR/push runs).
  group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.ref }}-${{ inputs.run_mode || 'auto' }}
  # Superseded PR/push runs are cancelled; manual dispatches are not.
  cancel-in-progress: ${{ github.event_name != 'workflow_dispatch' }}
jobs:
# ============================================================
# BUILD JOBS — Always run (PR, push, and workflow_dispatch)
# Validates that code compiles without needing any secrets.
# ============================================================
# Compile check for each TypeScript sample: clean dependency install followed
# by the sample's own `build` script.
build-typescript:
  name: Build TypeScript - ${{ matrix.sample }}
  timeout-minutes: 10
  runs-on: ubuntu-latest
  strategy:
    # Keep building the other sample when one of them fails.
    fail-fast: false
    matrix:
      sample: [vector-search-typescript, select-algorithm-typescript]
  # Every `run` step below executes inside the sample's directory.
  defaults:
    run:
      working-directory: ai/${{ matrix.sample }}
  steps:
    - uses: actions/checkout@v6
    - uses: actions/setup-node@v6
      with:
        node-version: '20'
        # npm cache keyed on the sample's own lockfile.
        cache: 'npm'
        cache-dependency-path: ai/${{ matrix.sample }}/package-lock.json
    - run: npm ci
    - run: npm run build
# Compile check for the .NET samples: builds the solution file from the
# repository root.
build-dotnet:
  name: Build .NET
  timeout-minutes: 10
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v6
    - uses: actions/setup-dotnet@v4
      with:
        dotnet-version: "8.0.x"
    # No working-directory: the .sln path is relative to the checkout root.
    - run: dotnet build documentdb-samples.sln
# Compile check for each Go sample.
build-go:
  name: Build Go - ${{ matrix.sample }}
  timeout-minutes: 10
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    matrix:
      include:
        - sample: vector-search-go
        - sample: select-algorithm-go
  steps:
    - uses: actions/checkout@v6
    - uses: actions/setup-go@v6
      with:
        go-version: '1.23'
        cache-dependency-path: ai/${{ matrix.sample }}/go.sum
    - name: Build Go
      working-directory: ai/${{ matrix.sample }}
      # A sample may keep several files that each declare main() next to a
      # shared utils.go under src/. Those cannot be built as one package, so
      # each entry point is compiled on its own together with utils.go.
      # Otherwise the whole module is built in one go.
      run: |
        # Count the src/*.go files that declare a main(); 0 when src/ is absent.
        main_count=0
        if [ -d "src" ]; then
          main_count="$(grep -rl '^func main()' src/*.go 2>/dev/null | wc -l)"
        fi

        if [ "$main_count" -gt 1 ]; then
          cd src
          for f in $(grep -l '^func main()' *.go); do
            echo "Building $f + utils.go"
            # Discard the binary; only the compile result matters.
            go build -o /dev/null "$f" utils.go
          done
        else
          go build ./...
        fi
# Static check for each Python sample: install its requirements, then
# byte-compile every .py file to surface syntax errors.
build-python:
  name: Build Python - ${{ matrix.sample }}
  timeout-minutes: 10
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    matrix:
      sample: [vector-search-python, select-algorithm-python]
  # Both `run` steps execute inside the sample's directory.
  defaults:
    run:
      working-directory: ai/${{ matrix.sample }}
  steps:
    - uses: actions/checkout@v6
    - uses: actions/setup-python@v6
      with:
        python-version: "3.11"
    - run: pip install -r requirements.txt
    - name: Validate syntax
      # py_compile only compiles the files; the samples are not executed here.
      run: find . -name "*.py" -exec python -m py_compile {} +
# Compile check for each Java sample via Maven.
build-java:
  name: Build Java - ${{ matrix.sample }}
  timeout-minutes: 10
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    matrix:
      include:
        - sample: vector-search-java
        - sample: select-algorithm-java
  steps:
    - uses: actions/checkout@v6
    - uses: actions/setup-java@v4
      with:
        java-version: '21'
        distribution: 'temurin'
        cache: 'maven'
    # Compile from the sample's own directory.
    - working-directory: ai/${{ matrix.sample }}
      run: mvn compile -DskipTests
# ============================================================
# FULL-RUN JOBS — Only on workflow_dispatch with run_mode=full
# Executes samples against live Azure resources using the
# SAMPLES_ENV_FILE repo secret. Captures output as artifacts.
# ============================================================
# Gate for the full-run jobs: fails early, with a pointer to the setup
# instructions, when the SAMPLES_ENV_FILE secret is empty or missing.
preflight:
  name: Preflight — Verify secret exists
  if: github.event_name == 'workflow_dispatch' && inputs.run_mode == 'full'
  timeout-minutes: 2
  runs-on: ubuntu-latest
  steps:
    - name: Check SAMPLES_ENV_FILE secret
      # The secret is passed through an environment variable and only tested
      # for emptiness; its content is never printed.
      env:
        ENV_CONTENT: ${{ secrets.SAMPLES_ENV_FILE }}
      run: |
        if [ -n "$ENV_CONTENT" ]; then
          echo "✅ SAMPLES_ENV_FILE secret is configured"
          exit 0
        fi
        echo "::error::SAMPLES_ENV_FILE secret is not set. See workflow header for setup instructions."
        exit 1
# Full-run job for the TypeScript samples. Runs only for a manual dispatch
# with run_mode=full: builds each sample, writes the secret to a .env file,
# executes the compiled scripts, and uploads their output as artifacts.
run-typescript:
  name: Run TypeScript - ${{ matrix.sample }}
  if: github.event_name == 'workflow_dispatch' && inputs.run_mode == 'full'
  # Gate on build-typescript as well as preflight, the same way every other
  # run-* job gates on its build-* job, so nothing is executed against Azure
  # when the compile check has already failed.
  needs: [build-typescript, preflight]
  runs-on: ubuntu-latest
  timeout-minutes: 20
  strategy:
    # Let the second sample run even if the first one fails.
    fail-fast: false
    matrix:
      include:
        # `scripts` is pasted verbatim into the "Run sample" step; each line
        # tees its combined stdout/stderr into an output-*.log file.
        - sample: vector-search-typescript
          scripts: |
            node --env-file .env dist/create-embeddings.js 2>&1 | tee output-embed.log
            node --env-file .env dist/ivf.js 2>&1 | tee output-ivf.log
            node --env-file .env dist/hnsw.js 2>&1 | tee output-hnsw.log
            node --env-file .env dist/diskann.js 2>&1 | tee output-diskann.log
        - sample: select-algorithm-typescript
          scripts: |
            node --env-file .env dist/compare-all.js 2>&1 | tee output-compare.log
  steps:
    - uses: actions/checkout@v6
    - uses: actions/setup-node@v6
      with:
        node-version: '20'
        cache: 'npm'
        cache-dependency-path: ai/${{ matrix.sample }}/package-lock.json
    - run: npm ci
      working-directory: ai/${{ matrix.sample }}
    - run: npm run build
      working-directory: ai/${{ matrix.sample }}
    - name: Write .env from secret
      working-directory: ai/${{ matrix.sample }}
      # The secret reaches the shell through an env var rather than being
      # interpolated into the script text.
      run: printf '%s\n' "$ENV_CONTENT" > .env
      env:
        ENV_CONTENT: ${{ secrets.SAMPLES_ENV_FILE }}
    - name: Run sample
      working-directory: ai/${{ matrix.sample }}
      # pipefail makes a failing `node` fail the step even though its output
      # is piped through tee.
      run: |
        set -euo pipefail
        ${{ matrix.scripts }}
    - name: Upload logs
      # Upload whatever logs exist even when a script failed.
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: logs-typescript-${{ matrix.sample }}
        path: ai/${{ matrix.sample }}/output-*.log
        retention-days: 7
# Full-run job for the Python samples. Runs only for a manual dispatch with
# run_mode=full, and waits for run-typescript, build-python, and preflight.
run-python:
  name: Run Python - ${{ matrix.sample }}
  needs: [run-typescript, build-python, preflight]
  if: github.event_name == 'workflow_dispatch' && inputs.run_mode == 'full'
  timeout-minutes: 20
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    matrix:
      include:
        # `scripts` is pasted verbatim into the "Run sample" step; each line
        # tees its combined stdout/stderr into an output-*.log file.
        - sample: vector-search-python
          scripts: |
            python src/create_embeddings.py 2>&1 | tee output-embed.log
            python src/ivf.py 2>&1 | tee output-ivf.log
            python src/hnsw.py 2>&1 | tee output-hnsw.log
            python src/diskann.py 2>&1 | tee output-diskann.log
        - sample: select-algorithm-python
          scripts: |
            python src/compare_all.py 2>&1 | tee output-compare.log
  # Every `run` step below executes inside the sample's directory.
  defaults:
    run:
      working-directory: ai/${{ matrix.sample }}
  steps:
    - uses: actions/checkout@v6
    - uses: actions/setup-python@v6
      with:
        python-version: "3.11"
    - run: pip install -r requirements.txt
    - name: Write .env from secret
      # The secret is handed to the shell via an env var and written to .env
      # in the sample directory.
      env:
        ENV_CONTENT: ${{ secrets.SAMPLES_ENV_FILE }}
      run: printf '%s\n' "$ENV_CONTENT" > .env
    - name: Run sample
      # pipefail keeps a failing `python` from being hidden by the tee pipe.
      run: |
        set -euo pipefail
        ${{ matrix.scripts }}
    - name: Upload logs
      uses: actions/upload-artifact@v4
      # Collect logs regardless of whether the sample succeeded.
      if: always()
      with:
        name: logs-python-${{ matrix.sample }}
        path: ai/${{ matrix.sample }}/output-*.log
        retention-days: 7
# Full-run job for the Go samples. Runs only for a manual dispatch with
# run_mode=full, and waits for run-python, build-go, and preflight.
run-go:
  name: Run Go - ${{ matrix.sample }}
  needs: [run-python, build-go, preflight]
  if: github.event_name == 'workflow_dispatch' && inputs.run_mode == 'full'
  timeout-minutes: 20
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    matrix:
      include:
        # `workdir` is the directory the Go files are run from; `scripts` is
        # pasted verbatim into the "Run sample" step. Each entry point is run
        # together with utils.go.
        - sample: vector-search-go
          workdir: ai/vector-search-go/src
          scripts: |
            go run create_embeddings.go utils.go 2>&1 | tee output-embed.log
            go run ivf.go utils.go 2>&1 | tee output-ivf.log
            go run hnsw.go utils.go 2>&1 | tee output-hnsw.log
            go run diskann.go utils.go 2>&1 | tee output-diskann.log
        - sample: select-algorithm-go
          workdir: ai/select-algorithm-go/src
          scripts: |
            go run compare_all.go utils.go 2>&1 | tee output-compare.log
  # Both `run` steps execute inside the sample's src directory.
  defaults:
    run:
      working-directory: ${{ matrix.workdir }}
  steps:
    - uses: actions/checkout@v6
    - uses: actions/setup-go@v6
      with:
        go-version: '1.23'
        cache-dependency-path: ai/${{ matrix.sample }}/go.sum
    - name: Write .env from secret
      # The .env file is written next to the Go sources, in the same
      # directory the samples are run from.
      env:
        ENV_CONTENT: ${{ secrets.SAMPLES_ENV_FILE }}
      run: printf '%s\n' "$ENV_CONTENT" > .env
    - name: Run sample
      # pipefail keeps a failing `go run` from being hidden by the tee pipe.
      run: |
        set -euo pipefail
        ${{ matrix.scripts }}
    - name: Upload logs
      uses: actions/upload-artifact@v4
      # Collect logs regardless of whether the sample succeeded.
      if: always()
      with:
        name: logs-go-${{ matrix.sample }}
        path: ${{ matrix.workdir }}/output-*.log
        retention-days: 7
# Full-run job for the Java samples. Runs only for a manual dispatch with
# run_mode=full, and waits for run-go, build-java, and preflight. Settings
# reach the samples as process environment variables (exported through
# GITHUB_ENV) rather than through a .env file.
run-java:
  name: Run Java - ${{ matrix.sample }}
  if: github.event_name == 'workflow_dispatch' && inputs.run_mode == 'full'
  needs: [run-go, build-java, preflight]
  runs-on: ubuntu-latest
  timeout-minutes: 20
  strategy:
    fail-fast: false
    matrix:
      include:
        # `classes` is a space-separated list of main classes inside `package`;
        # each one is executed in turn by the "Run sample" step.
        - sample: vector-search-java
          classes: DiskAnn HNSW IVF
          package: com.azure.documentdb.samples
        - sample: select-algorithm-java
          classes: CompareAll
          package: com.azure.documentdb.selectalgorithm
  steps:
    - uses: actions/checkout@v6
    - uses: actions/setup-java@v4
      with:
        distribution: 'temurin'
        java-version: '21'
        cache: 'maven'
    - run: mvn compile -DskipTests
      working-directory: ai/${{ matrix.sample }}
    - name: Export env vars from secret
      # Parses the secret as KEY=VALUE lines, masks each value in the job log,
      # and exports the pair to later steps through GITHUB_ENV.
      run: |
        while IFS= read -r line; do
          # Tolerate secrets pasted with CRLF line endings: drop the trailing
          # CR so it does not become part of the last value on each line.
          line="${line%$'\r'}"
          # Skip blank lines and comments.
          [[ -z "$line" || "$line" == \#* ]] && continue
          # Skip lines that are not KEY=VALUE. Without this, a line with no
          # '=' would be exported as "<line>=<line>".
          [[ "$line" != *=* ]] && continue
          key="${line%%=*}"
          value="${line#*=}"
          # A line starting with '=' has no variable name to export.
          [[ -z "$key" ]] && continue
          # Only register a mask when there is something to mask.
          # NOTE(review): very short values (e.g. TOP_K=3) are masked too, which
          # can obscure unrelated log text — confirm this is acceptable.
          if [[ -n "$value" ]]; then
            echo "::add-mask::$value"
          fi
          echo "$key=$value" >> "$GITHUB_ENV"
        done <<< "$ENV_CONTENT"
      env:
        ENV_CONTENT: ${{ secrets.SAMPLES_ENV_FILE }}
    - name: Run sample
      working-directory: ai/${{ matrix.sample }}
      # pipefail makes a failing `mvn` fail the step despite the tee pipe.
      # The log file name is the class name lower-cased (${class,,}).
      run: |
        set -euo pipefail
        for class in ${{ matrix.classes }}; do
          echo "=== Running $class ==="
          mvn exec:java -Dexec.mainClass="${{ matrix.package }}.$class" 2>&1 | tee "output-${class,,}.log"
        done
    - name: Upload logs
      # Upload whatever logs exist even when a class failed.
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: logs-java-${{ matrix.sample }}
        path: ai/${{ matrix.sample }}/output-*.log
        retention-days: 7
# Full-run job for the .NET samples. Runs only for a manual dispatch with
# run_mode=full, and waits for run-java, build-dotnet, and preflight. Settings
# reach the samples as process environment variables (exported through
# GITHUB_ENV) rather than through a .env file.
run-dotnet:
  name: Run .NET - ${{ matrix.sample }}
  if: github.event_name == 'workflow_dispatch' && inputs.run_mode == 'full'
  needs: [run-java, build-dotnet, preflight]
  runs-on: ubuntu-latest
  timeout-minutes: 20
  strategy:
    fail-fast: false
    matrix:
      include:
        # `project` is the .csproj path, relative to the repository root,
        # handed to `dotnet run --project`.
        - sample: vector-search-dotnet
          project: ai/vector-search-dotnet/DocumentDBVectorSearch.csproj
        - sample: select-algorithm-dotnet
          project: ai/select-algorithm-dotnet/src/SelectAlgorithm.csproj
  steps:
    - uses: actions/checkout@v6
    - uses: actions/setup-dotnet@v4
      with:
        dotnet-version: '8.0.x'
    - name: Export env vars from secret
      # Parses the secret as KEY=VALUE lines, masks each value in the job log,
      # and exports the pair to later steps through GITHUB_ENV.
      run: |
        while IFS= read -r line; do
          # Tolerate secrets pasted with CRLF line endings: drop the trailing
          # CR so it does not become part of the last value on each line.
          line="${line%$'\r'}"
          # Skip blank lines and comments.
          [[ -z "$line" || "$line" == \#* ]] && continue
          # Skip lines that are not KEY=VALUE. Without this, a line with no
          # '=' would be exported as "<line>=<line>".
          [[ "$line" != *=* ]] && continue
          key="${line%%=*}"
          value="${line#*=}"
          # A line starting with '=' has no variable name to export.
          [[ -z "$key" ]] && continue
          # Only register a mask when there is something to mask.
          # NOTE(review): very short values (e.g. TOP_K=3) are masked too, which
          # can obscure unrelated log text — confirm this is acceptable.
          if [[ -n "$value" ]]; then
            echo "::add-mask::$value"
          fi
          echo "$key=$value" >> "$GITHUB_ENV"
        done <<< "$ENV_CONTENT"
      env:
        ENV_CONTENT: ${{ secrets.SAMPLES_ENV_FILE }}
    - name: Run sample
      # Runs from the repository root, so output-run.log lands there.
      # pipefail makes a failing `dotnet run` fail the step despite the tee pipe.
      run: |
        set -euo pipefail
        dotnet run --project ${{ matrix.project }} 2>&1 | tee output-run.log
    - name: Upload logs
      # Upload the log even when the sample failed.
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: logs-dotnet-${{ matrix.sample }}
        path: output-run.log
        retention-days: 7
# ============================================================
# SUMMARY — Aggregates pass/fail status across all languages
# ============================================================
# Writes a per-language result table to the run's step summary. always() in
# the condition lets it report even when an upstream run-* job failed or was
# skipped; it still only applies to manual dispatches with run_mode=full.
summary:
  name: Results Summary
  if: github.event_name == 'workflow_dispatch' && inputs.run_mode == 'full' && always()
  needs: [preflight, run-typescript, run-python, run-go, run-java, run-dotnet]
  runs-on: ubuntu-latest
  # Bounded like every other job in this workflow; the job only appends a few
  # lines of Markdown.
  timeout-minutes: 2
  steps:
    - name: Generate summary table
      # Each row shows the `result` of the corresponding run-* job as reported
      # by the `needs` context.
      run: |
        # One quoted redirect for the whole table instead of an unquoted
        # $GITHUB_STEP_SUMMARY repeated on every line.
        {
          echo "## 🧪 Full Validation Results"
          echo ""
          echo "| Language | Status |"
          echo "|----------|--------|"
          echo "| TypeScript | ${{ needs.run-typescript.result }} |"
          echo "| Python | ${{ needs.run-python.result }} |"
          echo "| Go | ${{ needs.run-go.result }} |"
          echo "| Java | ${{ needs.run-java.result }} |"
          echo "| .NET | ${{ needs.run-dotnet.result }} |"
          echo ""
          echo "📦 Download artifacts for full output logs."
        } >> "$GITHUB_STEP_SUMMARY"