117 changes: 117 additions & 0 deletions .github/workflows/related_docs_test.yaml
@@ -0,0 +1,117 @@
name: Related docs test

on:
  push:
  workflow_dispatch:

jobs:
  related-docs-test:
    runs-on: ubuntu-latest
    steps:
      - name: Update apt and install packages
        run: |
          df -h
          echo ${PWD}
          sudo apt update
          sudo apt install -y curl git jq
          sudo apt clean
          sudo apt autoremove --purge -y

      - name: Set up Docker Compose
        uses: docker/setup-compose-action@v1
        with:
          version: v2.34.0

      - name: Free up space and move Docker storage to /mnt
        run: |
          echo "Stopping Docker..."
          sudo systemctl stop docker

          echo "Creating new Docker root at /mnt/docker..."
          sudo mkdir -p /mnt/docker
          sudo rsync -aqxP /var/lib/docker/ /mnt/docker

          echo "Updating Docker daemon config..."
          echo '{"data-root": "/mnt/docker"}' | sudo tee /etc/docker/daemon.json

          cat /etc/docker/daemon.json

          echo "Restarting Docker..."
          sudo systemctl start docker

          echo "Verifying Docker root directory:"
          docker info | grep "Docker Root Dir"

      - name: Checkout current branch
        uses: actions/checkout@v5
        with:
          submodules: true

      - name: Set up env
        run: |
          cp .github/workflows/related_docs_test/.env ./
          cp .github/workflows/related_docs_test/*.yaml ./

      - name: Patch vllm to v0.9.2
        run: sed -Ei 's/checkout v[0-9\.]+/checkout v0.9.2/' extern/vllm/Dockerfile.cpu

      - name: Build
        run: docker compose --profile cpu build

      - name: Run
        run: |
          docker compose --profile cpu up -d || docker logs openrag-vllm-cpu-1
          .github/workflows/smoke_test/wait_for_healthy.sh openrag-vllm-cpu-1

      - name: Cleanup
        run: |
          docker container prune -f
          docker image prune -f
          docker builder prune -f
          df -h

      - name: List containers
        run: docker container ls

      - name: Install Python venv
        run: |
          python3 -m venv venv
          source venv/bin/activate
          pip3 install -r utility/requirements.txt

      - name: Wait for OpenRag to start
        run: |
          .github/workflows/related_docs_test/wait_for_services.sh

      - name: Index small documents (relations test)
        run: |
          echo "Sun is shining" | .github/workflows/related_docs_test/index_file.sh http://localhost:8080 rel_test root.txt
          sleep 10s
          echo "Cats meow" | .github/workflows/related_docs_test/index_child_file.sh http://localhost:8080 rel_test child_a.txt root.txt
          sleep 10s
          echo "There are letters in the text" | .github/workflows/related_docs_test/index_child_file.sh http://localhost:8080 rel_test leaf_a.txt child_a.txt
          sleep 30s

      - name: Query small documents (relations test)
        run: |
          echo "Sun Is Shining" | .github/workflows/related_docs_test/chat_completion.sh http://localhost:8080 rel_test | grep -v '{\\"' | grep file_id | grep "root.txt"
          echo "Sun Is Shining" | .github/workflows/related_docs_test/chat_completion.sh http://localhost:8080 rel_test | grep -v '{\\"' | grep file_id | grep "child_a.txt"
          echo "Cats meow" | .github/workflows/related_docs_test/chat_completion.sh http://localhost:8080 rel_test | grep -v '{\\"' | grep file_id | grep "root.txt"
          echo "Cats meow" | .github/workflows/related_docs_test/chat_completion.sh http://localhost:8080 rel_test | grep -v '{\\"' | grep file_id | grep "child_a.txt"
          echo "Cats meow" | .github/workflows/related_docs_test/chat_completion.sh http://localhost:8080 rel_test | grep -v '{\\"' | grep file_id | grep "leaf_a.txt"
          echo "There are letters in the text" | .github/workflows/related_docs_test/chat_completion.sh http://localhost:8080 rel_test | grep -v '{\\"' | grep file_id | grep "child_a.txt"
          echo "There are letters in the text" | .github/workflows/related_docs_test/chat_completion.sh http://localhost:8080 rel_test | grep -v '{\\"' | grep file_id | grep "leaf_a.txt"

67 changes: 67 additions & 0 deletions .github/workflows/related_docs_test/.env
@@ -0,0 +1,67 @@
# LLM
BASE_URL=http://mock-llm:8080/v1/
API_KEY=sk-
MODEL=mock-model

# VLM (Visual Language Model): you can set it to the same values as the LLM if your LLM supports images
VLM_BASE_URL=http://mock-llm:8080/v1/
VLM_API_KEY=sk-
VLM_MODEL=mock-model

RAGMODE=SimpleRag
## FastAPI App (no need to change it)
# APP_PORT=8080 # this is the forwarded port
# API_NUM_WORKERS=1 # Number of uvicorn workers for the FastAPI app

## To enable API HTTP authentication via HTTPBearer
# AUTH_TOKEN=sk-openrag-1234

# SAVE_UPLOADED_FILES=true # useful for Chainlit source viewing

# When set to true, the Chainlit chat UI is mounted on the FastAPI app (default: true)
## WITH_CHAINLIT_UI=true

# RETRIEVER
CONTEXTUAL_RETRIEVAL=false

# EMBEDDER
EMBEDDER_MODEL_NAME=Qwen/Qwen3-Embedding-0.6B
EMBEDDER_BASE_URL=http://vllm:8000/v1
# EMBEDDER_API_KEY=EMPTY

# RERANKER
RERANKER_ENABLED=false
RERANKER_MODEL=Alibaba-NLP/gte-multilingual-reranker-base # or jinaai/jina-reranker-v2-base-multilingual

# Prompts
PROMPTS_DIR=../prompts/example1

# Loaders
PDFLoader=MarkerLoader
XDG_CACHE_HOME=/app/model_weights
# If using MarkerLoader
MARKER_MAX_TASKS_PER_CHILD=1
MARKER_MAX_PROCESSES=1
MARKER_MIN_PROCESSES=1
MARKER_POOL_SIZE=1 # Value to increase if you have a cluster of machines
MARKER_NUM_GPUS=0.01

# Ray
RAY_POOL_SIZE=1 # Number of serializer actor instances
RAY_MAX_TASKS_PER_WORKER=2 # Number of tasks per serializer
RAY_DEDUP_LOGS=0 # turns off Ray deduplication of logs that appear across multiple processes
RAY_ENABLE_RECORD_ACTOR_TASK_LOGGING=1 # enable task-level logs in the Ray dashboard
RAY_task_retry_delay_ms=3000
RAY_ENABLE_UV_RUN_RUNTIME_ENV=0 # critical with the newest version of UV
RAY_memory_monitor_refresh_ms=0

# Indexer UI
## 1. Replace X.X.X.X with localhost if running locally, or with your server IP
## 2. Replace APP_PORT with your FastAPI port (8080 by default)
## 3. INDEXERUI_URL is the base URL of the Indexer UI (required to prevent CORS issues). Replace INDEXERUI_PORT with its value
## 4. VITE_API_BASE_URL is the base URL of your FastAPI backend, used by the frontend. Replace APP_PORT with the actual port number of your FastAPI backend

VITE_INCLUDE_CREDENTIALS=false # set to true if FastAPI authentication is enabled
INDEXERUI_PORT=8060 # Port to expose the Indexer UI (default is 3042)
INDEXERUI_URL='http://X.X.X.X:INDEXERUI_PORT'
VITE_API_BASE_URL='http://X.X.X.X:APP_PORT'
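A filled-in sketch for a local deployment with the defaults above (FastAPI on port 8080, Indexer UI exposed on 8060); the host and ports depend on your setup:

INDEXERUI_URL='http://localhost:8060'
VITE_API_BASE_URL='http://localhost:8080'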
34 changes: 34 additions & 0 deletions .github/workflows/related_docs_test/chat_completion.sh
@@ -0,0 +1,34 @@
#!/usr/bin/env bash
set -euo pipefail

ENDPOINT_URL=$1
PARTITION_NAME=$2
QUERY=$(cat)

payload=$(jq -nc \
  --arg model "openrag-${PARTITION_NAME}" \
  --arg query "${QUERY}" \
  '{
    model: $model,
    messages: [{role: "user", content: $query}],
    temperature: 0.3,
    top_p: 1,
    stream: false,
    max_tokens: 1024,
    logprobs: 0,
    metadata: {use_map_reduce: false}
  }')

#echo "${payload}"

response=$(curl --connect-timeout 600 -X POST "${ENDPOINT_URL}/v1/chat/completions" \
  -H "accept: application/json" \
  -H "Content-Type: application/json" \
  -d "$payload")

#echo "$response" | jq .

# The response's "extra" field is a JSON-encoded string; decode it and pretty-print.
extra=$(echo "$response" | jq '.extra | fromjson')

echo "$extra" | jq .
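The script reads the user query from stdin and prints only the decoded "extra" field of the completion response, which is what the workflow greps for file IDs. A usage sketch, assuming the stack is up on localhost:8080 and the rel_test partition has already been indexed:

echo "Sun Is Shining" | .github/workflows/related_docs_test/chat_completion.sh http://localhost:8080 rel_test | grep file_id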

141 changes: 141 additions & 0 deletions .github/workflows/related_docs_test/docker-compose.yaml
@@ -0,0 +1,141 @@
include:
  - vdb/milvus.yaml
  # - extern/infinity.yaml

x-openrag: &openrag_template
  #image: ghcr.io/linagora/openrag:dev-latest
  build:
    context: .
    dockerfile: Dockerfile
  volumes:
    - ${CONFIG_VOLUME:-./.hydra_config}:/app/.hydra_config
    - ${DATA_VOLUME:-./data}:/app/data
    - ${MODEL_WEIGHTS_VOLUME:-~/.cache/huggingface}:/app/model_weights # Model weights for RAG
    - ./openrag:/app/openrag # For dev mode
    - /$SHARED_ENV:/ray_mount/.env # Shared environment variables
    - ./ray_mount/logs:/app/logs
  ports:
    - ${APP_PORT:-8080}:${APP_iPORT:-8080}
    - ${RAY_DASHBOARD_PORT:-8265}:8265 # Disable when in cluster mode
  networks:
    default:
      aliases:
        - openrag
  env_file:
    - ${SHARED_ENV:-.env}
  shm_size: 10.24gb

x-vllm: &vllm_template
  networks:
    default:
      aliases:
        - vllm
  restart: "no" # Better to fail in the CI context
  environment:
    - HUGGING_FACE_HUB_TOKEN
  ipc: "host"
  volumes:
    - ${VLLM_CACHE:-/root/.cache/huggingface}:/root/.cache/huggingface # use ./vllm_cache if you want the weights in the vllm_cache folder of your project
  command: >
    --model ${EMBEDDER_MODEL_NAME:-jinaai/jina-embeddings-v3}
    --trust-remote-code
    --task embed
    --gpu_memory_utilization 0.3
  # --max-num-seqs 1
  # --max-model-len ${MAX_MODEL_LEN:-2048}
  # gpu_memory_utilization, max-num-seqs and max-model-len can be tuned depending on your GPU memory

  healthcheck:
    test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 360s
  # ports:
  #   - ${VLLM_PORT:-8000}:8000

services:
  # GPU - default
  openrag:
    <<: *openrag_template
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [ gpu ]
    profiles:
      - ''
    depends_on:
      milvus:
        condition: service_healthy
      vllm-gpu:
        condition: service_healthy

  # No GPU
  openrag-cpu:
    <<: *openrag_template
    deploy: {}
    profiles:
      - 'cpu'
    depends_on:
      milvus:
        condition: service_healthy
      vllm-cpu:
        condition: service_healthy

  rdb:
    image: postgres:15
    environment:
      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-root_password}
      - POSTGRES_USER=${POSTGRES_USER:-root}
    volumes:
      - ${DB_VOLUME:-./db}:/var/lib/postgresql/data

  vllm-gpu:
    <<: *vllm_template
    image: vllm/vllm-openai:v0.9.2
    runtime: nvidia
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    profiles:
      - '' # Empty string gives the default behavior (but does not run when the cpu profile is requested)

  vllm-cpu:
    <<: *vllm_template
    build:
      context: extern/vllm
      dockerfile: Dockerfile.cpu
      target: vllm-openai
    image: openrag-vllm-openai-cpu
    deploy: {}
    environment:
      # - VLLM_CPU_KVCACHE_SPACE=8 # Default value isn't sufficient for full context length
      - VLLM_USE_V1=0 # for ibm-granite/granite-embedding-small-english-r2
    command: >
      --model ${EMBEDDER_MODEL_NAME:-jinaai/jina-embeddings-v3}
      --trust-remote-code
      --dtype float32
      --max-num-batched-tokens 32768
    # dtype is required for aarch64 (https://github.com/vllm-project/vllm/issues/11327) and improves speed on amd64.
    # max-num-batched-tokens is required for aarch64 because chunked prefill isn't supported by the V1 vllm backend
    # on aarch64 yet. On aarch64, max-num-batched-tokens must equal max-model-len for now (without chunked prefill).
    # For details see https://github.com/vllm-project/vllm/issues/21179
    profiles:
      - 'cpu'

  mock-llm:
    build:
      context: .github/workflows/related_docs_test/mock-llm
      dockerfile: Dockerfile.mock-llm
    ports:
      - 8001:8080
    profiles:
      - 'cpu'
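The CI job above drives this compose file with the cpu profile; a local sketch of the same sequence, assuming the file and its .env have been copied next to the repository's Dockerfile as the "Set up env" step does:

docker compose --profile cpu build
docker compose --profile cpu up -d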

30 changes: 30 additions & 0 deletions .github/workflows/related_docs_test/index_child_file.sh
@@ -0,0 +1,30 @@
#!/usr/bin/env bash
set -euo pipefail

ENDPOINT_URL=$1
PARTITION_NAME=$2
FILE_NAME=$3
PARENT_FILE_NAME=$4
CONTENT=$(cat)

# Declare a "parent" relation from this file to an already-indexed parent file.
metadata=$(jq -nc \
  --arg parent_file_name "${PARENT_FILE_NAME}" \
  '{
    mimetype: "text/plain",
    rels: [
      {
        target: $parent_file_name,
        type: "parent"
      }
    ]
  }')

echo "${metadata}"

curl -X 'POST' \
  "${ENDPOINT_URL}/indexer/partition/${PARTITION_NAME}/file/${FILE_NAME}" \
  -H 'accept: application/json' \
  -H 'Content-Type: multipart/form-data' \
  -F "file=@-;filename=${FILE_NAME};type=text/plain" \
  -F "metadata=$metadata" <<< "$CONTENT"
