117 changes: 117 additions & 0 deletions .github/workflows/related_docs_test.yaml
@@ -0,0 +1,117 @@
name: Related docs test

on:
  push:
  workflow_dispatch:

jobs:
  related-docs-test:
    runs-on: ubuntu-latest
    steps:
      - name: Update apt and install packages
        run: |
          df -h
          echo ${PWD}
          sudo apt update
          sudo apt install -y curl git jq
          sudo apt clean
          sudo apt autoremove --purge -y

      - name: Set up Docker Compose
        uses: docker/setup-compose-action@v1
        with:
          version: v2.34.0

      - name: Free up space and move Docker storage to /mnt
        run: |
          echo "Stopping Docker..."
          sudo systemctl stop docker

          echo "Creating new Docker root at /mnt/docker..."
          sudo mkdir -p /mnt/docker
          sudo rsync -aqxP /var/lib/docker/ /mnt/docker

          echo "Updating Docker daemon config..."
          echo '{"data-root": "/mnt/docker"}' | sudo tee /etc/docker/daemon.json

          cat /etc/docker/daemon.json

          echo "Restarting Docker..."
          sudo systemctl start docker

          echo "Verifying Docker root directory:"
          docker info | grep "Docker Root Dir"

      - name: Checkout current branch
        uses: actions/checkout@v5
        with:
          submodules: true

      - name: Set up env
        run: |
          cp .github/workflows/related_docs_test/.env ./
          cp .github/workflows/related_docs_test/*.yaml ./

      - name: Patch vllm to v0.9.2
        run: sed -Ei 's/checkout v[0-9\.]+/checkout v0.9.2/' extern/vllm/Dockerfile.cpu

      - name: Build
        run: docker compose --profile cpu build

      - name: Run
        run: |
          docker compose --profile cpu up -d || docker logs openrag-vllm-cpu-1
          .github/workflows/smoke_test/wait_for_healthy.sh openrag-vllm-cpu-1

      - name: Cleanup
        run: |
          docker container prune -f
          docker image prune -f
          docker builder prune -f
          df -h

      - name: List containers
        run: docker container ls

      - name: Install Python venv
        run: |
          python3 -m venv venv
          source venv/bin/activate
          pip3 install -r utility/requirements.txt

      - name: Wait for OpenRag to start
        run: |
          .github/workflows/related_docs_test/wait_for_services.sh

      - name: Index small documents (relations test)
        run: |
          echo "Sun is shining" | .github/workflows/related_docs_test/index_file.sh http://localhost:8080 rel_test root.txt
          sleep 10s
          echo "Cats meow" | .github/workflows/related_docs_test/index_child_file.sh http://localhost:8080 rel_test child_a.txt root.txt
          sleep 10s
          echo "There are letters in the text" | .github/workflows/related_docs_test/index_child_file.sh http://localhost:8080 rel_test leaf_a.txt child_a.txt
          sleep 30s

      - name: Query small documents (relations test)
        run: |
          echo "Sun Is Shining" | .github/workflows/related_docs_test/chat_completion.sh http://localhost:8080 rel_test | grep -v '{\\"' | grep file_id | grep "root.txt"
          echo "Sun Is Shining" | .github/workflows/related_docs_test/chat_completion.sh http://localhost:8080 rel_test | grep -v '{\\"' | grep file_id | grep "child_a.txt"
          echo "Cats meow" | .github/workflows/related_docs_test/chat_completion.sh http://localhost:8080 rel_test | grep -v '{\\"' | grep file_id | grep "root.txt"
          echo "Cats meow" | .github/workflows/related_docs_test/chat_completion.sh http://localhost:8080 rel_test | grep -v '{\\"' | grep file_id | grep "child_a.txt"
          echo "Cats meow" | .github/workflows/related_docs_test/chat_completion.sh http://localhost:8080 rel_test | grep -v '{\\"' | grep file_id | grep "leaf_a.txt"
          echo "There are letters in the text" | .github/workflows/related_docs_test/chat_completion.sh http://localhost:8080 rel_test | grep -v '{\\"' | grep file_id | grep "child_a.txt"
          echo "There are letters in the text" | .github/workflows/related_docs_test/chat_completion.sh http://localhost:8080 rel_test | grep -v '{\\"' | grep file_id | grep "leaf_a.txt"

67 changes: 67 additions & 0 deletions .github/workflows/related_docs_test/.env
@@ -0,0 +1,67 @@
# LLM
BASE_URL=http://mock-llm:8080/v1/
API_KEY=sk-
MODEL=mock-model

# VLM (Visual Language Model): you can set it to the same values as the LLM if your LLM supports images
VLM_BASE_URL=http://mock-llm:8080/v1/
VLM_API_KEY=sk-
VLM_MODEL=mock-model

RAGMODE=SimpleRag
## FastAPI App (no need to change it)
# APP_PORT=8080 # this is the forwarded port
# API_NUM_WORKERS=1 # Number of uvicorn workers for the FastAPI app

## To enable API HTTP authentication via HTTPBearer
# AUTH_TOKEN=sk-openrag-1234

# SAVE_UPLOADED_FILES=true # useful for Chainlit source viewing

# When set to true, the Chainlit chat UI is mounted on the FastAPI app (default: true)
## WITH_CHAINLIT_UI=true

# RETRIEVER
CONTEXTUAL_RETRIEVAL=false

# EMBEDDER
EMBEDDER_MODEL_NAME=Qwen/Qwen3-Embedding-0.6B
EMBEDDER_BASE_URL=http://vllm:8000/v1
# EMBEDDER_API_KEY=EMPTY

# RERANKER
RERANKER_ENABLED=false
RERANKER_MODEL=Alibaba-NLP/gte-multilingual-reranker-base # or jinaai/jina-reranker-v2-base-multilingual

# Prompts
PROMPTS_DIR=../prompts/example1

# Loaders
PDFLoader=MarkerLoader
XDG_CACHE_HOME=/app/model_weights
# If using MarkerLoader
MARKER_MAX_TASKS_PER_CHILD=1
MARKER_MAX_PROCESSES=1
MARKER_MIN_PROCESSES=1
MARKER_POOL_SIZE=1 # Value to increase if you have a cluster of machines
MARKER_NUM_GPUS=0.01

# Ray
RAY_POOL_SIZE=1 # Number of serializer actor instances
RAY_MAX_TASKS_PER_WORKER=2 # Number of tasks per serializer
RAY_DEDUP_LOGS=0 # turns off Ray deduplication of logs that appear across multiple processes
RAY_ENABLE_RECORD_ACTOR_TASK_LOGGING=1 # enable task-level logs in the Ray dashboard
RAY_task_retry_delay_ms=3000
RAY_ENABLE_UV_RUN_RUNTIME_ENV=0 # critical with the newest version of UV
RAY_memory_monitor_refresh_ms=0

# Indexer UI
## 1. Replace X.X.X.X with localhost if running locally, or with your server IP
## 2. Replace APP_PORT with your FastAPI port (8080 by default)
## 3. INDEXERUI_URL is the base URL of the Indexer UI (required to prevent CORS issues). Replace INDEXERUI_PORT with its value
## 4. VITE_API_BASE_URL is the base URL of your FastAPI backend, used by the frontend. Replace APP_PORT with the actual port number of your FastAPI backend

VITE_INCLUDE_CREDENTIALS=false # set to true if FastAPI authentication is enabled
INDEXERUI_PORT=8060 # Port to expose the Indexer UI (default is 3042)
INDEXERUI_URL='http://X.X.X.X:INDEXERUI_PORT'
VITE_API_BASE_URL='http://X.X.X.X:APP_PORT'
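A filled-in sketch for a local deployment with the defaults above (FastAPI on port 8080, Indexer UI exposed on 8060); the host and ports depend on your setup:

INDEXERUI_URL='http://localhost:8060'
VITE_API_BASE_URL='http://localhost:8080'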
34 changes: 34 additions & 0 deletions .github/workflows/related_docs_test/chat_completion.sh
@@ -0,0 +1,34 @@
#!/usr/bin/env bash
set -euo pipefail

ENDPOINT_URL=$1
PARTITION_NAME=$2
QUERY=$(cat)

payload=$(jq -nc \
  --arg model "openrag-${PARTITION_NAME}" \
  --arg query "${QUERY}" \
  '{
    model: $model,
    messages: [{role: "user", content: $query}],
    temperature: 0.3,
    top_p: 1,
    stream: false,
    max_tokens: 1024,
    logprobs: 0,
    metadata: {use_map_reduce: false}
  }')

#echo "${payload}"

response=$(curl --connect-timeout 600 -X POST "${ENDPOINT_URL}/v1/chat/completions" \
  -H "accept: application/json" \
  -H "Content-Type: application/json" \
  -d "$payload")

#echo "$response" | jq .

# The response's "extra" field is a JSON-encoded string; decode it and pretty-print.
extra=$(echo "$response" | jq '.extra | fromjson')

echo "$extra" | jq .
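The script reads the user query from stdin and prints only the decoded "extra" field of the completion response, which is what the workflow greps for file IDs. A usage sketch, assuming the stack is up on localhost:8080 and the rel_test partition has already been indexed:

echo "Sun Is Shining" | .github/workflows/related_docs_test/chat_completion.sh http://localhost:8080 rel_test | grep file_id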

141 changes: 141 additions & 0 deletions .github/workflows/related_docs_test/docker-compose.yaml
@@ -0,0 +1,141 @@
include:
  - vdb/milvus.yaml
  # - extern/infinity.yaml

x-openrag: &openrag_template
  #image: ghcr.io/linagora/openrag:dev-latest
  build:
    context: .
    dockerfile: Dockerfile
  volumes:
    - ${CONFIG_VOLUME:-./.hydra_config}:/app/.hydra_config
    - ${DATA_VOLUME:-./data}:/app/data
    - ${MODEL_WEIGHTS_VOLUME:-~/.cache/huggingface}:/app/model_weights # Model weights for RAG
    - ./openrag:/app/openrag # For dev mode
    - /$SHARED_ENV:/ray_mount/.env # Shared environment variables
    - ./ray_mount/logs:/app/logs
  ports:
    - ${APP_PORT:-8080}:${APP_iPORT:-8080}
    - ${RAY_DASHBOARD_PORT:-8265}:8265 # Disable when in cluster mode
  networks:
    default:
      aliases:
        - openrag
  env_file:
    - ${SHARED_ENV:-.env}
  shm_size: 10.24gb

x-vllm: &vllm_template
  networks:
    default:
      aliases:
        - vllm
  restart: "no" # Better to fail in the CI context
  environment:
    - HUGGING_FACE_HUB_TOKEN
  ipc: "host"
  volumes:
    - ${VLLM_CACHE:-/root/.cache/huggingface}:/root/.cache/huggingface # use ./vllm_cache if you want the weights in the vllm_cache folder of your project
  command: >
    --model ${EMBEDDER_MODEL_NAME:-jinaai/jina-embeddings-v3}
    --trust-remote-code
    --task embed
    --gpu_memory_utilization 0.3
  # --max-num-seqs 1
  # --max-model-len ${MAX_MODEL_LEN:-2048}
  # gpu_memory_utilization, max-num-seqs and max-model-len can be tuned depending on your GPU memory

  healthcheck:
    test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 360s
  # ports:
  #   - ${VLLM_PORT:-8000}:8000

services:
  # GPU - default
  openrag:
    <<: *openrag_template
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [ gpu ]
    profiles:
      - ''
    depends_on:
      milvus:
        condition: service_healthy
      vllm-gpu:
        condition: service_healthy

  # No GPU
  openrag-cpu:
    <<: *openrag_template
    deploy: {}
    profiles:
      - 'cpu'
    depends_on:
      milvus:
        condition: service_healthy
      vllm-cpu:
        condition: service_healthy

  rdb:
    image: postgres:15
    environment:
      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-root_password}
      - POSTGRES_USER=${POSTGRES_USER:-root}
    volumes:
      - ${DB_VOLUME:-./db}:/var/lib/postgresql/data

  vllm-gpu:
    <<: *vllm_template
    image: vllm/vllm-openai:v0.9.2
    runtime: nvidia
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    profiles:
      - '' # Empty string gives the default behavior (but does not run when the cpu profile is requested)

  vllm-cpu:
    <<: *vllm_template
    build:
      context: extern/vllm
      dockerfile: Dockerfile.cpu
      target: vllm-openai
    image: openrag-vllm-openai-cpu
    deploy: {}
    environment:
      # - VLLM_CPU_KVCACHE_SPACE=8 # Default value isn't sufficient for full context length
      - VLLM_USE_V1=0 # for ibm-granite/granite-embedding-small-english-r2
    command: >
      --model ${EMBEDDER_MODEL_NAME:-jinaai/jina-embeddings-v3}
      --trust-remote-code
      --dtype float32
      --max-num-batched-tokens 32768
    # dtype is required for aarch64 (https://github.com/vllm-project/vllm/issues/11327) and improves speed on amd64.
    # max-num-batched-tokens is required for aarch64 because chunked prefill isn't supported by the V1 vllm backend
    # on aarch64 yet. On aarch64, max-num-batched-tokens must equal max-model-len for now (without chunked prefill).
    # For details see https://github.com/vllm-project/vllm/issues/21179
    profiles:
      - 'cpu'

  mock-llm:
    build:
      context: .github/workflows/related_docs_test/mock-llm
      dockerfile: Dockerfile.mock-llm
    ports:
      - 8001:8080
    profiles:
      - 'cpu'
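The CI job above drives this compose file with the cpu profile; a local sketch of the same sequence, assuming the file and its .env have been copied next to the repository's Dockerfile as the "Set up env" step does:

docker compose --profile cpu build
docker compose --profile cpu up -d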

30 changes: 30 additions & 0 deletions .github/workflows/related_docs_test/index_child_file.sh
@@ -0,0 +1,30 @@
#!/usr/bin/env bash
set -euo pipefail

ENDPOINT_URL=$1
PARTITION_NAME=$2
FILE_NAME=$3
PARENT_FILE_NAME=$4
CONTENT=$(cat)

# Declare a "parent" relation from this file to an already-indexed parent file.
metadata=$(jq -nc \
  --arg parent_file_name "${PARENT_FILE_NAME}" \
  '{
    mimetype: "text/plain",
    rels: [
      {
        target: $parent_file_name,
        type: "parent"
      }
    ]
  }')

echo "${metadata}"

curl -X 'POST' \
  "${ENDPOINT_URL}/indexer/partition/${PARTITION_NAME}/file/${FILE_NAME}" \
  -H 'accept: application/json' \
  -H 'Content-Type: multipart/form-data' \
  -F "file=@-;filename=${FILE_NAME};type=text/plain" \
  -F "metadata=$metadata" <<< "$CONTENT"
