Merge pull request #270 from bernardladenthin/claude/laughing-albatta… #454
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com> | |
| # SPDX-FileCopyrightText: 2023-2025 Konstantin Herud | |
| # | |
| # SPDX-License-Identifier: MIT | |
# Display name of the workflow in the Actions UI.
name: Publish

# Triggers: pushes to main and to v* tags, every pull request, and manual dispatch.
on:
  push:
    branches:
      - main
    tags:
      - 'v*'
  pull_request:
  workflow_dispatch:
    inputs:
      publish_to_central:
        type: boolean
        default: false
        description: 'Deploy to Maven Central (snapshot if -SNAPSHOT, release if a vX.Y.Z tag)'
      use_cache:
        type: boolean
        default: true
        description: 'Use the shared sccache/Depot compiler cache (faster incremental builds)'

# Workflow-wide settings: the JDK version plus a URL / file-name pair for each GGUF model.
env:
  JAVA_VERSION: '21'
  MODEL_URL: 'https://huggingface.co/TheBloke/CodeLlama-7B-GGUF/resolve/main/codellama-7b.Q2_K.gguf'
  MODEL_NAME: 'codellama-7b.Q2_K.gguf'
  RERANKING_MODEL_URL: 'https://huggingface.co/gpustack/jina-reranker-v1-tiny-en-GGUF/resolve/main/jina-reranker-v1-tiny-en-Q4_0.gguf'
  RERANKING_MODEL_NAME: 'jina-reranker-v1-tiny-en-Q4_0.gguf'
  DRAFT_MODEL_URL: 'https://huggingface.co/QuantFactory/AMD-Llama-135m-code-GGUF/resolve/main/AMD-Llama-135m-code.Q2_K.gguf'
  DRAFT_MODEL_NAME: 'AMD-Llama-135m-code.Q2_K.gguf'
  REASONING_MODEL_URL: 'https://huggingface.co/unsloth/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B-Q4_K_M.gguf'
  REASONING_MODEL_NAME: 'Qwen3-0.6B-Q4_K_M.gguf'
  TOOL_MODEL_URL: 'https://huggingface.co/bartowski/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf'
  TOOL_MODEL_NAME: 'Qwen2.5-1.5B-Instruct-Q4_K_M.gguf'
  NOMIC_EMBED_MODEL_URL: 'https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF/resolve/main/nomic-embed-text-v1.5.f16.gguf'
  NOMIC_EMBED_MODEL_NAME: 'nomic-embed-text-v1.5.f16.gguf'
  # Vision model + mmproj consumed by MultimodalIntegrationTest
  # (upstream kherud/java-llama.cpp#103 / #34). SmolVLM-500M is the smallest community
  # vision GGUF that loads reliably under the upstream mtmd pipeline; model plus mmproj
  # total ~600 MB, in line with the existing per-test-job download budget.
  VISION_MODEL_URL: 'https://huggingface.co/ggml-org/SmolVLM-500M-Instruct-GGUF/resolve/main/SmolVLM-500M-Instruct-Q8_0.gguf'
  VISION_MODEL_NAME: 'SmolVLM-500M-Instruct-Q8_0.gguf'
  VISION_MMPROJ_URL: 'https://huggingface.co/ggml-org/SmolVLM-500M-Instruct-GGUF/resolve/main/mmproj-SmolVLM-500M-Instruct-Q8_0.gguf'
  VISION_MMPROJ_NAME: 'mmproj-SmolVLM-500M-Instruct-Q8_0.gguf'
  # Text-to-speech models (OuteTTS pipeline) for TtsIntegrationTest, the sibling of
  # AudioInputIntegrationTest.
  TTS_MODEL_URL: 'https://huggingface.co/second-state/OuteTTS-0.2-500M-GGUF/resolve/main/OuteTTS-0.2-500M-Q4_K_M.gguf'
  TTS_MODEL_NAME: 'OuteTTS-0.2-500M-Q4_K_M.gguf'
  TTS_VOCODER_URL: 'https://huggingface.co/ggml-org/WavTokenizer/resolve/main/WavTokenizer-Large-75-F16.gguf'
  TTS_VOCODER_NAME: 'WavTokenizer-Large-75-F16.gguf'
  # The image for MultimodalIntegrationTest is committed to the repository at
  # src/test/resources/images/test-image.jpg (licensing is covered by the README in that
  # directory), so nothing is downloaded; CI simply points mvn test at this path.
  VISION_IMAGE_PATH: 'src/test/resources/images/test-image.jpg'

# Default GITHUB_TOKEN scope for every job: read-only access to repository contents.
permissions:
  contents: read
| jobs: | |
| # --------------------------------------------------------------------------- | |
| # Start gate — single cancellable abort window before the pipeline starts. | |
| # The wait duration lives in the `startgate` GitHub Environment (Settings → | |
| # Environments → startgate → Wait timer). | |
| # --------------------------------------------------------------------------- | |
# Gate job bound to the `startgate` GitHub Environment; the environment's wait timer
# provides the cancellable delay, and the single step only logs that the gate has passed.
startgate:
  name: Start gate (abort window)
  environment: startgate
  runs-on: ubuntu-latest
  steps:
    - run: 'echo "Start gate elapsed — proceeding with pipeline."'
| # --------------------------------------------------------------------------- | |
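| # Static checks and the shared WebUI build, then the cross-compile jobs (Docker / dockcross) — | |
| # these produce release artifacts; only the natively built linux-aarch64 job also runs ctest | |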
| # --------------------------------------------------------------------------- | |
# Formatting gate plus an informational package-dependency dump.
# The spotless step fails the job on any format violation; the jdeps step is marked
# continue-on-error and ends in `|| true`, so it can never fail the job.
code-style:
  name: Code style (spotless) + package graph
  needs: startgate
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v7
    - uses: actions/setup-java@v5
      with:
        distribution: temurin
        # Read the workflow-level JAVA_VERSION like every other setup-java step in this
        # file, so a JDK bump cannot leave this job behind on a stale hard-coded value.
        java-version: ${{ env.JAVA_VERSION }}
    - name: Spotless check (fail fast on format violations)
      run: mvn -B --no-transfer-progress spotless:check
    - name: Print internal package dependency graph (jdeps, informational)
      continue-on-error: true
      run: |
        mvn -B --no-transfer-progress -DskipTests -Denforcer.skip=true compile
        echo "=== internal package dependency graph (jdeps, bytecode) ==="
        jdeps -verbose:package target/classes | grep 'net.ladenthin.llama' || true
| # --------------------------------------------------------------------------- | |
| # Build the llama.cpp WebUI ONCE, from the same pinned tag CMakeLists.txt fetches, | |
| # and share it to every native build as the generated, platform-independent | |
| # ui.cpp/ui.h ("webui-generated" artifact). The native builds embed it into | |
| # libjllama (CMake's "WebUI assets" block); when this job's artifact is absent the | |
| # build falls back to the empty-asset stub. npm runs only here, in one controlled | |
| # job — never in the dockcross cross-compilers (which have no node) or per-platform. | |
| # --------------------------------------------------------------------------- | |
# Builds the llama.cpp WebUI once and publishes the generated ui.cpp / ui.h as the
# `webui-generated` artifact that the native build jobs download.
# Flow: resolve the pinned b<nnnn> tag from CMakeLists.txt -> sparse-checkout tools/ui of
# ggml-org/llama.cpp at that tag -> npm build -> gzip the assets -> run the C++ embed tool.
build-webui:
  name: Build WebUI assets (shared)
  needs: startgate
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v7
    - name: Resolve pinned llama.cpp tag from CMakeLists.txt
      id: tag
      shell: bash
      run: |
        # `shell: bash` runs with -e -o pipefail, so a grep that matches nothing would
        # abort the script at the assignment itself. `|| true` keeps it alive so the
        # empty-TAG check below can print its diagnostic before failing.
        TAG=$(grep -oE 'GIT_TAG[[:space:]]+b[0-9]+' CMakeLists.txt | grep -oE 'b[0-9]+' | head -1 || true)
        if [ -z "$TAG" ]; then
          echo "could not resolve llama.cpp GIT_TAG (b<nnnn>) from CMakeLists.txt" >&2
          exit 1
        fi
        echo "tag=$TAG" >> "$GITHUB_OUTPUT"
        echo "Pinned llama.cpp WebUI tag: $TAG"
    - name: Checkout llama.cpp tools/ui at the pinned tag
      uses: actions/checkout@v7
      with:
        repository: ggml-org/llama.cpp
        ref: ${{ steps.tag.outputs.tag }}
        path: llamacpp-ui
        sparse-checkout: tools/ui
        sparse-checkout-cone-mode: true
    - uses: actions/setup-node@v6
      with:
        node-version: '24'
        cache: npm
        cache-dependency-path: llamacpp-ui/tools/ui/package-lock.json
    - name: Build WebUI (Svelte/Vite)
      working-directory: llamacpp-ui/tools/ui
      env:
        HF_UI_VERSION: ${{ steps.tag.outputs.tag }}
        LLAMA_BUILD_NUMBER: ${{ steps.tag.outputs.tag }}
      run: |
        npm ci --ignore-scripts
        npm run build
        test -f dist/index.html
    - name: Embed assets into ui.cpp / ui.h (gzip parity with upstream)
      working-directory: llamacpp-ui/tools/ui
      shell: bash
      run: |
        set -euo pipefail
        # gzip every asset into dist/_gzip/<path> so llama-ui-embed embeds the
        # compressed bytes (LLAMA_UI_GZIP parity); embed auto-detects _gzip.
        # NUL-delimited paths + `IFS=` keep names with whitespace or newlines intact.
        ( cd dist && find . -type f -not -path './_gzip/*' -print0 | while IFS= read -r -d '' f; do
            mkdir -p "_gzip/$(dirname "$f")"
            gzip -9 -c "$f" > "_gzip/$f"
          done )
        # llama-ui-embed is a self-contained C++17 host tool (no npm) — build + run it.
        g++ -O2 -std=c++17 -o llama-ui-embed embed.cpp
        mkdir -p "$GITHUB_WORKSPACE/webui-generated"
        ./llama-ui-embed \
          "$GITHUB_WORKSPACE/webui-generated/ui.cpp" \
          "$GITHUB_WORKSPACE/webui-generated/ui.h" \
          dist
        echo "=== generated WebUI assets ==="
        ls -la "$GITHUB_WORKSPACE/webui-generated"
        # An ui.h without LLAMA_UI_HAS_ASSETS means nothing was embedded: fail the job.
        if grep -q LLAMA_UI_HAS_ASSETS "$GITHUB_WORKSPACE/webui-generated/ui.h"; then
          echo "LLAMA_UI_HAS_ASSETS: present (real WebUI embedded)"
        else
          echo "ERROR: embed produced an empty asset table" >&2
          exit 1
        fi
    - name: Upload WebUI artifact
      uses: actions/upload-artifact@v7
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/
        retention-days: 1
        if-no-files-found: error
# manylinux_2_28 x86_64 CUDA build inside dockcross; uploads `linux-libraries-cuda`.
#
# Caching: build_cuda_linux.sh execs build.sh, so build.sh's sccache probe guards this job
# as well. build.sh also wraps nvcc (CMAKE_CUDA_COMPILER_LAUNCHER=sccache), so the per-arch
# .cu device passes — the dominant cost of this job — are cached over Depot next to the gcc
# host TUs. A warm run showed 100% hits on CUDA / CUBIN / device-code (139 CUDA hits, 99.86%
# overall) and cut the job from ~51 min cold to ~15 min warm.
#
# CI therefore always builds the FULL CMAKE_CUDA_ARCHITECTURES set (no single-arch shortcut)
# and relies on the warm cache for speed, keeping every artifact release-safe on PR / push as
# well as publish. CUDA_FAST_BUILD remains in build_cuda_linux.sh as a LOCAL-dev knob only.
# The first-run sccache diagnostics (SCCACHE_LOG / SCCACHE_ERROR_LOG / RUST_BACKTRACE) were
# removed once caching was confirmed; build.sh still prints `sccache --show-stats` at the end
# of every run. The cache is inert without DEPOT_TOKEN (fork PRs) or with use_cache=false.
crosscompile-linux-x86_64-cuda:
  name: Cross-Compile manylinux_2_28 x86_64 (CUDA)
  needs:
    - startgate
    - build-webui
  runs-on: ubuntu-latest
  env:
    # Evaluates to true for every non-dispatch event; manual runs follow the use_cache input.
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
    # `-e NAME` flags naming the three cache variables above for the dockcross wrapper.
    DOCKCROSS_ARGS: '-e SCCACHE_WEBDAV_ENDPOINT -e SCCACHE_WEBDAV_TOKEN -e USE_CACHE'
  steps:
    - uses: actions/checkout@v7
    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/
    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== Host CPU Information ==="
        lscpu
        echo ""
        echo "=== CPU Details from /proc/cpuinfo ==="
        cat /proc/cpuinfo
    - name: Build libraries
      shell: bash
      run: |
        .github/dockcross/dockcross-manylinux_2_28-x64 .github/build_cuda_linux.sh "-DOS_NAME=Linux -DOS_ARCH=x86_64"
    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: linux-libraries-cuda
        path: ${{ github.workspace }}/src/main/resources_linux_cuda/net/ladenthin/llama/
        # Fail in this job, not in a later consumer, if the build left nothing to upload.
        if-no-files-found: error
# manylinux2014 x86_64 build inside dockcross; uploads `Linux-x86_64-libraries`.
#
# Job 1 of the phase-2 dockcross cache rollout, verified green in CI (PR #245): the sccache
# v0.16.0 probe passed in-container (devtoolset-10 gcc) with the cache ON over Depot WebDAV
# (the cold run stored 275 objects). The env below is the steady-state configuration; the
# first-run diagnostics (SCCACHE_LOG / SCCACHE_ERROR_LOG / RUST_BACKTRACE) have been removed.
# The cache is inert without DEPOT_TOKEN (fork PRs) or with use_cache=false, and a crashing
# sccache still falls back to a green uncached build through the build.sh probe.
crosscompile-linux-x86_64:
  name: Cross-Compile manylinux2014 x86_64
  needs:
    - startgate
    - build-webui
  runs-on: ubuntu-latest
  env:
    # Evaluates to true for every non-dispatch event; manual runs follow the use_cache input.
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
    # `-e NAME` flags naming the three cache variables above for the dockcross wrapper.
    DOCKCROSS_ARGS: '-e SCCACHE_WEBDAV_ENDPOINT -e SCCACHE_WEBDAV_TOKEN -e USE_CACHE'
  steps:
    - uses: actions/checkout@v7
    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/
    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== Host CPU Information ==="
        lscpu
        echo ""
        echo "=== CPU Details from /proc/cpuinfo ==="
        cat /proc/cpuinfo
    - name: Build libraries
      shell: bash
      run: |
        .github/dockcross/dockcross-manylinux2014-x64 .github/build.sh "-DOS_NAME=Linux -DOS_ARCH=x86_64"
    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: Linux-x86_64-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        # test-java-linux-x86_64 downloads this artifact; fail here if nothing was produced.
        if-no-files-found: error
# Native ARM64 build + C++ unit tests on GitHub's free arm64 runner; uploads
# `Linux-aarch64-libraries`.
#
# Mirrors upstream llama.cpp's `ubuntu-cpu` aarch64 release job (ubuntu-24.04-arm + GCC 14).
# It replaces the former dockcross `linux-arm64-lts` cross-compile (GCC 8.5, glibc 2.17),
# which can no longer compile llama.cpp b9739: its C++17 CTAD-in-`new` needs GCC >= 12.
# Building natively also allows running the C++ unit suite (ctest) on real ARM hardware,
# which the cross build never did.
# Trade-off: the glibc floor rises 2.17 -> ~2.39, the same envelope upstream's own ARM
# binaries require. GGML_NATIVE=OFF keeps the artifact portable across ARMv8 CPU generations
# (no build-host -march baked in).
# The job id is unchanged because it is a `needs:` target downstream; only the display name
# changed, so update any branch-protection required-check pinned to the old name.
crosscompile-linux-aarch64:
  name: Build and Test Linux aarch64
  needs:
    - startgate
    - build-webui
  runs-on: ubuntu-24.04-arm
  env:
    # Evaluates to true for every non-dispatch event; manual runs follow the use_cache input.
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
  steps:
    - uses: actions/checkout@v7
    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/
    - uses: actions/setup-java@v5
      with:
        distribution: temurin
        java-version: ${{ env.JAVA_VERSION }}
    - name: Install toolchain (GCC 14, mirrors upstream llama.cpp ARM release)
      run: |
        sudo apt-get update
        sudo apt-get install -y gcc-14 g++-14
        # Export CC / CXX through GITHUB_ENV so the later build step picks up GCC 14.
        echo "CC=gcc-14" >> "$GITHUB_ENV"
        echo "CXX=g++-14" >> "$GITHUB_ENV"
    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== Host CPU Information ==="
        lscpu
        echo ""
        echo "=== CPU Details from /proc/cpuinfo ==="
        cat /proc/cpuinfo
    - name: Build libraries
      shell: bash
      run: |
        mvn --no-transfer-progress compile
        .github/build.sh "-DOS_NAME=Linux -DOS_ARCH=aarch64 -DGGML_NATIVE=OFF -DBUILD_TESTING=ON"
    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure
    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: Linux-aarch64-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        # Fail in this job, not in a later consumer, if the build left nothing to upload.
        if-no-files-found: error
# Android aarch64 build inside dockcross; uploads `Linux-Android-aarch64-libraries`.
#
# Job 4 of the phase-2 dockcross cache rollout. It uses the same steady-state cache env as
# the manylinux2014 job (job 1); the build.sh probe makes it safe to enable without a
# separate verification run. The cache is inert without DEPOT_TOKEN (fork PRs) or with
# use_cache=false.
crosscompile-android-aarch64:
  name: Cross-Compile Android aarch64
  needs:
    - startgate
    - build-webui
  runs-on: ubuntu-latest
  env:
    # Evaluates to true for every non-dispatch event; manual runs follow the use_cache input.
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
    # `-e NAME` flags naming the three cache variables above for the dockcross wrapper.
    DOCKCROSS_ARGS: '-e SCCACHE_WEBDAV_ENDPOINT -e SCCACHE_WEBDAV_TOKEN -e USE_CACHE'
  steps:
    - uses: actions/checkout@v7
    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/
    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== Host CPU Information ==="
        lscpu
        echo ""
        echo "=== CPU Details from /proc/cpuinfo ==="
        cat /proc/cpuinfo
    - name: Build libraries
      shell: bash
      run: |
        .github/dockcross/dockcross-android-arm64 .github/build.sh "-DOS_NAME=Linux-Android -DOS_ARCH=aarch64"
    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: Linux-Android-aarch64-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        # Fail in this job, not in a later consumer, if the build left nothing to upload.
        if-no-files-found: error
# Android aarch64 OpenCL (Adreno) build inside dockcross; uploads `android-libraries-opencl`.
#
# Job 5 of the phase-2 dockcross cache rollout. build_opencl_android.sh stages the OpenCL
# headers/loader and then hands the jllama cmake build to build.sh, which owns the sccache
# probe and launcher. The cache env matches the other dockcross jobs and is inert without
# DEPOT_TOKEN (fork PRs) or with use_cache=false.
crosscompile-android-aarch64-opencl:
  name: Cross-Compile Android aarch64 (OpenCL/Adreno)
  needs:
    - startgate
    - build-webui
  runs-on: ubuntu-latest
  env:
    # Evaluates to true for every non-dispatch event; manual runs follow the use_cache input.
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
    # `-e NAME` flags naming the three cache variables above for the dockcross wrapper.
    DOCKCROSS_ARGS: '-e SCCACHE_WEBDAV_ENDPOINT -e SCCACHE_WEBDAV_TOKEN -e USE_CACHE'
  steps:
    - uses: actions/checkout@v7
    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/
    - name: Build libraries
      shell: bash
      run: |
        .github/dockcross/dockcross-android-arm64 .github/build_opencl_android.sh "-DOS_NAME=Linux-Android -DOS_ARCH=aarch64 -DGGML_OPENCL=ON -DGGML_OPENCL_EMBED_KERNELS=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON"
    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: android-libraries-opencl
        path: ${{ github.workspace }}/src/main/resources_android_opencl/net/ladenthin/llama/
        # Fail in this job, not in a later consumer, if the build left nothing to upload.
        if-no-files-found: error
| # --------------------------------------------------------------------------- | |
| # Native build jobs — produce release artifacts + run C++ unit tests | |
| # --------------------------------------------------------------------------- | |
# macOS 15 arm64 build with Metal disabled (-DLLAMA_METAL=OFF), followed by the C++ unit
# tests; uploads `macos-15-libraries`.
# sccache is installed from Homebrew only when caching is enabled and a Depot token exists;
# that install step is continue-on-error, so a failed install does not fail the job.
build-macos-arm64-no-metal:
  name: Build and Test macOS 15 arm64 (no Metal)
  needs:
    - startgate
    - build-webui
  runs-on: macos-15
  env:
    BUILD_JOBS: '2'
    # Evaluates to true for every non-dispatch event; manual runs follow the use_cache input.
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
  steps:
    - uses: actions/checkout@v7
    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/
    - uses: actions/setup-java@v5
      with:
        distribution: temurin
        java-version: ${{ env.JAVA_VERSION }}
    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== CPU Information ==="
        sysctl hw.model hw.cachelinesize hw.cpufrequency hw.cachesize hw.physicalcpu hw.logicalcpu hw.packages hw.memsize hw.ncpu 2>/dev/null || true
        echo ""
        echo "=== Processor Details ==="
        system_profiler SPHardwareDataType
    - name: Install sccache (shared compiler cache)
      if: ${{ env.USE_CACHE == 'true' && env.SCCACHE_WEBDAV_TOKEN != '' }}
      continue-on-error: true
      run: brew install sccache
    - name: Build libraries
      shell: bash
      run: |
        mvn --no-transfer-progress compile
        .github/build.sh -DLLAMA_METAL=OFF -DGGML_NATIVE=OFF -DBUILD_TESTING=ON
    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure
    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: macos-15-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        # Fail in this job, not in a later consumer, if the build left nothing to upload.
        if-no-files-found: error
# macOS 14 arm64 build with the Metal library embedded (-DLLAMA_METAL_EMBED_LIBRARY=ON),
# followed by the C++ unit tests; uploads `macos-14-libraries`.
# sccache is installed from Homebrew only when caching is enabled and a Depot token exists;
# that install step is continue-on-error, so a failed install does not fail the job.
build-macos-arm64-metal:
  name: Build and Test macOS 14 arm64 (Metal)
  needs:
    - startgate
    - build-webui
  runs-on: macos-14
  env:
    BUILD_JOBS: '2'
    # Evaluates to true for every non-dispatch event; manual runs follow the use_cache input.
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
  steps:
    - uses: actions/checkout@v7
    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/
    - uses: actions/setup-java@v5
      with:
        distribution: temurin
        java-version: ${{ env.JAVA_VERSION }}
    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== CPU Information ==="
        sysctl hw.model hw.cachelinesize hw.cpufrequency hw.cachesize hw.physicalcpu hw.logicalcpu hw.packages hw.memsize hw.ncpu 2>/dev/null || true
        echo ""
        echo "=== Processor Details ==="
        system_profiler SPHardwareDataType
    - name: Install sccache (shared compiler cache)
      if: ${{ env.USE_CACHE == 'true' && env.SCCACHE_WEBDAV_TOKEN != '' }}
      continue-on-error: true
      run: brew install sccache
    - name: Build libraries
      shell: bash
      run: |
        mvn --no-transfer-progress compile
        .github/build.sh -DLLAMA_METAL_EMBED_LIBRARY=ON -DBUILD_TESTING=ON
    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure
    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: macos-14-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        # Fail in this job, not in a later consumer, if the build left nothing to upload.
        if-no-files-found: error
# Windows x64 build with the "Visual Studio 18 2026" generator, followed by the C++ unit
# tests; uploads `Windows-x86_64-libraries`. This job sets no sccache environment.
build-windows-x86_64:
  name: Build and Test Windows 2025 x86_64 (VS 2026)
  needs:
    - startgate
    - build-webui
  runs-on: windows-2025-vs2026
  steps:
    - uses: actions/checkout@v7
    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/
    - name: Display CPU Info
      shell: pwsh
      run: |
        Write-Host "=== CPU Information (Get-CimInstance - All Properties) ==="
        Get-CimInstance Win32_Processor | Select-Object * | Format-List
        Write-Host ""
        Write-Host "=== CPU Information (systeminfo) ==="
        systeminfo | Select-String "Processor"
        Write-Host ""
        Write-Host "=== CPU Information (Get-ComputerInfo) ==="
        Get-ComputerInfo -Property "CsProcessors*" 2>$null || Write-Host "Get-ComputerInfo not available"
    - name: Build libraries
      shell: cmd
      run: |
        .github\build.bat -G "Visual Studio 18 2026" -A "x64" -DOS_NAME=Windows -DOS_ARCH=x86_64 -DBUILD_TESTING=ON
    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure
    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: Windows-x86_64-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        # Fail in this job, not in a later consumer, if the build left nothing to upload.
        if-no-files-found: error
# Windows 32-bit (Win32) build with the "Visual Studio 18 2026" generator, followed by the
# C++ unit tests; uploads `Windows-x86-libraries`. This job sets no sccache environment.
build-windows-x86:
  name: Build and Test Windows 2025 x86 (VS 2026)
  needs:
    - startgate
    - build-webui
  runs-on: windows-2025-vs2026
  steps:
    - uses: actions/checkout@v7
    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/
    - name: Display CPU Info
      shell: pwsh
      run: |
        Write-Host "=== CPU Information (Get-CimInstance - All Properties) ==="
        Get-CimInstance Win32_Processor | Select-Object * | Format-List
        Write-Host ""
        Write-Host "=== CPU Information (systeminfo) ==="
        systeminfo | Select-String "Processor"
        Write-Host ""
        Write-Host "=== CPU Information (Get-ComputerInfo) ==="
        Get-ComputerInfo -Property "CsProcessors*" 2>$null || Write-Host "Get-ComputerInfo not available"
    - name: Build libraries
      shell: cmd
      run: |
        .github\build.bat -G "Visual Studio 18 2026" -A "Win32" -DOS_NAME=Windows -DOS_ARCH=x86 -DBUILD_TESTING=ON
    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure
    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: Windows-x86-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        # Fail in this job, not in a later consumer, if the build left nothing to upload.
        if-no-files-found: error
| # --------------------------------------------------------------------------- | |
| # Windows Ninja Multi-Config + sccache — EVALUATION jobs (not yet released). | |
| # The Visual Studio generator ignores CMAKE_{C,CXX}_COMPILER_LAUNCHER, so the two | |
| # build-windows-* jobs above are the only uncached native builds. Upstream | |
| # llama.cpp ships its windows-cuda artifact with "Ninja Multi-Config" + MSVC, | |
| # which proves the combination works on the same tree. These two jobs run that | |
| # combination in parallel with the trusted VS jobs and front cl.exe with sccache | |
| # over Depot WebDAV (build.bat probe-guards it). Artifacts are named | |
| # `Windows-*-ninja` (NOT `*-libraries`) so the package job's `pattern: "*-libraries"` | |
| # does NOT pick them up — they are evaluation-only until cache hits are confirmed, | |
| # at which point the release path is switched over (see TODO.md). The package job's | |
| # `needs:` is intentionally left unchanged. | |
| # --------------------------------------------------------------------------- | |
# Evaluation build: Windows x64 with "Ninja Multi-Config" + MSVC, optionally fronted by
# sccache. The artifact is named `Windows-x86_64-ninja` (no `-libraries` suffix).
# sccache 0.16.0 is fetched from the mozilla/sccache GitHub release and added to PATH only
# when caching is enabled and a Depot token exists; that step is continue-on-error.
build-windows-x86_64-ninja:
  name: Build and Test Windows 2025 x86_64 (Ninja Multi-Config + sccache, eval)
  needs:
    - startgate
    - build-webui
  runs-on: windows-2025-vs2026
  env:
    # Evaluates to true for every non-dispatch event; manual runs follow the use_cache input.
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
  steps:
    - uses: actions/checkout@v7
    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/
    - name: Set up MSVC developer environment (x64)
      uses: ilammy/msvc-dev-cmd@v1
      with:
        arch: x64
    - name: Install sccache (shared compiler cache)
      if: ${{ env.USE_CACHE == 'true' && env.SCCACHE_WEBDAV_TOKEN != '' }}
      continue-on-error: true
      shell: pwsh
      run: |
        $ver = "0.16.0"
        $rel = "sccache-v$ver-x86_64-pc-windows-msvc"
        $url = "https://github.com/mozilla/sccache/releases/download/v$ver/$rel.zip"
        Write-Host "Downloading $url"
        Invoke-WebRequest -Uri $url -OutFile "$env:RUNNER_TEMP\sccache.zip"
        Expand-Archive -Path "$env:RUNNER_TEMP\sccache.zip" -DestinationPath "$env:RUNNER_TEMP\sccache" -Force
        Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\sccache\$rel"
    - name: Display CPU Info
      shell: pwsh
      run: |
        Write-Host "=== CPU Information (Get-CimInstance - All Properties) ==="
        Get-CimInstance Win32_Processor | Select-Object * | Format-List
        Write-Host ""
        Write-Host "=== CPU Information (systeminfo) ==="
        systeminfo | Select-String "Processor"
        Write-Host ""
        Write-Host "=== CPU Information (Get-ComputerInfo) ==="
        Get-ComputerInfo -Property "CsProcessors*" 2>$null || Write-Host "Get-ComputerInfo not available"
    - name: Build libraries
      shell: cmd
      run: |
        .github\build.bat -G "Ninja Multi-Config" -DOS_NAME=Windows -DOS_ARCH=x86_64 -DBUILD_TESTING=ON
    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure
    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: Windows-x86_64-ninja
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
# Evaluation build: Windows x86 with "Ninja Multi-Config" + MSVC, optionally fronted by
# sccache. The artifact is named `Windows-x86-ninja` (no `-libraries` suffix).
# The MSVC environment is set up for arch x86, while the sccache download is the
# x86_64-pc-windows-msvc release; it is installed only when caching is enabled and a Depot
# token exists, and that step is continue-on-error.
build-windows-x86-ninja:
  name: Build and Test Windows 2025 x86 (Ninja Multi-Config + sccache, eval)
  needs:
    - startgate
    - build-webui
  runs-on: windows-2025-vs2026
  env:
    # Evaluates to true for every non-dispatch event; manual runs follow the use_cache input.
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
  steps:
    - uses: actions/checkout@v7
    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/
    - name: Set up MSVC developer environment (x86)
      uses: ilammy/msvc-dev-cmd@v1
      with:
        arch: x86
    - name: Install sccache (shared compiler cache)
      if: ${{ env.USE_CACHE == 'true' && env.SCCACHE_WEBDAV_TOKEN != '' }}
      continue-on-error: true
      shell: pwsh
      run: |
        $ver = "0.16.0"
        $rel = "sccache-v$ver-x86_64-pc-windows-msvc"
        $url = "https://github.com/mozilla/sccache/releases/download/v$ver/$rel.zip"
        Write-Host "Downloading $url"
        Invoke-WebRequest -Uri $url -OutFile "$env:RUNNER_TEMP\sccache.zip"
        Expand-Archive -Path "$env:RUNNER_TEMP\sccache.zip" -DestinationPath "$env:RUNNER_TEMP\sccache" -Force
        Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\sccache\$rel"
    - name: Display CPU Info
      shell: pwsh
      run: |
        Write-Host "=== CPU Information (Get-CimInstance - All Properties) ==="
        Get-CimInstance Win32_Processor | Select-Object * | Format-List
        Write-Host ""
        Write-Host "=== CPU Information (systeminfo) ==="
        systeminfo | Select-String "Processor"
        Write-Host ""
        Write-Host "=== CPU Information (Get-ComputerInfo) ==="
        Get-ComputerInfo -Property "CsProcessors*" 2>$null || Write-Host "Get-ComputerInfo not available"
    - name: Build libraries
      shell: cmd
      run: |
        .github\build.bat -G "Ninja Multi-Config" -DOS_NAME=Windows -DOS_ARCH=x86 -DBUILD_TESTING=ON
    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure
    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: Windows-x86-ninja
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
| # --------------------------------------------------------------------------- | |
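| # CI-only jobs — extra test coverage, not meant as release inputs (note: build-macos-arm64-metal-15 | |
| # below still uploads a `macos-15-metal-libraries` artifact) | |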
| # --------------------------------------------------------------------------- | |
# Host-native Linux x86_64 build used only to run the C++ unit tests: nothing is uploaded.
# The job depends on startgate alone and does not download the webui-generated artifact.
test-cpp-linux-x86_64:
  name: C++ Tests Ubuntu Latest x86_64
  needs:
    - startgate
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v7
    - uses: actions/setup-java@v5
      with:
        java-version: ${{ env.JAVA_VERSION }}
        distribution: temurin
    - name: Display CPU Info
      run: |
        echo "=== CPU Information ==="
        lscpu
        echo ""
        echo "=== CPU Details from /proc/cpuinfo ==="
        cat /proc/cpuinfo
    - name: Build libraries
      run: |
        mvn -q --no-transfer-progress compile
        .github/build.sh -DBUILD_TESTING=ON
    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure
# macOS 15 arm64 build with the Metal library embedded and GGML_NATIVE=OFF, followed by
# the C++ unit tests; uploads `macos-15-metal-libraries`.
# NOTE(review): this artifact name ends in `-libraries`; confirm whether the package job's
# artifact pattern is meant to include it.
build-macos-arm64-metal-15:
  name: Build and Test macOS 15 arm64 (Metal)
  needs:
    - startgate
    - build-webui
  runs-on: macos-15
  env:
    BUILD_JOBS: '2'
    # Evaluates to true for every non-dispatch event; manual runs follow the use_cache input.
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
  steps:
    - uses: actions/checkout@v7
    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/
    - uses: actions/setup-java@v5
      with:
        java-version: ${{ env.JAVA_VERSION }}
        distribution: temurin
    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== CPU Information ==="
        sysctl hw.model hw.cachelinesize hw.cpufrequency hw.cachesize hw.physicalcpu hw.logicalcpu hw.packages hw.memsize hw.ncpu 2>/dev/null || true
        echo ""
        echo "=== Processor Details ==="
        system_profiler SPHardwareDataType
    - name: Install sccache (shared compiler cache)
      if: ${{ env.USE_CACHE == 'true' && env.SCCACHE_WEBDAV_TOKEN != '' }}
      continue-on-error: true
      run: brew install sccache
    - name: Build libraries
      shell: bash
      run: |
        mvn --no-transfer-progress compile
        .github/build.sh -DLLAMA_METAL_EMBED_LIBRARY=ON -DGGML_NATIVE=OFF -DBUILD_TESTING=ON
    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure
    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        name: macos-15-metal-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
| # --------------------------------------------------------------------------- | |
| # Java test jobs — download release artifact, run mvn test | |
| # --------------------------------------------------------------------------- | |
# Java test job for Linux x86_64. It downloads the `Linux-x86_64-libraries`
# artifact, fetches the test models, runs `mvn test` with the jcstress profile,
# then PIT mutation testing, and uploads reports and crash diagnostics.
test-java-linux-x86_64:
  name: Java Tests Ubuntu Latest x86_64
  needs: crosscompile-linux-x86_64
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v7
    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== CPU Information ==="
        lscpu
        echo ""
        echo "=== CPU Details from /proc/cpuinfo ==="
        cat /proc/cpuinfo
    - uses: actions/download-artifact@v8
      with:
        name: Linux-x86_64-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
    # GGUF model cache: keeps the ~5 GB of test models in GitHub's own Actions
    # cache so they are not re-downloaded from HuggingFace on every run (which
    # also avoids HF rate limits). It is always on; only the sccache compiler
    # cache is toggled by the `use_cache` workflow_dispatch input / USE_CACHE
    # env. Depot is deliberately not used here: GB-scale blobs are usage-priced
    # there and its file cache needs Depot-hosted runners. See CLAUDE.md.
    - name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
      uses: actions/cache@v5
      with:
        path: models/
        # Static key: bump the suffix when the model set or the URLs change.
        key: gguf-models-v1
    # Each download is skipped when the file already exists (e.g. restored from
    # the cache); curl is restricted to HTTPS, including on redirects.
    - name: Download text generation model
      run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
    - name: Download reranking model
      run: test -f models/${RERANKING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${RERANKING_MODEL_URL} --create-dirs -o models/${RERANKING_MODEL_NAME}
    - name: Download draft model
      run: test -f models/${DRAFT_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${DRAFT_MODEL_URL} --create-dirs -o models/${DRAFT_MODEL_NAME}
    - name: Download reasoning model
      run: test -f models/${REASONING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
    - name: Download tool-calling model
      run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
    # Quoted on purpose: in a plain scalar " #" starts a YAML comment, which
    # would cut the step name off after "(issue".
    - name: 'Download nomic embedding model (issue #98 regression)'
      run: test -f models/${NOMIC_EMBED_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${NOMIC_EMBED_MODEL_URL} --create-dirs -o models/${NOMIC_EMBED_MODEL_NAME}
    # Quoted for the same reason (" #34" would otherwise become a comment).
    - name: 'Download vision model (upstream kherud/java-llama.cpp#103 / #34)'
      run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
    - name: Download vision mmproj
      run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
    - name: List files in models directory
      run: ls -l models/
    - name: Validate model files
      run: bash .github/validate-models.sh
    - uses: actions/setup-java@v5
      with:
        distribution: temurin
        java-version: ${{ env.JAVA_VERSION }}
    - name: Memory before tests
      run: free -h
    # Only the kernel-wide core file pattern is configured here. The per-process
    # size limit (`ulimit -c`) is raised inside "Run tests", because a limit set
    # in this step's shell would end with that shell.
    - name: Enable core dumps
      run: |
        echo "${{ github.workspace }}/core.%e.%p" | sudo tee /proc/sys/kernel/core_pattern
    - name: Download TTS model (OuteTTS)
      run: test -f models/${TTS_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TTS_MODEL_URL} --create-dirs -o models/${TTS_MODEL_NAME}
    - name: Download TTS vocoder (WavTokenizer)
      run: test -f models/${TTS_VOCODER_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TTS_VOCODER_URL} --create-dirs -o models/${TTS_VOCODER_NAME}
    - name: Run tests
      run: |
        # Raise the core size limit in the shell that launches Maven so the
        # forked JVMs inherit it and a native crash leaves a core file matching
        # the pattern configured above.
        ulimit -c unlimited
        mvn -e --no-transfer-progress -P jcstress test \
          -Dnet.ladenthin.llama.tool.model=models/${TOOL_MODEL_NAME} \
          -Dnet.ladenthin.llama.nomic.path=models/${NOMIC_EMBED_MODEL_NAME} \
          -Dnet.ladenthin.llama.vision.model=models/${VISION_MODEL_NAME} \
          -Dnet.ladenthin.llama.vision.mmproj=models/${VISION_MMPROJ_NAME} \
          -Dnet.ladenthin.llama.vision.image=${VISION_IMAGE_PATH} \
          -Dnet.ladenthin.llama.tts.ttc.model=models/${TTS_MODEL_NAME} \
          -Dnet.ladenthin.llama.tts.vocoder.model=models/${TTS_VOCODER_NAME}
    # Coverage report is published only when everything so far succeeded; a
    # missing file is not an error.
    - uses: actions/upload-artifact@v7
      if: success()
      with:
        name: jacoco-report
        path: target/site/jacoco/jacoco.xml
        if-no-files-found: ignore
    - name: Run PIT mutation tests
      run: mvn --batch-mode --no-transfer-progress test-compile org.pitest:pitest-maven:mutationCoverage
    # Prints every PIT HTML report line mentioning SURVIVED (with context) to
    # the job log; runs even when an earlier step failed.
    - name: Extract PIT survivors
      if: always()
      run: |
        echo "=== PIT Survived Mutations ==="
        for html_file in $(find target/pit-reports -name "*.html" -type f 2>/dev/null | sort); do
          if grep -q "SURVIVED" "$html_file"; then
            echo "Found survivors in $html_file:"
            grep -B 2 -A 3 "SURVIVED" "$html_file"
            echo ""
          fi
        done
    - uses: actions/upload-artifact@v7
      if: always()
      with:
        name: pit-reports
        path: target/pit-reports/
    - name: Memory after tests
      if: always()
      run: free -h
    # Crash diagnostics, collected only when the job failed: JVM error logs,
    # core files, heap dumps and the surefire reports.
    - if: failure()
      uses: actions/upload-artifact@v7
      with:
        name: error-log-linux-x86_64
        path: |
          ${{ github.workspace }}/hs_err_pid*.log
          ${{ github.workspace }}/core.*
          ${{ github.workspace }}/*.hprof
          ${{ github.workspace }}/target/surefire-reports/*.dump
          ${{ github.workspace }}/target/surefire-reports/*.dumpstream
          ${{ github.workspace }}/target/surefire-reports/*.txt
          ${{ github.workspace }}/target/surefire-reports/TEST-*.xml
        if-no-files-found: warn
| # --------------------------------------------------------------------------- | |
| # vmlens interleaving analysis — pure-Java, needs no native library or models. | |
| # Staged to a single smoke test for now (see the `vmlens` profile in pom.xml). | |
| # --------------------------------------------------------------------------- | |
# Runs the selected tests under the `vmlens` Maven profile and publishes the
# generated report directory.
vmlens:
  name: Test (vmlens interleavings)
  needs: startgate
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v7
    - uses: actions/setup-java@v5
      with:
        distribution: temurin
        java-version: ${{ env.JAVA_VERSION }}
        cache: maven
    # Every new test class in the `vmlens` package has to be appended to the
    # -Dtest list below: surefire's -Dtest matches simple class names rather
    # than package globs, and the default suite skips the vmlens package through
    # the managed surefire <excludes> in pom.xml.
    - name: Test under vmlens (interleaving analysis)
      run: >-
        mvn --batch-mode --no-transfer-progress -Pvmlens test
        -Dtest=VmlensInterleavingSmokeTest,SessionStateInterleavingTest -DfailIfNoTests=false
    # Published regardless of the test outcome; an absent report is tolerated.
    - uses: actions/upload-artifact@v7
      if: always()
      with:
        name: vmlens-report
        path: target/vmlens-report/
        if-no-files-found: ignore
# Java test job for macOS 14 arm64 (Metal build). It downloads the
# `macos-14-libraries` artifact, fetches the test models and runs `mvn test`
# with net.ladenthin.llama.test.ngl=0.
test-java-macos-arm64-metal:
  name: Java Tests macOS 14 arm64 (Metal)
  needs: build-macos-arm64-metal
  runs-on: macos-14
  steps:
    - uses: actions/checkout@v7
    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== CPU Information ==="
        sysctl hw.model hw.cachelinesize hw.cpufrequency hw.cachesize hw.physicalcpu hw.logicalcpu hw.packages hw.memsize hw.ncpu 2>/dev/null || true
        echo ""
        echo "=== Processor Details ==="
        system_profiler SPHardwareDataType
    - uses: actions/download-artifact@v8
      with:
        name: macos-14-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
    # GGUF model cache: keeps the ~5 GB of test models in GitHub's own Actions
    # cache so they are not re-downloaded from HuggingFace on every run (which
    # also avoids HF rate limits). It is always on; only the sccache compiler
    # cache is toggled by the `use_cache` workflow_dispatch input / USE_CACHE
    # env. Depot is deliberately not used here: GB-scale blobs are usage-priced
    # there and its file cache needs Depot-hosted runners. See CLAUDE.md.
    - name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
      uses: actions/cache@v5
      with:
        path: models/
        # Static key: bump the suffix when the model set or the URLs change.
        key: gguf-models-v1
    # Each download is skipped when the file already exists (e.g. restored from
    # the cache); curl is restricted to HTTPS, including on redirects.
    - name: Download text generation model
      run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
    - name: Download reranking model
      run: test -f models/${RERANKING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${RERANKING_MODEL_URL} --create-dirs -o models/${RERANKING_MODEL_NAME}
    - name: Download draft model
      run: test -f models/${DRAFT_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${DRAFT_MODEL_URL} --create-dirs -o models/${DRAFT_MODEL_NAME}
    - name: Download reasoning model
      run: test -f models/${REASONING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
    - name: Download tool-calling model
      run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
    # Quoted on purpose: in a plain scalar " #" starts a YAML comment, which
    # would cut the step name off before "#34)".
    - name: 'Download vision model (upstream kherud/java-llama.cpp#103 / #34)'
      run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
    - name: Download vision mmproj
      run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
    - name: List files in models directory
      run: ls -l models/
    - name: Validate model files
      run: bash .github/validate-models.sh
    - uses: actions/setup-java@v5
      with:
        distribution: temurin
        java-version: ${{ env.JAVA_VERSION }}
    - name: Memory before tests
      run: vm_stat && sysctl hw.memsize hw.physmem
    # NOTE(review): every `run` step starts its own shell, so this limit does
    # not carry over to "Run tests", and the failure upload below collects no
    # core files either. Confirm whether this step is still wanted.
    - name: Enable core dumps
      run: ulimit -c unlimited
    - name: Run tests
      run: |
        mvn -e --no-transfer-progress -Dnet.ladenthin.llama.test.ngl=0 test \
          -Dnet.ladenthin.llama.tool.model=models/${TOOL_MODEL_NAME} \
          -Dnet.ladenthin.llama.vision.model=models/${VISION_MODEL_NAME} \
          -Dnet.ladenthin.llama.vision.mmproj=models/${VISION_MMPROJ_NAME} \
          -Dnet.ladenthin.llama.vision.image=${VISION_IMAGE_PATH}
    - name: Memory after tests
      if: always()
      run: vm_stat && sysctl hw.memsize hw.physmem
    # Crash diagnostics, collected only when the job failed: JVM error logs,
    # heap dumps and the surefire reports.
    - if: failure()
      uses: actions/upload-artifact@v7
      with:
        name: error-log-macos-14-metal
        path: |
          ${{ github.workspace }}/hs_err_pid*.log
          ${{ github.workspace }}/*.hprof
          ${{ github.workspace }}/target/surefire-reports/*.dump
          ${{ github.workspace }}/target/surefire-reports/*.dumpstream
          ${{ github.workspace }}/target/surefire-reports/*.txt
          ${{ github.workspace }}/target/surefire-reports/TEST-*.xml
        if-no-files-found: warn
# Java test job for macOS 15 arm64 (build without Metal). It downloads the
# `macos-15-libraries` artifact, fetches the test models and runs `mvn test`.
test-java-macos-arm64-no-metal:
  name: Java Tests macOS 15 arm64 (no Metal)
  needs: build-macos-arm64-no-metal
  runs-on: macos-15
  steps:
    - uses: actions/checkout@v7
    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== CPU Information ==="
        sysctl hw.model hw.cachelinesize hw.cpufrequency hw.cachesize hw.physicalcpu hw.logicalcpu hw.packages hw.memsize hw.ncpu 2>/dev/null || true
        echo ""
        echo "=== Processor Details ==="
        system_profiler SPHardwareDataType
    - uses: actions/download-artifact@v8
      with:
        name: macos-15-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
    # GGUF model cache: keeps the ~5 GB of test models in GitHub's own Actions
    # cache so they are not re-downloaded from HuggingFace on every run (which
    # also avoids HF rate limits). It is always on; only the sccache compiler
    # cache is toggled by the `use_cache` workflow_dispatch input / USE_CACHE
    # env. Depot is deliberately not used here: GB-scale blobs are usage-priced
    # there and its file cache needs Depot-hosted runners. See CLAUDE.md.
    - name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
      uses: actions/cache@v5
      with:
        path: models/
        # Static key: bump the suffix when the model set or the URLs change.
        key: gguf-models-v1
    # Each download is skipped when the file already exists (e.g. restored from
    # the cache); curl is restricted to HTTPS, including on redirects.
    - name: Download text generation model
      run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
    - name: Download reranking model
      run: test -f models/${RERANKING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${RERANKING_MODEL_URL} --create-dirs -o models/${RERANKING_MODEL_NAME}
    - name: Download draft model
      run: test -f models/${DRAFT_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${DRAFT_MODEL_URL} --create-dirs -o models/${DRAFT_MODEL_NAME}
    - name: Download reasoning model
      run: test -f models/${REASONING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
    - name: Download tool-calling model
      run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
    # Quoted on purpose: in a plain scalar " #" starts a YAML comment, which
    # would cut the step name off before "#34)".
    - name: 'Download vision model (upstream kherud/java-llama.cpp#103 / #34)'
      run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
    - name: Download vision mmproj
      run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
    - name: List files in models directory
      run: ls -l models/
    - name: Validate model files
      run: bash .github/validate-models.sh
    - uses: actions/setup-java@v5
      with:
        distribution: temurin
        java-version: ${{ env.JAVA_VERSION }}
    - name: Memory before tests
      run: vm_stat && sysctl hw.memsize hw.physmem
    # NOTE(review): every `run` step starts its own shell, so this limit does
    # not carry over to "Run tests", and the failure upload below collects no
    # core files either. Confirm whether this step is still wanted.
    - name: Enable core dumps
      run: ulimit -c unlimited
    - name: Run tests
      run: |
        mvn -e --no-transfer-progress test \
          -Dnet.ladenthin.llama.tool.model=models/${TOOL_MODEL_NAME} \
          -Dnet.ladenthin.llama.vision.model=models/${VISION_MODEL_NAME} \
          -Dnet.ladenthin.llama.vision.mmproj=models/${VISION_MMPROJ_NAME} \
          -Dnet.ladenthin.llama.vision.image=${VISION_IMAGE_PATH}
    - name: Memory after tests
      if: always()
      run: vm_stat && sysctl hw.memsize hw.physmem
    # Crash diagnostics, collected only when the job failed: JVM error logs,
    # heap dumps and the surefire reports.
    - if: failure()
      uses: actions/upload-artifact@v7
      with:
        name: error-log-macos-15-no-metal
        path: |
          ${{ github.workspace }}/hs_err_pid*.log
          ${{ github.workspace }}/*.hprof
          ${{ github.workspace }}/target/surefire-reports/*.dump
          ${{ github.workspace }}/target/surefire-reports/*.dumpstream
          ${{ github.workspace }}/target/surefire-reports/*.txt
          ${{ github.workspace }}/target/surefire-reports/TEST-*.xml
        if-no-files-found: warn
# Java test job for macOS 15 arm64 (Metal build). It downloads the
# `macos-15-metal-libraries` artifact, fetches the test models and runs
# `mvn test`.
test-java-macos-arm64-metal-15:
  name: Java Tests macOS 15 arm64 (Metal)
  needs: build-macos-arm64-metal-15
  runs-on: macos-15
  steps:
    - uses: actions/checkout@v7
    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== CPU Information ==="
        sysctl hw.model hw.cachelinesize hw.cpufrequency hw.cachesize hw.physicalcpu hw.logicalcpu hw.packages hw.memsize hw.ncpu 2>/dev/null || true
        echo ""
        echo "=== Processor Details ==="
        system_profiler SPHardwareDataType
    - uses: actions/download-artifact@v8
      with:
        name: macos-15-metal-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
    # GGUF model cache: keeps the ~5 GB of test models in GitHub's own Actions
    # cache so they are not re-downloaded from HuggingFace on every run (which
    # also avoids HF rate limits). It is always on; only the sccache compiler
    # cache is toggled by the `use_cache` workflow_dispatch input / USE_CACHE
    # env. Depot is deliberately not used here: GB-scale blobs are usage-priced
    # there and its file cache needs Depot-hosted runners. See CLAUDE.md.
    - name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
      uses: actions/cache@v5
      with:
        path: models/
        # Static key: bump the suffix when the model set or the URLs change.
        key: gguf-models-v1
    # Each download is skipped when the file already exists (e.g. restored from
    # the cache); curl is restricted to HTTPS, including on redirects.
    - name: Download text generation model
      run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
    - name: Download reranking model
      run: test -f models/${RERANKING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${RERANKING_MODEL_URL} --create-dirs -o models/${RERANKING_MODEL_NAME}
    - name: Download draft model
      run: test -f models/${DRAFT_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${DRAFT_MODEL_URL} --create-dirs -o models/${DRAFT_MODEL_NAME}
    - name: Download reasoning model
      run: test -f models/${REASONING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
    - name: Download tool-calling model
      run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
    # Quoted on purpose: in a plain scalar " #" starts a YAML comment, which
    # would cut the step name off before "#34)".
    - name: 'Download vision model (upstream kherud/java-llama.cpp#103 / #34)'
      run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
    - name: Download vision mmproj
      run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
    - name: List files in models directory
      run: ls -l models/
    - name: Validate model files
      run: bash .github/validate-models.sh
    - uses: actions/setup-java@v5
      with:
        distribution: temurin
        java-version: ${{ env.JAVA_VERSION }}
    - name: Memory before tests
      run: vm_stat && sysctl hw.memsize hw.physmem
    # NOTE(review): every `run` step starts its own shell, so this limit does
    # not carry over to "Run tests", and the failure upload below collects no
    # core files either. Confirm whether this step is still wanted.
    - name: Enable core dumps
      run: ulimit -c unlimited
    - name: Run tests
      run: |
        mvn -e --no-transfer-progress test \
          -Dnet.ladenthin.llama.tool.model=models/${TOOL_MODEL_NAME} \
          -Dnet.ladenthin.llama.vision.model=models/${VISION_MODEL_NAME} \
          -Dnet.ladenthin.llama.vision.mmproj=models/${VISION_MMPROJ_NAME} \
          -Dnet.ladenthin.llama.vision.image=${VISION_IMAGE_PATH}
    - name: Memory after tests
      if: always()
      run: vm_stat && sysctl hw.memsize hw.physmem
    # Crash diagnostics, collected only when the job failed: JVM error logs,
    # heap dumps and the surefire reports.
    - if: failure()
      uses: actions/upload-artifact@v7
      with:
        name: error-log-macos-15-metal
        path: |
          ${{ github.workspace }}/hs_err_pid*.log
          ${{ github.workspace }}/*.hprof
          ${{ github.workspace }}/target/surefire-reports/*.dump
          ${{ github.workspace }}/target/surefire-reports/*.dumpstream
          ${{ github.workspace }}/target/surefire-reports/*.txt
          ${{ github.workspace }}/target/surefire-reports/TEST-*.xml
        if-no-files-found: warn
# Java test job for Windows x86_64. It downloads the
# `Windows-x86_64-libraries` artifact, fetches the test models, registers WER
# crash dumps for java.exe and runs `mvn test`.
test-java-windows-x86_64:
  name: Java Tests Windows 2025 x86_64 (VS 2026)
  needs: build-windows-x86_64
  runs-on: windows-2025-vs2026
  steps:
    - uses: actions/checkout@v7
    - name: Display CPU Info
      shell: pwsh
      run: |
        Write-Host "=== CPU Information (Get-CimInstance - All Properties) ==="
        Get-CimInstance Win32_Processor | Select-Object * | Format-List
        Write-Host ""
        Write-Host "=== CPU Information (systeminfo) ==="
        systeminfo | Select-String "Processor"
        Write-Host ""
        Write-Host "=== CPU Information (Get-ComputerInfo) ==="
        Get-ComputerInfo -Property "CsProcessors*" 2>$null || Write-Host "Get-ComputerInfo not available"
    - uses: actions/download-artifact@v8
      with:
        name: Windows-x86_64-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
    - name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
      uses: actions/cache@v5
      with:
        path: models/
        # GGUF is platform-independent, so ubuntu + macOS + Windows are meant to
        # share one entry; bump the suffix when the model set / URLs change.
        key: gguf-models-v1
        # Required on the Windows side for that sharing to happen: by default
        # actions/cache keeps archives written or read on Windows in a separate,
        # Windows-only cache version.
        enableCrossOsArchive: true
    # Each download is skipped when the file already exists (e.g. restored from
    # the cache); curl is restricted to HTTPS, including on redirects.
    - name: Download text generation model
      run: if (-not (Test-Path "models/$env:MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME }
    - name: Download reranking model
      run: if (-not (Test-Path "models/$env:RERANKING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME }
    - name: Download draft model
      run: if (-not (Test-Path "models/$env:DRAFT_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME }
    - name: Download reasoning model
      run: if (-not (Test-Path "models/$env:REASONING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME }
    - name: Download tool-calling model
      run: if (-not (Test-Path "models/$env:TOOL_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME }
    # Quoted on purpose: in a plain scalar " #" starts a YAML comment, which
    # would cut the step name off before "#34)".
    - name: 'Download vision model (upstream kherud/java-llama.cpp#103 / #34)'
      run: if (-not (Test-Path "models/$env:VISION_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME }
    - name: Download vision mmproj
      run: if (-not (Test-Path "models/$env:VISION_MMPROJ_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME }
    - name: List files in models directory
      run: ls -l models/
    - name: Validate model files
      run: .github\validate-models.bat
    - uses: actions/setup-java@v5
      with:
        distribution: temurin
        java-version: ${{ env.JAVA_VERSION }}
    - name: Memory before tests
      shell: pwsh
      run: Get-CimInstance Win32_OperatingSystem | Select-Object FreePhysicalMemory,TotalVisibleMemorySize | Format-List
    # Windows Error Reporting writes a dump when a registered process such as
    # java.exe crashes (__fastfail / abort / unhandled SEH). It stands in for
    # Linux core dumps here, so a JVM crash inside the JNI layer yields a native
    # call stack rather than only surefire's "VM terminated without saying
    # goodbye" line. DumpType 2 requests full-memory dumps, DumpCount keeps at
    # most five, and the workspace dumps\ folder is picked up by the failure
    # upload at the end of this job.
    - name: Enable WER LocalDumps for java.exe
      shell: pwsh
      run: |
        $key = 'HKLM:\SOFTWARE\Microsoft\Windows\Windows Error Reporting\LocalDumps\java.exe'
        New-Item -Path $key -Force | Out-Null
        New-Item -Path "${{ github.workspace }}\dumps" -ItemType Directory -Force | Out-Null
        New-ItemProperty -Path $key -Name 'DumpFolder' -Value "${{ github.workspace }}\dumps" -PropertyType ExpandString -Force | Out-Null
        New-ItemProperty -Path $key -Name 'DumpType' -Value 2 -PropertyType DWord -Force | Out-Null
        New-ItemProperty -Path $key -Name 'DumpCount' -Value 5 -PropertyType DWord -Force | Out-Null
        Get-ItemProperty -Path $key | Format-List
    # Backticks are PowerShell line continuations; each -D argument is quoted so
    # PowerShell passes it to Maven as a single token.
    - name: Run tests
      run: |
        mvn -e --no-transfer-progress test `
          "-Dnet.ladenthin.llama.tool.model=models/$env:TOOL_MODEL_NAME" `
          "-Dnet.ladenthin.llama.vision.model=models/$env:VISION_MODEL_NAME" `
          "-Dnet.ladenthin.llama.vision.mmproj=models/$env:VISION_MMPROJ_NAME" `
          "-Dnet.ladenthin.llama.vision.image=$env:VISION_IMAGE_PATH"
    - name: Memory after tests
      if: always()
      shell: pwsh
      run: Get-CimInstance Win32_OperatingSystem | Select-Object FreePhysicalMemory,TotalVisibleMemorySize | Format-List
    # Crash diagnostics, collected only when the job failed: JVM error logs,
    # heap dumps, WER dumps, surefire reports and the native resource directory.
    - if: failure()
      uses: actions/upload-artifact@v7
      with:
        name: windows-output
        path: |
          ${{ github.workspace }}\hs_err_pid*.log
          ${{ github.workspace }}\*.hprof
          ${{ github.workspace }}\dumps\*.dmp
          ${{ github.workspace }}\target\surefire-reports\*.dump
          ${{ github.workspace }}\target\surefire-reports\*.dumpstream
          ${{ github.workspace }}\target\surefire-reports\*.txt
          ${{ github.workspace }}\target\surefire-reports\TEST-*.xml
          ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/**/*
        if-no-files-found: warn
| # Java/inference validation of the Ninja-built x86_64 DLL (the analogue of | |
| # test-java-windows-x86_64 for the MSVC build). Loads the Ninja jllama.dll via | |
| # JNI and runs the full model-backed suite, so both Windows generators are | |
| # validated end-to-end before the `ninja-windows` classifier JAR ships. | |
| test-java-windows-x86_64-ninja: | |
| name: Java Tests Windows 2025 x86_64 (Ninja, eval) | |
| needs: build-windows-x86_64-ninja | |
| runs-on: windows-2025-vs2026 | |
| steps: | |
| - uses: actions/checkout@v7 | |
| - name: Display CPU Info | |
| shell: pwsh | |
| run: | | |
| Write-Host "=== CPU Information (Get-CimInstance - All Properties) ===" | |
| Get-CimInstance Win32_Processor | Select-Object * | Format-List | |
| Write-Host "" | |
| Write-Host "=== CPU Information (systeminfo) ===" | |
| systeminfo | Select-String "Processor" | |
| Write-Host "" | |
| Write-Host "=== CPU Information (Get-ComputerInfo) ===" | |
| Get-ComputerInfo -Property "CsProcessors*" 2>$null || Write-Host "Get-ComputerInfo not available" | |
| - uses: actions/download-artifact@v8 | |
| with: | |
| name: Windows-x86_64-ninja | |
| path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/ | |
| - name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace) | |
| uses: actions/cache@v5 | |
| with: | |
| path: models/ | |
| # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry; | |
| # bump the suffix when the model set / URLs change. | |
| key: gguf-models-v1 | |
| - name: Download text generation model | |
| run: if (-not (Test-Path "models/$env:MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME } | |
| - name: Download reranking model | |
| run: if (-not (Test-Path "models/$env:RERANKING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME } | |
| - name: Download draft model | |
| run: if (-not (Test-Path "models/$env:DRAFT_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME } | |
| - name: Download reasoning model | |
| run: if (-not (Test-Path "models/$env:REASONING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME } | |
| - name: Download tool-calling model | |
| run: if (-not (Test-Path "models/$env:TOOL_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME } | |
| - name: Download vision model (upstream kherud/java-llama.cpp#103 / #34) | |
| run: if (-not (Test-Path "models/$env:VISION_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME } | |
| - name: Download vision mmproj | |
| run: if (-not (Test-Path "models/$env:VISION_MMPROJ_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME } | |
| - name: List files in models directory | |
| run: ls -l models/ | |
| - name: Validate model files | |
| run: .github\validate-models.bat | |
| - uses: actions/setup-java@v5 | |
| with: | |
| distribution: 'temurin' | |
| java-version: ${{ env.JAVA_VERSION }} | |
| - name: Memory before tests | |
| run: Get-CimInstance Win32_OperatingSystem | Select-Object FreePhysicalMemory,TotalVisibleMemorySize | Format-List | |
| shell: pwsh | |
| - name: Enable WER LocalDumps for java.exe | |
| # Windows Error Reporting writes minidumps when java.exe (or any other | |
| # registered process) crashes via __fastfail / abort / unhandled SEH. | |
| # We use it as the Windows analogue of Linux core dumps so that a JVM | |
| # crash inside the JNI layer leaves us a real native callstack instead | |
| # of just surefire's "VM terminated without saying goodbye" line. | |
| # DumpType=2 == MiniDumpWithFullMemory; the workspace dumps/ folder is | |
| # globbed by the failure-upload step below. | |
| shell: pwsh | |
| run: | | |
| $key = 'HKLM:\SOFTWARE\Microsoft\Windows\Windows Error Reporting\LocalDumps\java.exe' | |
| New-Item -Path $key -Force | Out-Null | |
| New-Item -Path "${{ github.workspace }}\dumps" -ItemType Directory -Force | Out-Null | |
| New-ItemProperty -Path $key -Name 'DumpFolder' -Value "${{ github.workspace }}\dumps" -PropertyType ExpandString -Force | Out-Null | |
| New-ItemProperty -Path $key -Name 'DumpType' -Value 2 -PropertyType DWord -Force | Out-Null | |
| New-ItemProperty -Path $key -Name 'DumpCount' -Value 5 -PropertyType DWord -Force | Out-Null | |
| Get-ItemProperty -Path $key | Format-List | |
| - name: Run tests | |
| run: | | |
| mvn -e --no-transfer-progress test ` | |
| "-Dnet.ladenthin.llama.tool.model=models/$env:TOOL_MODEL_NAME" ` | |
| "-Dnet.ladenthin.llama.vision.model=models/$env:VISION_MODEL_NAME" ` | |
| "-Dnet.ladenthin.llama.vision.mmproj=models/$env:VISION_MMPROJ_NAME" ` | |
| "-Dnet.ladenthin.llama.vision.image=$env:VISION_IMAGE_PATH" | |
| - name: Memory after tests | |
| if: always() | |
| run: Get-CimInstance Win32_OperatingSystem | Select-Object FreePhysicalMemory,TotalVisibleMemorySize | Format-List | |
| shell: pwsh | |
| - if: failure() | |
| uses: actions/upload-artifact@v7 | |
| with: | |
| name: windows-output-ninja | |
| path: | | |
| ${{ github.workspace }}\hs_err_pid*.log | |
| ${{ github.workspace }}\*.hprof | |
| ${{ github.workspace }}\dumps\*.dmp | |
| ${{ github.workspace }}\target\surefire-reports\*.dump | |
| ${{ github.workspace }}\target\surefire-reports\*.dumpstream | |
| ${{ github.workspace }}\target\surefire-reports\*.txt | |
| ${{ github.workspace }}\target\surefire-reports\TEST-*.xml | |
| ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/**/* | |
| if-no-files-found: warn | |
| # --------------------------------------------------------------------------- | |
| # Package and publish | |
| # --------------------------------------------------------------------------- | |
package:
  # Builds every JAR flavour from the native libraries produced by the jobs
  # listed under `needs`, then uploads target/*.jar as the `llama-jars` artifact.
  name: Package JARs
  runs-on: ubuntu-latest
  needs:
    - crosscompile-linux-x86_64-cuda
    - crosscompile-linux-aarch64
    - crosscompile-android-aarch64
    - crosscompile-android-aarch64-opencl
    - build-windows-x86
    - build-windows-x86_64-ninja
    - build-windows-x86-ninja
    - test-cpp-linux-x86_64
    - build-macos-arm64-metal-15
    - test-java-linux-x86_64
    - test-java-macos-arm64-metal
    - test-java-macos-arm64-no-metal
    - test-java-macos-arm64-metal-15
    - test-java-windows-x86_64
    - test-java-windows-x86_64-ninja
  steps:
    - uses: actions/checkout@v7
    # Every artifact named `*-libraries` is merged into the default resource tree.
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        pattern: '*-libraries'
        merge-multiple: true
    # The CUDA and OpenCL natives each go into their own resource tree.
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_linux_cuda/net/ladenthin/llama/
        name: linux-libraries-cuda
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_android_opencl/net/ladenthin/llama/
        name: android-libraries-opencl
    # The Ninja-built Windows natives land in a separate tree that the
    # `windows-ninja` Maven profile packages as the `ninja-windows` classifier
    # JAR. The default JAR keeps the MSVC natives from the `*-libraries`
    # download above.
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_windows_ninja/net/ladenthin/llama/
        name: Windows-x86_64-ninja
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_windows_ninja/net/ladenthin/llama/
        name: Windows-x86-ninja
    - uses: actions/setup-java@v5
      with:
        java-version: ${{ env.JAVA_VERSION }}
        distribution: temurin
    - name: Build JARs
      # Profiles:
      #   assembly      - also builds the fat uber JAR
      #                   (llama-<version>-jar-with-dependencies.jar: library
      #                   classes + Java runtime deps + default-platform native
      #                   libs, runnable via its OpenAiCompatServer Main-Class).
      #                   It is written to target/ and shipped only in the
      #                   `llama-jars` CI artifact below, not to Maven Central
      #                   or a GitHub Release.
      #   windows-ninja - attaches the `ninja-windows` classifier JAR
      #                   (Ninja-built Windows natives).
      # Tests and GPG signing are skipped here.
      run: >-
        mvn --batch-mode --no-transfer-progress
        -P release,cuda,opencl-android,windows-ninja,assembly
        -Dmaven.test.skip=true -Dgpg.skip=true
        package
    - name: Upload JARs
      uses: actions/upload-artifact@v7
      with:
        path: target/*.jar
        name: llama-jars
report:
  # Reporting stage that runs after packaging: submits the Maven dependency
  # graph and sends the JaCoCo XML report to Coveralls and Codecov.
  name: Report
  runs-on: ubuntu-latest
  needs:
    - package
  permissions:
    contents: write
  steps:
    - uses: actions/checkout@v7
    - uses: actions/setup-java@v5
      with:
        distribution: temurin
        java-version: ${{ env.JAVA_VERSION }}
    # Best effort: a missing coverage artifact must not fail the job.
    - uses: actions/download-artifact@v8
      continue-on-error: true
      with:
        name: jacoco-report
        path: target/site/jacoco/
    - uses: advanced-security/maven-dependency-submission-action@v5
    # Both coverage uploads are best effort as well.
    - name: Coveralls
      uses: coverallsapp/github-action@v2
      continue-on-error: true
      with:
        format: jacoco
        file: target/site/jacoco/jacoco.xml
        github-token: ${{ secrets.GITHUB_TOKEN }}
    - name: Codecov
      uses: codecov/codecov-action@v7
      continue-on-error: true
      with:
        files: target/site/jacoco/jacoco.xml
        token: ${{ secrets.CODECOV_TOKEN }}
check-snapshot:
  # Gate job for the snapshot path. It runs only for a push to main or for a
  # manual dispatch on a ref that is not a v* tag; publish-snapshot checks
  # this job's result before deploying.
  name: "Check: main branch / SNAPSHOT"
  needs: [report]
  runs-on: ubuntu-latest
  if: >-
    (github.event_name == 'push' && github.ref == 'refs/heads/main') ||
    (github.event_name == 'workflow_dispatch' && !startsWith(github.ref, 'refs/tags/v'))
  steps:
    - name: Confirm snapshot ref
      # The ref reaches the shell through an environment variable instead of
      # being expanded into the script text. A manually dispatched branch
      # whose name contains shell metacharacters (for example `$(...)`) is
      # therefore printed literally rather than executed.
      env:
        GIT_REF: ${{ github.ref }}
      run: echo "Confirmed on snapshot ref $GIT_REF"
check-tag:
  # Gate job for the release path. It runs only when the workflow was
  # triggered on a ref under refs/tags/v; publish-release checks this job's
  # result before deploying.
  name: "Check: v* tag"
  needs: [report]
  runs-on: ubuntu-latest
  if: startsWith(github.ref, 'refs/tags/v')
  steps:
    - name: Confirm tag ref
      # The ref reaches the shell through an environment variable instead of
      # being expanded into the script text. A tag name containing shell
      # metacharacters (for example `$(...)`) is therefore printed literally
      # rather than executed.
      env:
        GIT_REF: ${{ github.ref }}
      run: echo "Confirmed on tag $GIT_REF"
publish-snapshot:
  # Runs `mvn deploy` against the `central` server for a -SNAPSHOT version.
  # It is active only when the check-snapshot gate succeeded and the
  # `publish_to_central` input is truthy.
  name: Publish Snapshot to Central
  environment: maven-central
  runs-on: ubuntu-latest
  needs:
    - check-snapshot
    - crosscompile-linux-x86_64-cuda
    - crosscompile-android-aarch64-opencl
    - code-style
  if: needs.check-snapshot.result == 'success' && inputs.publish_to_central
  permissions:
    contents: write
  steps:
    - uses: actions/checkout@v7
    # Native libraries: the default tree first, then one tree per extra flavour.
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        pattern: '*-libraries'
        merge-multiple: true
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_linux_cuda/net/ladenthin/llama/
        name: linux-libraries-cuda
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_android_opencl/net/ladenthin/llama/
        name: android-libraries-opencl
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_windows_ninja/net/ladenthin/llama/
        name: Windows-x86_64-ninja
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_windows_ninja/net/ladenthin/llama/
        name: Windows-x86-ninja
    - name: Set up Maven Central Repository
      uses: actions/setup-java@v5
      with:
        distribution: temurin
        java-version: ${{ env.JAVA_VERSION }}
        # The values below are the NAMES of environment variables; the
        # secrets themselves are supplied on the deploy step.
        server-id: central
        server-username: MAVEN_USERNAME
        server-password: MAVEN_PASSWORD
        gpg-passphrase: MAVEN_GPG_PASSPHRASE
        gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }}
    - name: Guard - require a -SNAPSHOT version
      # Aborts the job before deploying unless the resolved POM version ends
      # in -SNAPSHOT.
      shell: bash
      run: |
        VERSION=$(mvn -q -DforceStdout help:evaluate -Dexpression=project.version | tail -n1)
        echo "Resolved project version: $VERSION"
        case "$VERSION" in
          *-SNAPSHOT) echo "OK: -SNAPSHOT version, continuing snapshot deploy." ;;
          *) echo "::error::Refusing to publish non-SNAPSHOT version '$VERSION' from the snapshot job. Snapshot publishing requires a -SNAPSHOT version; releases go through the v* tag path."; exit 1 ;;
        esac
    - name: Publish snapshot
      env:
        MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
        MAVEN_PASSWORD: ${{ secrets.CENTRAL_TOKEN }}
        MAVEN_USERNAME: ${{ secrets.CENTRAL_USERNAME }}
      run: >-
        mvn --batch-mode --no-transfer-progress
        -P release,cuda,opencl-android,windows-ninja
        -Dmaven.test.skip=true
        deploy
    - name: Collect signed artifacts
      # Best-effort copy: a glob that matches nothing is tolerated.
      run: |
        mkdir -p signed-snapshot-assets
        cp target/*.jar signed-snapshot-assets/ 2>/dev/null || true
        cp target/*.jar.asc signed-snapshot-assets/ 2>/dev/null || true
    - uses: actions/upload-artifact@v7
      with:
        path: signed-snapshot-assets/
        name: signed-snapshot-assets
github-snapshot:
  # Uploads the JARs and signatures collected by publish-snapshot to the
  # `snapshot` pre-release on GitHub.
  name: Update Snapshot Pre-release on GitHub
  runs-on: ubuntu-latest
  needs:
    - publish-snapshot
  if: needs.publish-snapshot.result == 'success'
  permissions:
    contents: write
  steps:
    - uses: actions/download-artifact@v8
      with:
        path: snapshot-assets/
        name: signed-snapshot-assets
    - name: Update snapshot pre-release
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
        # Create the `snapshot` pre-release only if it does not exist yet.
        if ! gh release view snapshot --repo ${{ github.repository }} 2>/dev/null; then
          gh release create snapshot \
            --repo ${{ github.repository }} \
            --prerelease \
            --title "Snapshot (latest)" \
            --notes "Latest snapshot build from the main branch."
        fi
        # --clobber overwrites assets that already exist under the same name.
        gh release upload snapshot snapshot-assets/* \
          --repo ${{ github.repository }} \
          --clobber
publish-release:
  # Runs `mvn deploy` against the `central` server for a release version.
  # It is active only when the check-tag gate succeeded and the
  # `publish_to_central` input is truthy.
  name: Publish Release to Central
  needs: [check-tag, crosscompile-linux-x86_64-cuda, crosscompile-android-aarch64-opencl, code-style]
  if: needs.check-tag.result == 'success' && inputs.publish_to_central
  runs-on: ubuntu-latest
  environment: maven-central
  permissions:
    contents: write
  steps:
    - uses: actions/checkout@v7
    # Native libraries: the default tree first, then one tree per extra flavour.
    - uses: actions/download-artifact@v8
      with:
        pattern: "*-libraries"
        merge-multiple: true
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
    - uses: actions/download-artifact@v8
      with:
        name: linux-libraries-cuda
        path: ${{ github.workspace }}/src/main/resources_linux_cuda/net/ladenthin/llama/
    - uses: actions/download-artifact@v8
      with:
        name: android-libraries-opencl
        path: ${{ github.workspace }}/src/main/resources_android_opencl/net/ladenthin/llama/
    - uses: actions/download-artifact@v8
      with:
        name: Windows-x86_64-ninja
        path: ${{ github.workspace }}/src/main/resources_windows_ninja/net/ladenthin/llama/
    - uses: actions/download-artifact@v8
      with:
        name: Windows-x86-ninja
        path: ${{ github.workspace }}/src/main/resources_windows_ninja/net/ladenthin/llama/
    - name: Set up Maven Central Repository
      uses: actions/setup-java@v5
      with:
        java-version: ${{ env.JAVA_VERSION }}
        distribution: 'temurin'
        # The values below are the NAMES of environment variables; the
        # secrets themselves are supplied on the deploy step.
        server-id: central
        server-username: MAVEN_USERNAME
        server-password: MAVEN_PASSWORD
        gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }}
        gpg-passphrase: MAVEN_GPG_PASSPHRASE
    - name: Guard - require a non-SNAPSHOT version
      # Mirror of the guard in the snapshot job: a v* tag placed on a commit
      # whose POM still carries a -SNAPSHOT version must not be deployed (and
      # later attached to a GitHub release) as if it were a release.
      shell: bash
      run: |
        VERSION=$(mvn -q -DforceStdout help:evaluate -Dexpression=project.version | tail -n1)
        echo "Resolved project version: $VERSION"
        case "$VERSION" in
          *-SNAPSHOT) echo "::error::Refusing to publish -SNAPSHOT version '$VERSION' from the release job. Release publishing requires a non-SNAPSHOT version; snapshots go through the main-branch snapshot path."; exit 1 ;;
          *) echo "OK: release version, continuing release deploy." ;;
        esac
    - name: Publish release
      run: mvn --batch-mode --no-transfer-progress -P release,cuda,opencl-android,windows-ninja -Dmaven.test.skip=true deploy
      env:
        MAVEN_USERNAME: ${{ secrets.CENTRAL_USERNAME }}
        MAVEN_PASSWORD: ${{ secrets.CENTRAL_TOKEN }}
        MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
    - name: Collect signed artifacts
      # Best-effort copy: a glob that matches nothing is tolerated.
      run: |
        mkdir -p signed-release-assets
        cp target/*.jar signed-release-assets/ 2>/dev/null || true
        cp target/*.jar.asc signed-release-assets/ 2>/dev/null || true
    - uses: actions/upload-artifact@v7
      with:
        name: signed-release-assets
        path: signed-release-assets/
github-release-signed:
  # Attaches the JARs and signatures collected by publish-release to the
  # GitHub release.
  name: Attach Signed Binaries to GitHub Release
  runs-on: ubuntu-latest
  needs:
    - publish-release
  if: needs.publish-release.result == 'success'
  permissions:
    contents: write
  steps:
    - uses: actions/download-artifact@v8
      with:
        path: release-assets/
        name: signed-release-assets
    - name: Upload release assets
      uses: softprops/action-gh-release@v3
      with:
        files: 'release-assets/*'