Skip to content

docs: Add AI-assisted development note to README #437

docs: Add AI-assisted development note to README

docs: Add AI-assisted development note to README #437

Workflow file for this run

# SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
# SPDX-FileCopyrightText: 2023-2025 Konstantin Herud
#
# SPDX-License-Identifier: MIT
# Workflow identity and triggers: pushes to main and to v* tags, every pull
# request, and manual dispatch with two boolean switches.
name: Publish
on:
  push:
    branches:
      - main
    tags:
      - 'v*'
  pull_request:
  workflow_dispatch:
    inputs:
      # Opt-in deployment switch for manual runs; off unless ticked.
      publish_to_central:
        type: boolean
        default: false
        description: 'Deploy to Maven Central (snapshot if -SNAPSHOT, release if a vX.Y.Z tag)'
      # Compiler-cache switch for manual runs; on unless unticked.
      use_cache:
        type: boolean
        default: true
        description: 'Use the shared sccache/Depot compiler cache (faster incremental builds)'
# Workflow-wide constants: the JDK version plus a URL / file-name pair for
# every GGUF model the test jobs fetch into models/.
env:
  JAVA_VERSION: "21"
  MODEL_URL: 'https://huggingface.co/TheBloke/CodeLlama-7B-GGUF/resolve/main/codellama-7b.Q2_K.gguf'
  MODEL_NAME: 'codellama-7b.Q2_K.gguf'
  RERANKING_MODEL_URL: 'https://huggingface.co/gpustack/jina-reranker-v1-tiny-en-GGUF/resolve/main/jina-reranker-v1-tiny-en-Q4_0.gguf'
  RERANKING_MODEL_NAME: 'jina-reranker-v1-tiny-en-Q4_0.gguf'
  DRAFT_MODEL_URL: 'https://huggingface.co/QuantFactory/AMD-Llama-135m-code-GGUF/resolve/main/AMD-Llama-135m-code.Q2_K.gguf'
  DRAFT_MODEL_NAME: 'AMD-Llama-135m-code.Q2_K.gguf'
  REASONING_MODEL_URL: 'https://huggingface.co/unsloth/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B-Q4_K_M.gguf'
  REASONING_MODEL_NAME: 'Qwen3-0.6B-Q4_K_M.gguf'
  TOOL_MODEL_URL: 'https://huggingface.co/bartowski/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf'
  TOOL_MODEL_NAME: 'Qwen2.5-1.5B-Instruct-Q4_K_M.gguf'
  NOMIC_EMBED_MODEL_URL: 'https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF/resolve/main/nomic-embed-text-v1.5.f16.gguf'
  NOMIC_EMBED_MODEL_NAME: 'nomic-embed-text-v1.5.f16.gguf'
  # Vision model and its mmproj companion, consumed by MultimodalIntegrationTest
  # (upstream kherud/java-llama.cpp#103 / #34). SmolVLM-500M was picked as the
  # smallest community vision GGUF that loads reliably under the upstream mtmd
  # pipeline; model + mmproj together are ~600 MB, in line with the existing
  # per-test-job download budget.
  VISION_MODEL_URL: 'https://huggingface.co/ggml-org/SmolVLM-500M-Instruct-GGUF/resolve/main/SmolVLM-500M-Instruct-Q8_0.gguf'
  VISION_MODEL_NAME: 'SmolVLM-500M-Instruct-Q8_0.gguf'
  VISION_MMPROJ_URL: 'https://huggingface.co/ggml-org/SmolVLM-500M-Instruct-GGUF/resolve/main/mmproj-SmolVLM-500M-Instruct-Q8_0.gguf'
  VISION_MMPROJ_NAME: 'mmproj-SmolVLM-500M-Instruct-Q8_0.gguf'
  # The image for MultimodalIntegrationTest lives in the repository itself
  # (src/test/resources/images/test-image.jpg; licensing is covered by the
  # README in that directory), so there is nothing to download: CI simply
  # hands this committed path to mvn test.
  VISION_IMAGE_PATH: 'src/test/resources/images/test-image.jpg'
# Workflow-level default for the GITHUB_TOKEN: read-only access to repository
# contents.
permissions:
  contents: read
jobs:
# ---------------------------------------------------------------------------
# Start gate: the one cancellable abort window before the pipeline does any
# work. The length of the wait is not set in this file; it is configured on the
# `startgate` GitHub Environment (Settings → Environments → startgate →
# Wait timer).
# ---------------------------------------------------------------------------
startgate:
  name: Start gate (abort window)
  environment: startgate
  runs-on: ubuntu-latest
  steps:
    # Only a log line; the job exists for the environment it is bound to.
    - run: echo "Start gate elapsed — proceeding with pipeline."
# ---------------------------------------------------------------------------
# Pre-build jobs (code style, shared WebUI assets), followed by the
# cross-compile jobs (Docker / dockcross) — produce release artifacts, no testing
# ---------------------------------------------------------------------------
# Formatting gate (spotless) plus an informational jdeps listing of the
# project's internal package dependencies.
code-style:
  name: Code style (spotless) + package graph
  needs: startgate
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v7
    - uses: actions/setup-java@v5
      with:
        distribution: temurin
        # Read the workflow-level JAVA_VERSION, as the other setup-java steps
        # in this workflow do, so a JDK bump is made in exactly one place.
        java-version: ${{ env.JAVA_VERSION }}
    - name: Spotless check (fail fast on format violations)
      run: mvn -B --no-transfer-progress spotless:check
    - name: Print internal package dependency graph (jdeps, informational)
      # Informational only: a failure in this step must not fail the job.
      continue-on-error: true
      run: |
        mvn -B --no-transfer-progress -DskipTests -Denforcer.skip=true compile
        echo "=== internal package dependency graph (jdeps, bytecode) ==="
        jdeps -verbose:package target/classes | grep 'net.ladenthin.llama' || true
# ---------------------------------------------------------------------------
# The llama.cpp WebUI is built exactly ONCE, at the same pinned tag that
# CMakeLists.txt fetches, and handed to every native build as the generated,
# platform-independent ui.cpp/ui.h pair (artifact "webui-generated"). Native
# builds embed it into libjllama (CMake's "WebUI assets" block); if the
# artifact is missing, the build falls back to the empty-asset stub. npm is
# confined to this single controlled job — it never runs in the dockcross
# cross-compilers (which have no node) nor once per platform.
# ---------------------------------------------------------------------------
build-webui:
  name: Build WebUI assets (shared)
  runs-on: ubuntu-latest
  needs: startgate
  steps:
    - uses: actions/checkout@v7
    # Extract the b<nnnn> tag from CMakeLists.txt and expose it as the step
    # output `tag`; the job fails when no tag can be found.
    - name: Resolve pinned llama.cpp tag from CMakeLists.txt
      id: tag
      shell: bash
      run: |
        TAG=$(grep -oE 'GIT_TAG[[:space:]]+b[0-9]+' CMakeLists.txt | grep -oE 'b[0-9]+' | head -1)
        if [ -z "$TAG" ]; then
          echo "could not resolve llama.cpp GIT_TAG (b<nnnn>) from CMakeLists.txt" >&2
          exit 1
        fi
        echo "tag=$TAG" >> "$GITHUB_OUTPUT"
        echo "Pinned llama.cpp WebUI tag: $TAG"
    # Sparse checkout: only tools/ui of llama.cpp, into ./llamacpp-ui.
    - name: Checkout llama.cpp tools/ui at the pinned tag
      uses: actions/checkout@v7
      with:
        repository: ggml-org/llama.cpp
        path: llamacpp-ui
        ref: ${{ steps.tag.outputs.tag }}
        sparse-checkout-cone-mode: true
        sparse-checkout: tools/ui
    - uses: actions/setup-node@v6
      with:
        node-version: '24'
        cache-dependency-path: llamacpp-ui/tools/ui/package-lock.json
        cache: npm
    # The final `test -f` makes the step fail if the build produced no
    # dist/index.html.
    - name: Build WebUI (Svelte/Vite)
      working-directory: llamacpp-ui/tools/ui
      env:
        LLAMA_BUILD_NUMBER: ${{ steps.tag.outputs.tag }}
        HF_UI_VERSION: ${{ steps.tag.outputs.tag }}
      run: |
        npm ci --ignore-scripts
        npm run build
        test -f dist/index.html
    # Compress the assets, compile the embed tool, generate ui.cpp/ui.h into
    # $GITHUB_WORKSPACE/webui-generated and fail if ui.h lacks
    # LLAMA_UI_HAS_ASSETS.
    - name: Embed assets into ui.cpp / ui.h (gzip parity with upstream)
      shell: bash
      working-directory: llamacpp-ui/tools/ui
      run: |
        set -euo pipefail
        # gzip every asset into dist/_gzip/<path> so llama-ui-embed embeds the
        # compressed bytes (LLAMA_UI_GZIP parity); embed auto-detects _gzip.
        ( cd dist && find . -type f -not -path './_gzip/*' | while read -r f; do
            mkdir -p "_gzip/$(dirname "$f")"
            gzip -9 -c "$f" > "_gzip/$f"
          done )
        # llama-ui-embed is a self-contained C++17 host tool (no npm) — build + run it.
        g++ -O2 -std=c++17 -o llama-ui-embed embed.cpp
        mkdir -p "$GITHUB_WORKSPACE/webui-generated"
        ./llama-ui-embed \
          "$GITHUB_WORKSPACE/webui-generated/ui.cpp" \
          "$GITHUB_WORKSPACE/webui-generated/ui.h" \
          dist
        echo "=== generated WebUI assets ==="
        ls -la "$GITHUB_WORKSPACE/webui-generated"
        if grep -q LLAMA_UI_HAS_ASSETS "$GITHUB_WORKSPACE/webui-generated/ui.h"; then
          echo "LLAMA_UI_HAS_ASSETS: present (real WebUI embedded)"
        else
          echo "ERROR: embed produced an empty asset table" >&2
          exit 1
        fi
    # Short-lived (1 day) hand-off artifact; an empty upload is an error.
    - name: Upload WebUI artifact
      uses: actions/upload-artifact@v7
      with:
        name: webui-generated
        path: ${{ github.workspace }}/webui-generated/
        if-no-files-found: error
        retention-days: 1
crosscompile-linux-x86_64-cuda:
  name: Cross-Compile manylinux_2_28 x86_64 (CUDA)
  runs-on: ubuntu-latest
  needs: [startgate, build-webui]
  # CUDA compiler cache.
  # - build_cuda_linux.sh execs build.sh, so this job is guarded by the same
  #   sccache probe as the other builds.
  # - For CUDA builds build.sh also wraps nvcc
  #   (CMAKE_CUDA_COMPILER_LAUNCHER=sccache); the per-arch .cu device passes —
  #   the dominant cost of this job — are therefore cached over Depot together
  #   with the gcc host TUs.
  # - Verified on a warm run: 100% hit on CUDA / CUBIN / device-code (139 CUDA
  #   hits, 99.86% overall), taking the job from ~51 min cold to ~15 min warm.
  # - Consequently CI always builds the FULL CMAKE_CUDA_ARCHITECTURES set (no
  #   single-arch shortcut) and relies on the warm cache for speed, so every
  #   artifact — PR, push or publish — stays release-safe (runs on every GPU
  #   generation). CUDA_FAST_BUILD still exists in build_cuda_linux.sh as a
  #   LOCAL-dev knob, but CI no longer sets it.
  # - The first-run sccache debug diagnostics (SCCACHE_LOG / SCCACHE_ERROR_LOG
  #   / RUST_BACKTRACE) were removed once caching was confirmed; build.sh still
  #   prints the `sccache --show-stats` hit table at the end of every run.
  # - Inert without DEPOT_TOKEN (fork PRs) or with use_cache=false.
  env:
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    DOCKCROSS_ARGS: '-e SCCACHE_WEBDAV_ENDPOINT -e SCCACHE_WEBDAV_TOKEN -e USE_CACHE'
  steps:
    - uses: actions/checkout@v7
    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/webui-generated/
        name: webui-generated
    # Diagnostic output only.
    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== Host CPU Information ==="
        lscpu
        echo ""
        echo "=== CPU Details from /proc/cpuinfo ==="
        cat /proc/cpuinfo
    # Runs the CUDA build script inside the manylinux_2_28 dockcross wrapper.
    - name: Build libraries
      shell: bash
      run: |
        .github/dockcross/dockcross-manylinux_2_28-x64 .github/build_cuda_linux.sh "-DOS_NAME=Linux -DOS_ARCH=x86_64"
    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        path: ${{ github.workspace }}/src/main/resources_linux_cuda/net/ladenthin/llama/
        name: linux-libraries-cuda
crosscompile-linux-x86_64:
  name: Cross-Compile manylinux2014 x86_64
  runs-on: ubuntu-latest
  needs: [startgate, build-webui]
  # Phase 2 dockcross cache rollout — job 1, VERIFIED green in CI (PR #245):
  # the sccache v0.16.0 probe passed inside the container (devtoolset-10 gcc)
  # and the cache is ON over Depot WebDAV (cold run: 275 objects stored).
  # What follows is the steady-state env; the first-run diagnostics
  # (SCCACHE_LOG / SCCACHE_ERROR_LOG / RUST_BACKTRACE) were removed once the
  # setup was proven. Inert without DEPOT_TOKEN (fork PRs) or with
  # use_cache=false; if sccache crashes, the build.sh probe still falls back
  # to a green uncached build.
  env:
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    DOCKCROSS_ARGS: '-e SCCACHE_WEBDAV_ENDPOINT -e SCCACHE_WEBDAV_TOKEN -e USE_CACHE'
  steps:
    - uses: actions/checkout@v7
    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/webui-generated/
        name: webui-generated
    # Diagnostic output only.
    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== Host CPU Information ==="
        lscpu
        echo ""
        echo "=== CPU Details from /proc/cpuinfo ==="
        cat /proc/cpuinfo
    # Runs build.sh inside the manylinux2014 dockcross wrapper.
    - name: Build libraries
      shell: bash
      run: |
        .github/dockcross/dockcross-manylinux2014-x64 .github/build.sh "-DOS_NAME=Linux -DOS_ARCH=x86_64"
    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        name: Linux-x86_64-libraries
crosscompile-linux-aarch64:
  name: Build and Test Linux aarch64
  # Native ARM64 build on GitHub's free arm64 runner, modelled on upstream
  # llama.cpp's `ubuntu-cpu` aarch64 release job (ubuntu-24.04-arm + GCC 14).
  # - It replaces the former dockcross `linux-arm64-lts` cross-compile
  #   (GCC 8.5, glibc 2.17), which can no longer compile llama.cpp b9739: its
  #   C++17 CTAD-in-`new` needs GCC >= 12.
  # - Because the build is native, the C++ unit suite (ctest) now runs on real
  #   ARM hardware for the first time (the cross build ran no tests).
  # - Trade-off: the glibc floor rises 2.17 -> ~2.39, the same envelope
  #   upstream's own ARM binaries require.
  # - GGML_NATIVE=OFF keeps the artifact portable across ARMv8 CPU generations
  #   (no build-host -march baked in).
  # - The job id is unchanged (it is a `needs:` target downstream); only the
  #   display name changed, so any branch-protection required-check pinned to
  #   the old name has to be updated.
  runs-on: ubuntu-24.04-arm
  needs: [startgate, build-webui]
  env:
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
  steps:
    - uses: actions/checkout@v7
    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/webui-generated/
        name: webui-generated
    - uses: actions/setup-java@v5
      with:
        java-version: ${{ env.JAVA_VERSION }}
        distribution: temurin
    # Installs GCC/G++ 14 and exports CC/CXX through GITHUB_ENV so that the
    # following steps see them.
    - name: Install toolchain (GCC 14, mirrors upstream llama.cpp ARM release)
      run: |
        sudo apt-get update
        sudo apt-get install -y gcc-14 g++-14
        echo "CC=gcc-14" >> "$GITHUB_ENV"
        echo "CXX=g++-14" >> "$GITHUB_ENV"
    # Diagnostic output only.
    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== Host CPU Information ==="
        lscpu
        echo ""
        echo "=== CPU Details from /proc/cpuinfo ==="
        cat /proc/cpuinfo
    # Maven compile first, then the native build with unit tests enabled.
    - name: Build libraries
      shell: bash
      run: |
        mvn --no-transfer-progress compile
        .github/build.sh "-DOS_NAME=Linux -DOS_ARCH=aarch64 -DGGML_NATIVE=OFF -DBUILD_TESTING=ON"
    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure
    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        name: Linux-aarch64-libraries
crosscompile-android-aarch64:
  name: Cross-Compile Android aarch64
  runs-on: ubuntu-latest
  needs: [startgate, build-webui]
  # Phase 2 dockcross cache rollout — job 4. Uses the same steady-state env as
  # manylinux2014 (job 1); thanks to the build.sh probe it is safe to enable
  # without a dedicated verification run. Inert without DEPOT_TOKEN (fork PRs)
  # or with use_cache=false.
  env:
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    DOCKCROSS_ARGS: '-e SCCACHE_WEBDAV_ENDPOINT -e SCCACHE_WEBDAV_TOKEN -e USE_CACHE'
  steps:
    - uses: actions/checkout@v7
    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/webui-generated/
        name: webui-generated
    # Diagnostic output only.
    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== Host CPU Information ==="
        lscpu
        echo ""
        echo "=== CPU Details from /proc/cpuinfo ==="
        cat /proc/cpuinfo
    # Runs build.sh inside the android-arm64 dockcross wrapper.
    - name: Build libraries
      shell: bash
      run: |
        .github/dockcross/dockcross-android-arm64 .github/build.sh "-DOS_NAME=Linux-Android -DOS_ARCH=aarch64"
    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        name: Linux-Android-aarch64-libraries
crosscompile-android-aarch64-opencl:
  name: Cross-Compile Android aarch64 (OpenCL/Adreno)
  runs-on: ubuntu-latest
  needs: [startgate, build-webui]
  # Phase 2 dockcross cache rollout — job 5. build_opencl_android.sh first
  # stages the OpenCL headers/loader and then hands the jllama cmake build to
  # build.sh, which owns the sccache probe + launcher. The env is the same
  # steady-state set used by the other dockcross jobs. Inert without
  # DEPOT_TOKEN (fork PRs) or with use_cache=false.
  env:
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    DOCKCROSS_ARGS: '-e SCCACHE_WEBDAV_ENDPOINT -e SCCACHE_WEBDAV_TOKEN -e USE_CACHE'
  steps:
    - uses: actions/checkout@v7
    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/webui-generated/
        name: webui-generated
    # Runs the OpenCL build script inside the android-arm64 dockcross wrapper
    # with the OpenCL / embedded-kernel / Adreno-kernel options switched on.
    - name: Build libraries
      shell: bash
      run: |
        .github/dockcross/dockcross-android-arm64 .github/build_opencl_android.sh "-DOS_NAME=Linux-Android -DOS_ARCH=aarch64 -DGGML_OPENCL=ON -DGGML_OPENCL_EMBED_KERNELS=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON"
    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        path: ${{ github.workspace }}/src/main/resources_android_opencl/net/ladenthin/llama/
        name: android-libraries-opencl
# ---------------------------------------------------------------------------
# Native build jobs — they produce release artifacts AND run the C++ unit tests
# ---------------------------------------------------------------------------
build-macos-arm64-no-metal:
  name: Build and Test macOS 15 arm64 (no Metal)
  runs-on: macos-15
  needs: [startgate, build-webui]
  env:
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    BUILD_JOBS: 2
  steps:
    - uses: actions/checkout@v7
    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/webui-generated/
        name: webui-generated
    - uses: actions/setup-java@v5
      with:
        java-version: ${{ env.JAVA_VERSION }}
        distribution: temurin
    # Diagnostic output only; the sysctl query is allowed to fail.
    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== CPU Information ==="
        sysctl hw.model hw.cachelinesize hw.cpufrequency hw.cachesize hw.physicalcpu hw.logicalcpu hw.packages hw.memsize hw.ncpu 2>/dev/null || true
        echo ""
        echo "=== Processor Details ==="
        system_profiler SPHardwareDataType
    # Best effort: skipped when caching is off or no token is available, and
    # a failed install does not fail the job.
    - name: Install sccache (shared compiler cache)
      if: env.USE_CACHE == 'true' && env.SCCACHE_WEBDAV_TOKEN != ''
      continue-on-error: true
      run: brew install sccache
    # Maven compile first, then the native build with Metal disabled and unit
    # tests enabled.
    - name: Build libraries
      shell: bash
      run: |
        mvn --no-transfer-progress compile
        .github/build.sh -DLLAMA_METAL=OFF -DGGML_NATIVE=OFF -DBUILD_TESTING=ON
    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure
    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        name: macos-15-libraries
# macOS 14 arm64 build with the Metal library embedded; builds, runs ctest and
# uploads the resulting native libraries.
build-macos-arm64-metal:
  name: Build and Test macOS 14 arm64 (Metal)
  runs-on: macos-14
  needs: [startgate, build-webui]
  env:
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    BUILD_JOBS: 2
  steps:
    - uses: actions/checkout@v7
    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/webui-generated/
        name: webui-generated
    - uses: actions/setup-java@v5
      with:
        java-version: ${{ env.JAVA_VERSION }}
        distribution: temurin
    # Diagnostic output only; the sysctl query is allowed to fail.
    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== CPU Information ==="
        sysctl hw.model hw.cachelinesize hw.cpufrequency hw.cachesize hw.physicalcpu hw.logicalcpu hw.packages hw.memsize hw.ncpu 2>/dev/null || true
        echo ""
        echo "=== Processor Details ==="
        system_profiler SPHardwareDataType
    # Best effort: skipped when caching is off or no token is available, and
    # a failed install does not fail the job.
    - name: Install sccache (shared compiler cache)
      if: env.USE_CACHE == 'true' && env.SCCACHE_WEBDAV_TOKEN != ''
      continue-on-error: true
      run: brew install sccache
    # Maven compile first, then the native build with unit tests enabled.
    - name: Build libraries
      shell: bash
      run: |
        mvn --no-transfer-progress compile
        .github/build.sh -DLLAMA_METAL_EMBED_LIBRARY=ON -DBUILD_TESTING=ON
    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure
    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        name: macos-14-libraries
# 64-bit Windows build with the Visual Studio generator; builds, runs ctest and
# uploads the resulting native libraries.
build-windows-x86_64:
  name: Build and Test Windows 2025 x86_64 (VS 2026)
  runs-on: windows-2025-vs2026
  needs: [startgate, build-webui]
  steps:
    - uses: actions/checkout@v7
    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/webui-generated/
        name: webui-generated
    # Diagnostic output only.
    - name: Display CPU Info
      shell: pwsh
      run: |
        Write-Host "=== CPU Information (Get-CimInstance - All Properties) ==="
        Get-CimInstance Win32_Processor | Select-Object * | Format-List
        Write-Host ""
        Write-Host "=== CPU Information (systeminfo) ==="
        systeminfo | Select-String "Processor"
        Write-Host ""
        Write-Host "=== CPU Information (Get-ComputerInfo) ==="
        Get-ComputerInfo -Property "CsProcessors*" 2>$null || Write-Host "Get-ComputerInfo not available"
    - name: Build libraries
      shell: cmd
      run: |
        .github\build.bat -G "Visual Studio 18 2026" -A "x64" -DOS_NAME=Windows -DOS_ARCH=x86_64 -DBUILD_TESTING=ON
    # NOTE(review): the Visual Studio generator is multi-config and ctest is
    # invoked without `-C <config>` — confirm against build.bat / the CMake
    # test registration that the tests are really executed here.
    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure
    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        name: Windows-x86_64-libraries
# 32-bit (Win32) Windows build with the Visual Studio generator; builds, runs
# ctest and uploads the resulting native libraries.
build-windows-x86:
  name: Build and Test Windows 2025 x86 (VS 2026)
  runs-on: windows-2025-vs2026
  needs: [startgate, build-webui]
  steps:
    - uses: actions/checkout@v7
    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/webui-generated/
        name: webui-generated
    # Diagnostic output only.
    - name: Display CPU Info
      shell: pwsh
      run: |
        Write-Host "=== CPU Information (Get-CimInstance - All Properties) ==="
        Get-CimInstance Win32_Processor | Select-Object * | Format-List
        Write-Host ""
        Write-Host "=== CPU Information (systeminfo) ==="
        systeminfo | Select-String "Processor"
        Write-Host ""
        Write-Host "=== CPU Information (Get-ComputerInfo) ==="
        Get-ComputerInfo -Property "CsProcessors*" 2>$null || Write-Host "Get-ComputerInfo not available"
    - name: Build libraries
      shell: cmd
      run: |
        .github\build.bat -G "Visual Studio 18 2026" -A "Win32" -DOS_NAME=Windows -DOS_ARCH=x86 -DBUILD_TESTING=ON
    # NOTE(review): the Visual Studio generator is multi-config and ctest is
    # invoked without `-C <config>` — confirm against build.bat / the CMake
    # test registration that the tests are really executed here.
    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure
    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        name: Windows-x86-libraries
# ---------------------------------------------------------------------------
# Windows Ninja Multi-Config + sccache — EVALUATION jobs (not yet released).
# Background: the Visual Studio generator ignores
# CMAKE_{C,CXX}_COMPILER_LAUNCHER, which makes the two build-windows-* jobs
# above the only uncached native builds. Upstream llama.cpp ships its
# windows-cuda artifact with "Ninja Multi-Config" + MSVC, which proves that
# combination works on the same tree. The two jobs below run it in parallel
# with the trusted VS jobs and put sccache over Depot WebDAV in front of cl.exe
# (build.bat probe-guards it). Their artifacts are named `Windows-*-ninja`
# (NOT `*-libraries`) precisely so the package job's `pattern: "*-libraries"`
# does NOT collect them: they remain evaluation-only until cache hits are
# confirmed, after which the release path is switched over (see TODO.md). The
# package job's `needs:` is intentionally left unchanged.
# ---------------------------------------------------------------------------
build-windows-x86_64-ninja:
  name: Build and Test Windows 2025 x86_64 (Ninja Multi-Config + sccache, eval)
  runs-on: windows-2025-vs2026
  needs: [startgate, build-webui]
  env:
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
  steps:
    - uses: actions/checkout@v7
    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/webui-generated/
        name: webui-generated
    - name: Set up MSVC developer environment (x64)
      uses: ilammy/msvc-dev-cmd@v1
      with:
        arch: x64
    # Best effort: skipped when caching is off or no token is available, and
    # a failed install does not fail the job. Downloads the pinned sccache
    # release zip, unpacks it under RUNNER_TEMP and appends the unpacked
    # directory to GITHUB_PATH.
    - name: Install sccache (shared compiler cache)
      if: env.USE_CACHE == 'true' && env.SCCACHE_WEBDAV_TOKEN != ''
      continue-on-error: true
      shell: pwsh
      run: |
        $ver = "0.16.0"
        $rel = "sccache-v$ver-x86_64-pc-windows-msvc"
        $url = "https://github.com/mozilla/sccache/releases/download/v$ver/$rel.zip"
        Write-Host "Downloading $url"
        Invoke-WebRequest -Uri $url -OutFile "$env:RUNNER_TEMP\sccache.zip"
        Expand-Archive -Path "$env:RUNNER_TEMP\sccache.zip" -DestinationPath "$env:RUNNER_TEMP\sccache" -Force
        Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\sccache\$rel"
    # Diagnostic output only.
    - name: Display CPU Info
      shell: pwsh
      run: |
        Write-Host "=== CPU Information (Get-CimInstance - All Properties) ==="
        Get-CimInstance Win32_Processor | Select-Object * | Format-List
        Write-Host ""
        Write-Host "=== CPU Information (systeminfo) ==="
        systeminfo | Select-String "Processor"
        Write-Host ""
        Write-Host "=== CPU Information (Get-ComputerInfo) ==="
        Get-ComputerInfo -Property "CsProcessors*" 2>$null || Write-Host "Get-ComputerInfo not available"
    - name: Build libraries
      shell: cmd
      run: |
        .github\build.bat -G "Ninja Multi-Config" -DOS_NAME=Windows -DOS_ARCH=x86_64 -DBUILD_TESTING=ON
    # NOTE(review): "Ninja Multi-Config" is a multi-config generator and ctest
    # is invoked without `-C <config>` — confirm against build.bat / the CMake
    # test registration that the tests are really executed here.
    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure
    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        name: Windows-x86_64-ninja
# 32-bit evaluation build with "Ninja Multi-Config" + sccache. The artifact is
# named Windows-x86-ninja, i.e. it does not carry the `-libraries` suffix.
build-windows-x86-ninja:
  name: Build and Test Windows 2025 x86 (Ninja Multi-Config + sccache, eval)
  runs-on: windows-2025-vs2026
  needs: [startgate, build-webui]
  env:
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
  steps:
    - uses: actions/checkout@v7
    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/webui-generated/
        name: webui-generated
    # The MSVC environment targets x86 ...
    - name: Set up MSVC developer environment (x86)
      uses: ilammy/msvc-dev-cmd@v1
      with:
        arch: x86
    # ... while the sccache binary fetched below is the x86_64 Windows build.
    # Best effort: skipped when caching is off or no token is available, and
    # a failed install does not fail the job.
    - name: Install sccache (shared compiler cache)
      if: env.USE_CACHE == 'true' && env.SCCACHE_WEBDAV_TOKEN != ''
      continue-on-error: true
      shell: pwsh
      run: |
        $ver = "0.16.0"
        $rel = "sccache-v$ver-x86_64-pc-windows-msvc"
        $url = "https://github.com/mozilla/sccache/releases/download/v$ver/$rel.zip"
        Write-Host "Downloading $url"
        Invoke-WebRequest -Uri $url -OutFile "$env:RUNNER_TEMP\sccache.zip"
        Expand-Archive -Path "$env:RUNNER_TEMP\sccache.zip" -DestinationPath "$env:RUNNER_TEMP\sccache" -Force
        Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\sccache\$rel"
    # Diagnostic output only.
    - name: Display CPU Info
      shell: pwsh
      run: |
        Write-Host "=== CPU Information (Get-CimInstance - All Properties) ==="
        Get-CimInstance Win32_Processor | Select-Object * | Format-List
        Write-Host ""
        Write-Host "=== CPU Information (systeminfo) ==="
        systeminfo | Select-String "Processor"
        Write-Host ""
        Write-Host "=== CPU Information (Get-ComputerInfo) ==="
        Get-ComputerInfo -Property "CsProcessors*" 2>$null || Write-Host "Get-ComputerInfo not available"
    - name: Build libraries
      shell: cmd
      run: |
        .github\build.bat -G "Ninja Multi-Config" -DOS_NAME=Windows -DOS_ARCH=x86 -DBUILD_TESTING=ON
    # NOTE(review): "Ninja Multi-Config" is a multi-config generator and ctest
    # is invoked without `-C <config>` — confirm against build.bat / the CMake
    # test registration that the tests are really executed here.
    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure
    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        name: Windows-x86-ninja
# ---------------------------------------------------------------------------
# CI-only jobs — they publish no release artifact and exist purely for test
# coverage
# ---------------------------------------------------------------------------
test-cpp-linux-x86_64:
  name: C++ Tests Ubuntu Latest x86_64
  runs-on: ubuntu-latest
  # Depends on the start gate only; no WebUI artifact is downloaded here.
  needs: startgate
  steps:
    - uses: actions/checkout@v7
    - uses: actions/setup-java@v5
      with:
        java-version: ${{ env.JAVA_VERSION }}
        distribution: temurin
    # Diagnostic output only.
    - name: Display CPU Info
      run: |
        echo "=== CPU Information ==="
        lscpu
        echo ""
        echo "=== CPU Details from /proc/cpuinfo ==="
        cat /proc/cpuinfo
    # Quiet Maven compile first, then the native build with unit tests enabled.
    - name: Build libraries
      run: |
        mvn -q --no-transfer-progress compile
        .github/build.sh -DBUILD_TESTING=ON
    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure
# macOS 15 arm64 build with the Metal library embedded and GGML_NATIVE=OFF.
# NOTE(review): the uploaded artifact is named `macos-15-metal-libraries`; the
# `-libraries` suffix is what the comment above the Windows Ninja jobs says the
# package job collects, although this job sits under the "CI-only" header —
# confirm which of the two is intended.
build-macos-arm64-metal-15:
  name: Build and Test macOS 15 arm64 (Metal)
  runs-on: macos-15
  needs: [startgate, build-webui]
  env:
    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
    BUILD_JOBS: 2
  steps:
    - uses: actions/checkout@v7
    - name: Download shared WebUI assets
      uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/webui-generated/
        name: webui-generated
    - uses: actions/setup-java@v5
      with:
        java-version: ${{ env.JAVA_VERSION }}
        distribution: temurin
    # Diagnostic output only; the sysctl query is allowed to fail.
    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== CPU Information ==="
        sysctl hw.model hw.cachelinesize hw.cpufrequency hw.cachesize hw.physicalcpu hw.logicalcpu hw.packages hw.memsize hw.ncpu 2>/dev/null || true
        echo ""
        echo "=== Processor Details ==="
        system_profiler SPHardwareDataType
    # Best effort: skipped when caching is off or no token is available, and
    # a failed install does not fail the job.
    - name: Install sccache (shared compiler cache)
      if: env.USE_CACHE == 'true' && env.SCCACHE_WEBDAV_TOKEN != ''
      continue-on-error: true
      run: brew install sccache
    # Maven compile first, then the native build with unit tests enabled.
    - name: Build libraries
      shell: bash
      run: |
        mvn --no-transfer-progress compile
        .github/build.sh -DLLAMA_METAL_EMBED_LIBRARY=ON -DGGML_NATIVE=OFF -DBUILD_TESTING=ON
    - name: Run C++ unit tests
      run: ctest --test-dir build --output-on-failure
    - name: Upload artifacts
      uses: actions/upload-artifact@v7
      with:
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        name: macos-15-metal-libraries
# ---------------------------------------------------------------------------
# Java test jobs — download release artifact, run mvn test
# ---------------------------------------------------------------------------
test-java-linux-x86_64:
name: Java Tests Ubuntu Latest x86_64
needs: crosscompile-linux-x86_64
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v7
- name: Display CPU Info
shell: bash
run: |
echo "=== CPU Information ==="
lscpu
echo ""
echo "=== CPU Details from /proc/cpuinfo ==="
cat /proc/cpuinfo
- uses: actions/download-artifact@v8
with:
name: Linux-x86_64-libraries
path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
# GGUF model cache — introduced to stop re-downloading ~5 GB of test models from
# HuggingFace on every run (also dodges HF rate-limits). Complements the sccache compiler
# cache but is always ON: there is intentionally NO on/off flag for it (it is GitHub's
# free cache, safe + free), whereas the sccache cache is toggled by the `use_cache`
# workflow_dispatch input / USE_CACHE env. Not Depot — GB-scale blobs are usage-priced
# there and its file cache needs Depot-hosted runners. See CLAUDE.md.
- name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
uses: actions/cache@v5
with:
path: models/
# GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
# bump the suffix when the model set / URLs change.
key: gguf-models-v1
- name: Download text generation model
run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
- name: Download reranking model
run: test -f models/${RERANKING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${RERANKING_MODEL_URL} --create-dirs -o models/${RERANKING_MODEL_NAME}
- name: Download draft model
run: test -f models/${DRAFT_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${DRAFT_MODEL_URL} --create-dirs -o models/${DRAFT_MODEL_NAME}
- name: Download reasoning model
run: test -f models/${REASONING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
- name: Download tool-calling model
run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
- name: Download nomic embedding model (issue #98 regression)
run: test -f models/${NOMIC_EMBED_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${NOMIC_EMBED_MODEL_URL} --create-dirs -o models/${NOMIC_EMBED_MODEL_NAME}
- name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
- name: Download vision mmproj
run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
- name: List files in models directory
run: ls -l models/
- name: Validate model files
run: bash .github/validate-models.sh
# Install the JDK selected by the workflow-level JAVA_VERSION variable.
- uses: actions/setup-java@v5
  with:
    distribution: 'temurin'
    java-version: ${{ env.JAVA_VERSION }}
- name: Memory before tests
  run: free -h
- name: Enable core dumps
  # Only the kernel core_pattern is configured here: it is a system-wide setting, so it
  # is still in effect when the tests run in a later step. Cores land in the workspace
  # as core.<executable>.<pid>.
  run: |
    echo "${{ github.workspace }}/core.%e.%p" | sudo tee /proc/sys/kernel/core_pattern
- name: Run tests
  # The core-size limit is per-process and inherited only by child processes, while
  # every `run:` step starts its own shell. It is therefore raised here, in the shell
  # that launches Maven, instead of in a separate step where it would have no effect.
  run: |
    ulimit -c unlimited
    mvn -e --no-transfer-progress -P jcstress test \
      -Dnet.ladenthin.llama.tool.model=models/${TOOL_MODEL_NAME} \
      -Dnet.ladenthin.llama.nomic.path=models/${NOMIC_EMBED_MODEL_NAME} \
      -Dnet.ladenthin.llama.vision.model=models/${VISION_MODEL_NAME} \
      -Dnet.ladenthin.llama.vision.mmproj=models/${VISION_MMPROJ_NAME} \
      -Dnet.ladenthin.llama.vision.image=${VISION_IMAGE_PATH}
# Publish the JaCoCo XML report, but only when everything before it succeeded.
- if: success()
  uses: actions/upload-artifact@v7
  with:
    name: jacoco-report
    path: target/site/jacoco/jacoco.xml
    if-no-files-found: ignore
- name: Run PIT mutation tests
  run: >-
    mvn --batch-mode --no-transfer-progress
    test-compile org.pitest:pitest-maven:mutationCoverage
# Print every PIT HTML report that mentions a surviving mutation, with a few lines of
# context around each hit. Runs even when an earlier step failed.
- name: Extract PIT survivors
  if: always()
  run: |
    echo "=== PIT Survived Mutations ==="
    for html_file in $(find target/pit-reports -name "*.html" -type f 2>/dev/null | sort); do
      if grep -q "SURVIVED" "$html_file"; then
        echo "Found survivors in $html_file:"
        grep -B 2 -A 3 "SURVIVED" "$html_file"
        echo ""
      fi
    done
- if: always()
  uses: actions/upload-artifact@v7
  with:
    name: pit-reports
    path: target/pit-reports/
- name: Memory after tests
  if: always()
  run: free -h
# On failure, collect JVM crash logs, core files, heap dumps and surefire output.
- uses: actions/upload-artifact@v7
  if: failure()
  with:
    name: error-log-linux-x86_64
    if-no-files-found: warn
    path: |
      ${{ github.workspace }}/hs_err_pid*.log
      ${{ github.workspace }}/core.*
      ${{ github.workspace }}/*.hprof
      ${{ github.workspace }}/target/surefire-reports/*.dump
      ${{ github.workspace }}/target/surefire-reports/*.dumpstream
      ${{ github.workspace }}/target/surefire-reports/*.txt
      ${{ github.workspace }}/target/surefire-reports/TEST-*.xml
# ---------------------------------------------------------------------------
# vmlens interleaving analysis — pure-Java, needs no native library or models.
# Staged to a single smoke test for now (see the `vmlens` profile in pom.xml).
# ---------------------------------------------------------------------------
vmlens:
  # Runs the tests selected below with the `vmlens` Maven profile on an Ubuntu runner
  # and always uploads the resulting report directory.
  name: Test (vmlens interleavings)
  runs-on: ubuntu-latest
  needs: startgate
  steps:
    - uses: actions/checkout@v7
    - uses: actions/setup-java@v5
      with:
        java-version: ${{ env.JAVA_VERSION }}
        distribution: temurin
        cache: maven
    # Add each new test in the `vmlens` package to this -Dtest list (surefire
    # -Dtest matches simple class names, not package globs; the default suite is
    # excluded from the vmlens package via pom.xml managed surefire <excludes>).
    - name: Test under vmlens (interleaving analysis)
      run: mvn --batch-mode --no-transfer-progress -Pvmlens test -Dtest=VmlensInterleavingSmokeTest,SessionStateInterleavingTest -DfailIfNoTests=false
    - if: always()
      uses: actions/upload-artifact@v7
      with:
        name: vmlens-report
        path: target/vmlens-report/
        if-no-files-found: ignore
test-java-macos-arm64-metal:
  # Runs the Maven test suite on a macos-14 runner against the native libraries taken
  # from the `macos-14-libraries` artifact, using the GGUF test models in models/.
  name: Java Tests macOS 14 arm64 (Metal)
  needs: build-macos-arm64-metal
  runs-on: macos-14
  steps:
    - uses: actions/checkout@v7
    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== CPU Information ==="
        sysctl hw.model hw.cachelinesize hw.cpufrequency hw.cachesize hw.physicalcpu hw.logicalcpu hw.packages hw.memsize hw.ncpu 2>/dev/null || true
        echo ""
        echo "=== Processor Details ==="
        system_profiler SPHardwareDataType
    - uses: actions/download-artifact@v8
      with:
        name: macos-14-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
    # GGUF model cache — introduced to stop re-downloading ~5 GB of test models from
    # HuggingFace on every run (also dodges HF rate-limits). Complements the sccache compiler
    # cache but is always ON: there is intentionally NO on/off flag for it (it is GitHub's
    # free cache, safe + free), whereas the sccache cache is toggled by the `use_cache`
    # workflow_dispatch input / USE_CACHE env. Not Depot — GB-scale blobs are usage-priced
    # there and its file cache needs Depot-hosted runners. See CLAUDE.md.
    - name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
      uses: actions/cache@v5
      with:
        path: models/
        # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
        # bump the suffix when the model set / URLs change.
        key: gguf-models-v1
    # Each download is skipped when the file already exists (e.g. restored from the cache).
    - name: Download text generation model
      run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
    - name: Download reranking model
      run: test -f models/${RERANKING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${RERANKING_MODEL_URL} --create-dirs -o models/${RERANKING_MODEL_NAME}
    - name: Download draft model
      run: test -f models/${DRAFT_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${DRAFT_MODEL_URL} --create-dirs -o models/${DRAFT_MODEL_NAME}
    - name: Download reasoning model
      run: test -f models/${REASONING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
    - name: Download tool-calling model
      run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
    # Quoted on purpose: in an unquoted YAML scalar a space followed by `#` starts a
    # comment, which would cut this step name off before "#34)".
    - name: "Download vision model (upstream kherud/java-llama.cpp#103 / #34)"
      run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
    - name: Download vision mmproj
      run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
    - name: List files in models directory
      run: ls -l models/
    - name: Validate model files
      run: bash .github/validate-models.sh
    - uses: actions/setup-java@v5
      with:
        distribution: 'temurin'
        java-version: ${{ env.JAVA_VERSION }}
    - name: Memory before tests
      run: vm_stat && sysctl hw.memsize hw.physmem
    - name: Run tests
      # Core dumps are enabled inside this script: the core-size limit is per-process
      # and every `run:` step starts its own shell, so a separate "ulimit" step would
      # not affect the Maven process started here.
      # NOTE(review): core files are not part of the failure upload below — confirm
      # whether they should be collected.
      run: |
        ulimit -c unlimited
        mvn -e --no-transfer-progress -Dnet.ladenthin.llama.test.ngl=0 test \
          -Dnet.ladenthin.llama.tool.model=models/${TOOL_MODEL_NAME} \
          -Dnet.ladenthin.llama.vision.model=models/${VISION_MODEL_NAME} \
          -Dnet.ladenthin.llama.vision.mmproj=models/${VISION_MMPROJ_NAME} \
          -Dnet.ladenthin.llama.vision.image=${VISION_IMAGE_PATH}
    - name: Memory after tests
      if: always()
      run: vm_stat && sysctl hw.memsize hw.physmem
    # On failure, collect JVM crash logs, heap dumps and surefire output.
    - if: failure()
      uses: actions/upload-artifact@v7
      with:
        name: error-log-macos-14-metal
        path: |
          ${{ github.workspace }}/hs_err_pid*.log
          ${{ github.workspace }}/*.hprof
          ${{ github.workspace }}/target/surefire-reports/*.dump
          ${{ github.workspace }}/target/surefire-reports/*.dumpstream
          ${{ github.workspace }}/target/surefire-reports/*.txt
          ${{ github.workspace }}/target/surefire-reports/TEST-*.xml
        if-no-files-found: warn
test-java-macos-arm64-no-metal:
  # Runs the Maven test suite on a macos-15 runner against the native libraries taken
  # from the `macos-15-libraries` artifact, using the GGUF test models in models/.
  name: Java Tests macOS 15 arm64 (no Metal)
  needs: build-macos-arm64-no-metal
  runs-on: macos-15
  steps:
    - uses: actions/checkout@v7
    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== CPU Information ==="
        sysctl hw.model hw.cachelinesize hw.cpufrequency hw.cachesize hw.physicalcpu hw.logicalcpu hw.packages hw.memsize hw.ncpu 2>/dev/null || true
        echo ""
        echo "=== Processor Details ==="
        system_profiler SPHardwareDataType
    - uses: actions/download-artifact@v8
      with:
        name: macos-15-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
    # GGUF model cache — introduced to stop re-downloading ~5 GB of test models from
    # HuggingFace on every run (also dodges HF rate-limits). Complements the sccache compiler
    # cache but is always ON: there is intentionally NO on/off flag for it (it is GitHub's
    # free cache, safe + free), whereas the sccache cache is toggled by the `use_cache`
    # workflow_dispatch input / USE_CACHE env. Not Depot — GB-scale blobs are usage-priced
    # there and its file cache needs Depot-hosted runners. See CLAUDE.md.
    - name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
      uses: actions/cache@v5
      with:
        path: models/
        # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
        # bump the suffix when the model set / URLs change.
        key: gguf-models-v1
    # Each download is skipped when the file already exists (e.g. restored from the cache).
    - name: Download text generation model
      run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
    - name: Download reranking model
      run: test -f models/${RERANKING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${RERANKING_MODEL_URL} --create-dirs -o models/${RERANKING_MODEL_NAME}
    - name: Download draft model
      run: test -f models/${DRAFT_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${DRAFT_MODEL_URL} --create-dirs -o models/${DRAFT_MODEL_NAME}
    - name: Download reasoning model
      run: test -f models/${REASONING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
    - name: Download tool-calling model
      run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
    # Quoted on purpose: in an unquoted YAML scalar a space followed by `#` starts a
    # comment, which would cut this step name off before "#34)".
    - name: "Download vision model (upstream kherud/java-llama.cpp#103 / #34)"
      run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
    - name: Download vision mmproj
      run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
    - name: List files in models directory
      run: ls -l models/
    - name: Validate model files
      run: bash .github/validate-models.sh
    - uses: actions/setup-java@v5
      with:
        distribution: 'temurin'
        java-version: ${{ env.JAVA_VERSION }}
    - name: Memory before tests
      run: vm_stat && sysctl hw.memsize hw.physmem
    - name: Run tests
      # Core dumps are enabled inside this script: the core-size limit is per-process
      # and every `run:` step starts its own shell, so a separate "ulimit" step would
      # not affect the Maven process started here.
      # NOTE(review): core files are not part of the failure upload below — confirm
      # whether they should be collected.
      run: |
        ulimit -c unlimited
        mvn -e --no-transfer-progress test \
          -Dnet.ladenthin.llama.tool.model=models/${TOOL_MODEL_NAME} \
          -Dnet.ladenthin.llama.vision.model=models/${VISION_MODEL_NAME} \
          -Dnet.ladenthin.llama.vision.mmproj=models/${VISION_MMPROJ_NAME} \
          -Dnet.ladenthin.llama.vision.image=${VISION_IMAGE_PATH}
    - name: Memory after tests
      if: always()
      run: vm_stat && sysctl hw.memsize hw.physmem
    # On failure, collect JVM crash logs, heap dumps and surefire output.
    - if: failure()
      uses: actions/upload-artifact@v7
      with:
        name: error-log-macos-15-no-metal
        path: |
          ${{ github.workspace }}/hs_err_pid*.log
          ${{ github.workspace }}/*.hprof
          ${{ github.workspace }}/target/surefire-reports/*.dump
          ${{ github.workspace }}/target/surefire-reports/*.dumpstream
          ${{ github.workspace }}/target/surefire-reports/*.txt
          ${{ github.workspace }}/target/surefire-reports/TEST-*.xml
        if-no-files-found: warn
test-java-macos-arm64-metal-15:
  # Runs the Maven test suite on a macos-15 runner against the native libraries taken
  # from the `macos-15-metal-libraries` artifact, using the GGUF test models in models/.
  name: Java Tests macOS 15 arm64 (Metal)
  needs: build-macos-arm64-metal-15
  runs-on: macos-15
  steps:
    - uses: actions/checkout@v7
    - name: Display CPU Info
      shell: bash
      run: |
        echo "=== CPU Information ==="
        sysctl hw.model hw.cachelinesize hw.cpufrequency hw.cachesize hw.physicalcpu hw.logicalcpu hw.packages hw.memsize hw.ncpu 2>/dev/null || true
        echo ""
        echo "=== Processor Details ==="
        system_profiler SPHardwareDataType
    - uses: actions/download-artifact@v8
      with:
        name: macos-15-metal-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
    # GGUF model cache — introduced to stop re-downloading ~5 GB of test models from
    # HuggingFace on every run (also dodges HF rate-limits). Complements the sccache compiler
    # cache but is always ON: there is intentionally NO on/off flag for it (it is GitHub's
    # free cache, safe + free), whereas the sccache cache is toggled by the `use_cache`
    # workflow_dispatch input / USE_CACHE env. Not Depot — GB-scale blobs are usage-priced
    # there and its file cache needs Depot-hosted runners. See CLAUDE.md.
    - name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
      uses: actions/cache@v5
      with:
        path: models/
        # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
        # bump the suffix when the model set / URLs change.
        key: gguf-models-v1
    # Each download is skipped when the file already exists (e.g. restored from the cache).
    - name: Download text generation model
      run: test -f models/${MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
    - name: Download reranking model
      run: test -f models/${RERANKING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${RERANKING_MODEL_URL} --create-dirs -o models/${RERANKING_MODEL_NAME}
    - name: Download draft model
      run: test -f models/${DRAFT_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${DRAFT_MODEL_URL} --create-dirs -o models/${DRAFT_MODEL_NAME}
    - name: Download reasoning model
      run: test -f models/${REASONING_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
    - name: Download tool-calling model
      run: test -f models/${TOOL_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
    # Quoted on purpose: in an unquoted YAML scalar a space followed by `#` starts a
    # comment, which would cut this step name off before "#34)".
    - name: "Download vision model (upstream kherud/java-llama.cpp#103 / #34)"
      run: test -f models/${VISION_MODEL_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
    - name: Download vision mmproj
      run: test -f models/${VISION_MMPROJ_NAME} || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
    - name: List files in models directory
      run: ls -l models/
    - name: Validate model files
      run: bash .github/validate-models.sh
    - uses: actions/setup-java@v5
      with:
        distribution: 'temurin'
        java-version: ${{ env.JAVA_VERSION }}
    - name: Memory before tests
      run: vm_stat && sysctl hw.memsize hw.physmem
    - name: Run tests
      # Core dumps are enabled inside this script: the core-size limit is per-process
      # and every `run:` step starts its own shell, so a separate "ulimit" step would
      # not affect the Maven process started here.
      # NOTE(review): core files are not part of the failure upload below — confirm
      # whether they should be collected.
      run: |
        ulimit -c unlimited
        mvn -e --no-transfer-progress test \
          -Dnet.ladenthin.llama.tool.model=models/${TOOL_MODEL_NAME} \
          -Dnet.ladenthin.llama.vision.model=models/${VISION_MODEL_NAME} \
          -Dnet.ladenthin.llama.vision.mmproj=models/${VISION_MMPROJ_NAME} \
          -Dnet.ladenthin.llama.vision.image=${VISION_IMAGE_PATH}
    - name: Memory after tests
      if: always()
      run: vm_stat && sysctl hw.memsize hw.physmem
    # On failure, collect JVM crash logs, heap dumps and surefire output.
    - if: failure()
      uses: actions/upload-artifact@v7
      with:
        name: error-log-macos-15-metal
        path: |
          ${{ github.workspace }}/hs_err_pid*.log
          ${{ github.workspace }}/*.hprof
          ${{ github.workspace }}/target/surefire-reports/*.dump
          ${{ github.workspace }}/target/surefire-reports/*.dumpstream
          ${{ github.workspace }}/target/surefire-reports/*.txt
          ${{ github.workspace }}/target/surefire-reports/TEST-*.xml
        if-no-files-found: warn
test-java-windows-x86_64:
  # Runs the Maven test suite on a Windows runner against the native libraries taken
  # from the `Windows-x86_64-libraries` artifact, using the GGUF test models in models/.
  name: Java Tests Windows 2025 x86_64 (VS 2026)
  needs: build-windows-x86_64
  runs-on: windows-2025-vs2026
  steps:
    - uses: actions/checkout@v7
    - name: Display CPU Info
      shell: pwsh
      run: |
        Write-Host "=== CPU Information (Get-CimInstance - All Properties) ==="
        Get-CimInstance Win32_Processor | Select-Object * | Format-List
        Write-Host ""
        Write-Host "=== CPU Information (systeminfo) ==="
        systeminfo | Select-String "Processor"
        Write-Host ""
        Write-Host "=== CPU Information (Get-ComputerInfo) ==="
        Get-ComputerInfo -Property "CsProcessors*" 2>$null || Write-Host "Get-ComputerInfo not available"
    - uses: actions/download-artifact@v8
      with:
        name: Windows-x86_64-libraries
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
    - name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
      uses: actions/cache@v5
      with:
        path: models/
        # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
        # bump the suffix when the model set / URLs change.
        key: gguf-models-v1
        # Required on Windows for the sharing described above: without this flag
        # actions/cache keeps archives written on Windows separate from those written
        # on other platforms, even when the key is identical.
        enableCrossOsArchive: true
    # Each download is skipped when the file already exists (e.g. restored from the cache).
    - name: Download text generation model
      run: if (-not (Test-Path "models/$env:MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME }
    - name: Download reranking model
      run: if (-not (Test-Path "models/$env:RERANKING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME }
    - name: Download draft model
      run: if (-not (Test-Path "models/$env:DRAFT_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME }
    - name: Download reasoning model
      run: if (-not (Test-Path "models/$env:REASONING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME }
    - name: Download tool-calling model
      run: if (-not (Test-Path "models/$env:TOOL_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME }
    # Quoted on purpose: in an unquoted YAML scalar a space followed by `#` starts a
    # comment, which would cut this step name off before "#34)".
    - name: "Download vision model (upstream kherud/java-llama.cpp#103 / #34)"
      run: if (-not (Test-Path "models/$env:VISION_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME }
    - name: Download vision mmproj
      run: if (-not (Test-Path "models/$env:VISION_MMPROJ_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME }
    - name: List files in models directory
      run: ls -l models/
    - name: Validate model files
      run: .github\validate-models.bat
    - uses: actions/setup-java@v5
      with:
        distribution: 'temurin'
        java-version: ${{ env.JAVA_VERSION }}
    - name: Memory before tests
      run: Get-CimInstance Win32_OperatingSystem | Select-Object FreePhysicalMemory,TotalVisibleMemorySize | Format-List
      shell: pwsh
    - name: Enable WER LocalDumps for java.exe
      # Windows Error Reporting writes minidumps when java.exe (or any other
      # registered process) crashes via __fastfail / abort / unhandled SEH.
      # We use it as the Windows analogue of Linux core dumps so that a JVM
      # crash inside the JNI layer leaves us a real native callstack instead
      # of just surefire's "VM terminated without saying goodbye" line.
      # DumpType=2 == MiniDumpWithFullMemory; the workspace dumps/ folder is
      # globbed by the failure-upload step below.
      shell: pwsh
      run: |
        $key = 'HKLM:\SOFTWARE\Microsoft\Windows\Windows Error Reporting\LocalDumps\java.exe'
        New-Item -Path $key -Force | Out-Null
        New-Item -Path "${{ github.workspace }}\dumps" -ItemType Directory -Force | Out-Null
        New-ItemProperty -Path $key -Name 'DumpFolder' -Value "${{ github.workspace }}\dumps" -PropertyType ExpandString -Force | Out-Null
        New-ItemProperty -Path $key -Name 'DumpType' -Value 2 -PropertyType DWord -Force | Out-Null
        New-ItemProperty -Path $key -Name 'DumpCount' -Value 5 -PropertyType DWord -Force | Out-Null
        Get-ItemProperty -Path $key | Format-List
    - name: Run tests
      run: |
        mvn -e --no-transfer-progress test `
          "-Dnet.ladenthin.llama.tool.model=models/$env:TOOL_MODEL_NAME" `
          "-Dnet.ladenthin.llama.vision.model=models/$env:VISION_MODEL_NAME" `
          "-Dnet.ladenthin.llama.vision.mmproj=models/$env:VISION_MMPROJ_NAME" `
          "-Dnet.ladenthin.llama.vision.image=$env:VISION_IMAGE_PATH"
    - name: Memory after tests
      if: always()
      run: Get-CimInstance Win32_OperatingSystem | Select-Object FreePhysicalMemory,TotalVisibleMemorySize | Format-List
      shell: pwsh
    # On failure, collect JVM crash logs, heap dumps, WER minidumps, surefire output
    # and the native library resources that were under test.
    - if: failure()
      uses: actions/upload-artifact@v7
      with:
        name: windows-output
        path: |
          ${{ github.workspace }}\hs_err_pid*.log
          ${{ github.workspace }}\*.hprof
          ${{ github.workspace }}\dumps\*.dmp
          ${{ github.workspace }}\target\surefire-reports\*.dump
          ${{ github.workspace }}\target\surefire-reports\*.dumpstream
          ${{ github.workspace }}\target\surefire-reports\*.txt
          ${{ github.workspace }}\target\surefire-reports\TEST-*.xml
          ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/**/*
        if-no-files-found: warn
# Java/inference validation of the Ninja-built x86_64 DLL (the analogue of
# test-java-windows-x86_64 for the MSVC build). Loads the Ninja jllama.dll via
# JNI and runs the full model-backed suite, so both Windows generators are
# validated end-to-end before the `ninja-windows` classifier JAR ships.
test-java-windows-x86_64-ninja:
  # Runs the Maven test suite on a Windows runner against the native libraries taken
  # from the `Windows-x86_64-ninja` artifact, using the GGUF test models in models/.
  name: Java Tests Windows 2025 x86_64 (Ninja, eval)
  needs: build-windows-x86_64-ninja
  runs-on: windows-2025-vs2026
  steps:
    - uses: actions/checkout@v7
    - name: Display CPU Info
      shell: pwsh
      run: |
        Write-Host "=== CPU Information (Get-CimInstance - All Properties) ==="
        Get-CimInstance Win32_Processor | Select-Object * | Format-List
        Write-Host ""
        Write-Host "=== CPU Information (systeminfo) ==="
        systeminfo | Select-String "Processor"
        Write-Host ""
        Write-Host "=== CPU Information (Get-ComputerInfo) ==="
        Get-ComputerInfo -Property "CsProcessors*" 2>$null || Write-Host "Get-ComputerInfo not available"
    - uses: actions/download-artifact@v8
      with:
        name: Windows-x86_64-ninja
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
    - name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
      uses: actions/cache@v5
      with:
        path: models/
        # GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
        # bump the suffix when the model set / URLs change.
        key: gguf-models-v1
        # Required on Windows for the sharing described above: without this flag
        # actions/cache keeps archives written on Windows separate from those written
        # on other platforms, even when the key is identical.
        enableCrossOsArchive: true
    # Each download is skipped when the file already exists (e.g. restored from the cache).
    - name: Download text generation model
      run: if (-not (Test-Path "models/$env:MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME }
    - name: Download reranking model
      run: if (-not (Test-Path "models/$env:RERANKING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME }
    - name: Download draft model
      run: if (-not (Test-Path "models/$env:DRAFT_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME }
    - name: Download reasoning model
      run: if (-not (Test-Path "models/$env:REASONING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME }
    - name: Download tool-calling model
      run: if (-not (Test-Path "models/$env:TOOL_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME }
    # Quoted on purpose: in an unquoted YAML scalar a space followed by `#` starts a
    # comment, which would cut this step name off before "#34)".
    - name: "Download vision model (upstream kherud/java-llama.cpp#103 / #34)"
      run: if (-not (Test-Path "models/$env:VISION_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME }
    - name: Download vision mmproj
      run: if (-not (Test-Path "models/$env:VISION_MMPROJ_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME }
    - name: List files in models directory
      run: ls -l models/
    - name: Validate model files
      run: .github\validate-models.bat
    - uses: actions/setup-java@v5
      with:
        distribution: 'temurin'
        java-version: ${{ env.JAVA_VERSION }}
    - name: Memory before tests
      run: Get-CimInstance Win32_OperatingSystem | Select-Object FreePhysicalMemory,TotalVisibleMemorySize | Format-List
      shell: pwsh
    - name: Enable WER LocalDumps for java.exe
      # Windows Error Reporting writes minidumps when java.exe (or any other
      # registered process) crashes via __fastfail / abort / unhandled SEH.
      # We use it as the Windows analogue of Linux core dumps so that a JVM
      # crash inside the JNI layer leaves us a real native callstack instead
      # of just surefire's "VM terminated without saying goodbye" line.
      # DumpType=2 == MiniDumpWithFullMemory; the workspace dumps/ folder is
      # globbed by the failure-upload step below.
      shell: pwsh
      run: |
        $key = 'HKLM:\SOFTWARE\Microsoft\Windows\Windows Error Reporting\LocalDumps\java.exe'
        New-Item -Path $key -Force | Out-Null
        New-Item -Path "${{ github.workspace }}\dumps" -ItemType Directory -Force | Out-Null
        New-ItemProperty -Path $key -Name 'DumpFolder' -Value "${{ github.workspace }}\dumps" -PropertyType ExpandString -Force | Out-Null
        New-ItemProperty -Path $key -Name 'DumpType' -Value 2 -PropertyType DWord -Force | Out-Null
        New-ItemProperty -Path $key -Name 'DumpCount' -Value 5 -PropertyType DWord -Force | Out-Null
        Get-ItemProperty -Path $key | Format-List
    - name: Run tests
      run: |
        mvn -e --no-transfer-progress test `
          "-Dnet.ladenthin.llama.tool.model=models/$env:TOOL_MODEL_NAME" `
          "-Dnet.ladenthin.llama.vision.model=models/$env:VISION_MODEL_NAME" `
          "-Dnet.ladenthin.llama.vision.mmproj=models/$env:VISION_MMPROJ_NAME" `
          "-Dnet.ladenthin.llama.vision.image=$env:VISION_IMAGE_PATH"
    - name: Memory after tests
      if: always()
      run: Get-CimInstance Win32_OperatingSystem | Select-Object FreePhysicalMemory,TotalVisibleMemorySize | Format-List
      shell: pwsh
    # On failure, collect JVM crash logs, heap dumps, WER minidumps, surefire output
    # and the native library resources that were under test.
    - if: failure()
      uses: actions/upload-artifact@v7
      with:
        name: windows-output-ninja
        path: |
          ${{ github.workspace }}\hs_err_pid*.log
          ${{ github.workspace }}\*.hprof
          ${{ github.workspace }}\dumps\*.dmp
          ${{ github.workspace }}\target\surefire-reports\*.dump
          ${{ github.workspace }}\target\surefire-reports\*.dumpstream
          ${{ github.workspace }}\target\surefire-reports\*.txt
          ${{ github.workspace }}\target\surefire-reports\TEST-*.xml
          ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/**/*
        if-no-files-found: warn
# ---------------------------------------------------------------------------
# Package and publish
# ---------------------------------------------------------------------------
package:
  # Gathers the native-library artifacts into the resource trees read by the Maven
  # profiles, builds every JAR without running tests or signing, and uploads them.
  name: Package JARs
  runs-on: ubuntu-latest
  needs:
    - crosscompile-linux-x86_64-cuda
    - crosscompile-linux-aarch64
    - crosscompile-android-aarch64
    - crosscompile-android-aarch64-opencl
    - build-windows-x86
    - build-windows-x86_64-ninja
    - build-windows-x86-ninja
    - test-cpp-linux-x86_64
    - build-macos-arm64-metal-15
    - test-java-linux-x86_64
    - test-java-macos-arm64-metal
    - test-java-macos-arm64-no-metal
    - test-java-macos-arm64-metal-15
    - test-java-windows-x86_64
    - test-java-windows-x86_64-ninja
  steps:
    - uses: actions/checkout@v7
    # Every artifact whose name ends in `-libraries` is merged into the default tree.
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        pattern: '*-libraries'
        merge-multiple: true
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_linux_cuda/net/ladenthin/llama/
        name: linux-libraries-cuda
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_android_opencl/net/ladenthin/llama/
        name: android-libraries-opencl
    # Ninja-built Windows natives -> separate tree consumed by the `windows-ninja`
    # Maven profile (the `ninja-windows` classifier JAR). The default JAR keeps the
    # MSVC `*-libraries` natives downloaded above.
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_windows_ninja/net/ladenthin/llama/
        name: Windows-x86_64-ninja
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_windows_ninja/net/ladenthin/llama/
        name: Windows-x86-ninja
    - uses: actions/setup-java@v5
      with:
        java-version: ${{ env.JAVA_VERSION }}
        distribution: temurin
    # `assembly` additionally produces the fat jar-with-dependencies uber JAR
    # (llama-<version>-jar-with-dependencies.jar: library classes + Java runtime deps +
    # default-platform native libs in one drop-on-classpath JAR, runnable via its
    # OpenAiCompatServer Main-Class). It lands in target/ and is uploaded in the `llama-jars`
    # artifact below - a CI run artifact only, not a Maven Central / GitHub-Release asset.
    # `windows-ninja` attaches the `ninja-windows` classifier JAR (Ninja-built Windows natives).
    - name: Build JARs
      run: >-
        mvn --batch-mode --no-transfer-progress
        -P release,cuda,opencl-android,windows-ninja,assembly
        -Dmaven.test.skip=true -Dgpg.skip=true
        package
    - name: Upload JARs
      uses: actions/upload-artifact@v7
      with:
        path: target/*.jar
        name: llama-jars
report:
  # Submits the Maven dependency graph and forwards the JaCoCo XML report to Coveralls
  # and Codecov. The coverage download and both uploads are allowed to fail.
  name: Report
  runs-on: ubuntu-latest
  needs: [package]
  permissions:
    contents: write
  steps:
    - uses: actions/checkout@v7
    - uses: actions/setup-java@v5
      with:
        distribution: temurin
        java-version: ${{ env.JAVA_VERSION }}
    - continue-on-error: true
      uses: actions/download-artifact@v8
      with:
        name: jacoco-report
        path: target/site/jacoco/
    - uses: advanced-security/maven-dependency-submission-action@v5
    - name: Coveralls
      continue-on-error: true
      uses: coverallsapp/github-action@v2
      with:
        format: jacoco
        file: target/site/jacoco/jacoco.xml
        github-token: ${{ secrets.GITHUB_TOKEN }}
    - name: Codecov
      continue-on-error: true
      uses: codecov/codecov-action@v7
      with:
        files: target/site/jacoco/jacoco.xml
        token: ${{ secrets.CODECOV_TOKEN }}
check-snapshot:
  # Gate job: runs only for pushes to main, or for manual dispatches on a ref that is
  # not a v* tag. Jobs that depend on it inspect its result.
  name: "Check: main branch / SNAPSHOT"
  needs: [report]
  runs-on: ubuntu-latest
  if: >-
    (github.event_name == 'push' && github.ref == 'refs/heads/main') ||
    (github.event_name == 'workflow_dispatch' && !startsWith(github.ref, 'refs/tags/v'))
  steps:
    - name: Confirm snapshot ref
      # The ref is read from the runner-provided GITHUB_REF variable instead of being
      # expanded into the script text, so characters in a ref name can never be
      # interpreted by the shell.
      run: echo "Confirmed on snapshot ref ${GITHUB_REF}"
check-tag:
  # Gate job: runs only when the workflow was triggered on a v* tag. Jobs that depend
  # on it inspect its result.
  name: "Check: v* tag"
  needs: [report]
  runs-on: ubuntu-latest
  if: startsWith(github.ref, 'refs/tags/v')
  steps:
    - name: Confirm tag ref
      # The ref is read from the runner-provided GITHUB_REF variable instead of being
      # expanded into the script text, so characters in a tag name can never be
      # interpreted by the shell.
      run: echo "Confirmed on tag ${GITHUB_REF}"
publish-snapshot:
  # Deploys a -SNAPSHOT build with Maven. Runs only when check-snapshot succeeded and
  # the `publish_to_central` input is set; the produced JARs and signatures are then
  # stored as the `signed-snapshot-assets` artifact.
  name: Publish Snapshot to Central
  runs-on: ubuntu-latest
  environment: maven-central
  needs:
    - check-snapshot
    - crosscompile-linux-x86_64-cuda
    - crosscompile-android-aarch64-opencl
    - code-style
  if: needs.check-snapshot.result == 'success' && inputs.publish_to_central
  permissions:
    contents: write
  steps:
    - uses: actions/checkout@v7
    # Native libraries, one resource tree per Maven profile.
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
        pattern: '*-libraries'
        merge-multiple: true
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_linux_cuda/net/ladenthin/llama/
        name: linux-libraries-cuda
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_android_opencl/net/ladenthin/llama/
        name: android-libraries-opencl
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_windows_ninja/net/ladenthin/llama/
        name: Windows-x86_64-ninja
    - uses: actions/download-artifact@v8
      with:
        path: ${{ github.workspace }}/src/main/resources_windows_ninja/net/ladenthin/llama/
        name: Windows-x86-ninja
    # The server-* and gpg-passphrase values are names of environment variables; the
    # "Publish snapshot" step below supplies them from secrets.
    - name: Set up Maven Central Repository
      uses: actions/setup-java@v5
      with:
        distribution: temurin
        java-version: ${{ env.JAVA_VERSION }}
        server-id: central
        server-username: MAVEN_USERNAME
        server-password: MAVEN_PASSWORD
        gpg-passphrase: MAVEN_GPG_PASSPHRASE
        gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }}
    # Abort before deploying unless the resolved project version ends in -SNAPSHOT.
    - name: Guard - require a -SNAPSHOT version
      shell: bash
      run: |
        VERSION=$(mvn -q -DforceStdout help:evaluate -Dexpression=project.version | tail -n1)
        echo "Resolved project version: $VERSION"
        if [[ "$VERSION" == *-SNAPSHOT ]]; then
          echo "OK: -SNAPSHOT version, continuing snapshot deploy."
        else
          echo "::error::Refusing to publish non-SNAPSHOT version '$VERSION' from the snapshot job. Snapshot publishing requires a -SNAPSHOT version; releases go through the v* tag path."
          exit 1
        fi
    - name: Publish snapshot
      env:
        MAVEN_USERNAME: ${{ secrets.CENTRAL_USERNAME }}
        MAVEN_PASSWORD: ${{ secrets.CENTRAL_TOKEN }}
        MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
      run: >-
        mvn --batch-mode --no-transfer-progress
        -P release,cuda,opencl-android,windows-ninja
        -Dmaven.test.skip=true
        deploy
    # Best effort: a missing JAR or signature does not fail the step.
    - name: Collect signed artifacts
      run: |
        mkdir -p signed-snapshot-assets
        cp target/*.jar signed-snapshot-assets/ 2>/dev/null || true
        cp target/*.jar.asc signed-snapshot-assets/ 2>/dev/null || true
    - uses: actions/upload-artifact@v7
      with:
        path: signed-snapshot-assets/
        name: signed-snapshot-assets
# Keeps a rolling `snapshot` pre-release on GitHub populated with the jars and
# signatures produced by the publish-snapshot job.
github-snapshot:
  name: Update Snapshot Pre-release on GitHub
  needs: [publish-snapshot]
  if: needs.publish-snapshot.result == 'success'
  runs-on: ubuntu-latest
  permissions:
    # Creating a release and uploading its assets needs write access.
    contents: write
  steps:
    - uses: actions/download-artifact@v8
      with:
        name: signed-snapshot-assets
        path: snapshot-assets/
    - name: Update snapshot pre-release
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        # gh takes its target repository from GH_REPO. Passing it through
        # the environment keeps the ${{ }} expression out of the shell
        # script text.
        GH_REPO: ${{ github.repository }}
      run: |
        # Create the `snapshot` pre-release only if it does not exist yet.
        gh release view snapshot 2>/dev/null \
          || gh release create snapshot \
            --prerelease \
            --title "Snapshot (latest)" \
            --notes "Latest snapshot build from the main branch."
        # --clobber replaces assets that have the same file name.
        # NOTE(review): assets under other names (e.g. from an older
        # version string) are left in place - confirm that is acceptable.
        gh release upload snapshot snapshot-assets/* --clobber
# Deploys a release build to the `central` Maven server and exports the signed
# jars for the github-release-signed job.
publish-release:
  name: Publish Release to Central
  # Requires check-tag to have succeeded AND the publish_to_central input to
  # be true.
  # NOTE(review): publish_to_central is declared as a workflow_dispatch input,
  # so it is expected to be unset (falsy) on push and pull_request runs; a
  # plain `v*` tag push would then skip this job - confirm that is intended.
  if: needs.check-tag.result == 'success' && inputs.publish_to_central
  # NOTE(review): the `*-libraries` and Windows-*-ninja artifacts downloaded
  # below come from jobs that are not listed here; presumably they are
  # reached transitively through the listed jobs - confirm.
  needs: [check-tag, crosscompile-linux-x86_64-cuda, crosscompile-android-aarch64-opencl, code-style]
  runs-on: ubuntu-latest
  # Runs in the `maven-central` deployment environment, so any protection
  # rules or environment-scoped secrets configured for it apply.
  environment: maven-central
  permissions:
    # NOTE(review): no step visible in this job writes to the repository;
    # check whether `contents: read` would be enough.
    contents: write
  steps:
    - uses: actions/checkout@v7

    # Default native libraries: every artifact whose name matches
    # `*-libraries`, merged into the main resources tree.
    - uses: actions/download-artifact@v8
      with:
        pattern: "*-libraries"
        merge-multiple: true
        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/

    # Additional builds, each staged under its own src/main/resources_* root.
    - uses: actions/download-artifact@v8
      with:
        name: linux-libraries-cuda
        path: ${{ github.workspace }}/src/main/resources_linux_cuda/net/ladenthin/llama/
    - uses: actions/download-artifact@v8
      with:
        name: android-libraries-opencl
        path: ${{ github.workspace }}/src/main/resources_android_opencl/net/ladenthin/llama/
    # Both Windows architectures are extracted into the same directory.
    # NOTE(review): this relies on each artifact carrying its own
    # per-architecture sub-path - confirm against the uploading jobs.
    - uses: actions/download-artifact@v8
      with:
        name: Windows-x86_64-ninja
        path: ${{ github.workspace }}/src/main/resources_windows_ninja/net/ladenthin/llama/
    - uses: actions/download-artifact@v8
      with:
        name: Windows-x86-ninja
        path: ${{ github.workspace }}/src/main/resources_windows_ninja/net/ladenthin/llama/

    # server-username, server-password and gpg-passphrase are the NAMES of
    # environment variables; their values are set on the Maven step below.
    - name: Set up Maven Central Repository
      uses: actions/setup-java@v5
      with:
        java-version: ${{ env.JAVA_VERSION }}
        distribution: 'temurin'
        server-id: central
        server-username: MAVEN_USERNAME
        server-password: MAVEN_PASSWORD
        gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }}
        gpg-passphrase: MAVEN_GPG_PASSPHRASE

    # Tests are neither compiled nor run (-Dmaven.test.skip=true).
    - name: Publish release
      run: mvn --batch-mode --no-transfer-progress -P release,cuda,opencl-android,windows-ninja -Dmaven.test.skip=true deploy
      env:
        MAVEN_USERNAME: ${{ secrets.CENTRAL_USERNAME }}
        MAVEN_PASSWORD: ${{ secrets.CENTRAL_TOKEN }}
        MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}

    # Best-effort copy of the jars and their detached signatures.
    - name: Collect signed artifacts
      run: |
        mkdir -p signed-release-assets
        cp target/*.jar signed-release-assets/ 2>/dev/null || true
        cp target/*.jar.asc signed-release-assets/ 2>/dev/null || true

    - uses: actions/upload-artifact@v7
      with:
        name: signed-release-assets
        path: signed-release-assets/
        # The copy step above swallows every cp failure, so the directory
        # can be empty. Fail here with a clear message instead of letting
        # the problem surface later as a missing artifact downstream.
        if-no-files-found: error
# Attaches the jars and signatures exported by publish-release to the GitHub
# release.
github-release-signed:
  name: Attach Signed Binaries to GitHub Release
  needs:
    - publish-release
  if: needs.publish-release.result == 'success'
  runs-on: ubuntu-latest
  permissions:
    # Uploading release assets needs write access to repository contents.
    contents: write
  steps:
    - uses: actions/download-artifact@v8
      with:
        name: signed-release-assets
        path: release-assets/
    # No tag is passed explicitly.
    # NOTE(review): presumably the action derives the release from the ref
    # this run was started on, which check-tag (upstream of
    # publish-release) is expected to have validated - confirm.
    - name: Upload release assets
      uses: softprops/action-gh-release@v3
      with:
        files: release-assets/*