Merge pull request #270 from bernardladenthin/claude/laughing-albatta… #454

Workflow file for this run

.github/workflows/publish.yml at 7633baf

	# SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
	# SPDX-FileCopyrightText: 2023-2025 Konstantin Herud
	#
	# SPDX-License-Identifier: MIT

	# Workflow identity and triggers: pushes to main and to v* tags, every pull
	# request, and manual dispatch with two boolean switches.
	name: Publish
	on:
	  push:
	    branches:
	      - main
	    tags:
	      - 'v*'
	  pull_request:
	  workflow_dispatch:
	    inputs:
	      publish_to_central:
	        type: boolean
	        default: false
	        description: "Deploy to Maven Central (snapshot if -SNAPSHOT, release if a vX.Y.Z tag)"
	      use_cache:
	        type: boolean
	        default: true
	        description: "Use the shared sccache/Depot compiler cache (faster incremental builds)"
	# Workflow-wide environment: the JDK pin plus URL / file-name pairs for every
	# GGUF model the Java test jobs download into models/.
	env:
	  JAVA_VERSION: '21'
	  MODEL_URL: 'https://huggingface.co/TheBloke/CodeLlama-7B-GGUF/resolve/main/codellama-7b.Q2_K.gguf'
	  MODEL_NAME: 'codellama-7b.Q2_K.gguf'
	  RERANKING_MODEL_URL: 'https://huggingface.co/gpustack/jina-reranker-v1-tiny-en-GGUF/resolve/main/jina-reranker-v1-tiny-en-Q4_0.gguf'
	  RERANKING_MODEL_NAME: 'jina-reranker-v1-tiny-en-Q4_0.gguf'
	  DRAFT_MODEL_URL: 'https://huggingface.co/QuantFactory/AMD-Llama-135m-code-GGUF/resolve/main/AMD-Llama-135m-code.Q2_K.gguf'
	  DRAFT_MODEL_NAME: 'AMD-Llama-135m-code.Q2_K.gguf'
	  REASONING_MODEL_URL: 'https://huggingface.co/unsloth/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B-Q4_K_M.gguf'
	  REASONING_MODEL_NAME: 'Qwen3-0.6B-Q4_K_M.gguf'
	  TOOL_MODEL_URL: 'https://huggingface.co/bartowski/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf'
	  TOOL_MODEL_NAME: 'Qwen2.5-1.5B-Instruct-Q4_K_M.gguf'
	  NOMIC_EMBED_MODEL_URL: 'https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF/resolve/main/nomic-embed-text-v1.5.f16.gguf'
	  NOMIC_EMBED_MODEL_NAME: 'nomic-embed-text-v1.5.f16.gguf'
	  # Vision model and its mmproj, used by MultimodalIntegrationTest
	  # (upstream kherud/java-llama.cpp#103 / #34). SmolVLM-500M is the smallest
	  # community vision GGUF that loads reliably under the upstream mtmd
	  # pipeline; model plus mmproj total ~600 MB, which matches the existing
	  # per-test-job download budget.
	  VISION_MODEL_URL: 'https://huggingface.co/ggml-org/SmolVLM-500M-Instruct-GGUF/resolve/main/SmolVLM-500M-Instruct-Q8_0.gguf'
	  VISION_MODEL_NAME: 'SmolVLM-500M-Instruct-Q8_0.gguf'
	  VISION_MMPROJ_URL: 'https://huggingface.co/ggml-org/SmolVLM-500M-Instruct-GGUF/resolve/main/mmproj-SmolVLM-500M-Instruct-Q8_0.gguf'
	  VISION_MMPROJ_NAME: 'mmproj-SmolVLM-500M-Instruct-Q8_0.gguf'
	  # Text-to-speech model and vocoder for TtsIntegrationTest, the sibling of
	  # AudioInputIntegrationTest (OuteTTS pipeline).
	  TTS_MODEL_URL: 'https://huggingface.co/second-state/OuteTTS-0.2-500M-GGUF/resolve/main/OuteTTS-0.2-500M-Q4_K_M.gguf'
	  TTS_MODEL_NAME: 'OuteTTS-0.2-500M-Q4_K_M.gguf'
	  TTS_VOCODER_URL: 'https://huggingface.co/ggml-org/WavTokenizer/resolve/main/WavTokenizer-Large-75-F16.gguf'
	  TTS_VOCODER_NAME: 'WavTokenizer-Large-75-F16.gguf'
	  # The MultimodalIntegrationTest image is committed to the repo at
	  # src/test/resources/images/test-image.jpg (licensing is covered by the
	  # README in that directory), so nothing is downloaded for it; CI only
	  # hands the committed path to mvn test.
	  VISION_IMAGE_PATH: 'src/test/resources/images/test-image.jpg'
	# Default token scope for every job: read-only repository contents.
	permissions:
	  contents: read
	jobs:

	# ---------------------------------------------------------------------------
	# Start gate — single cancellable abort window before the pipeline starts.
	# The wait duration lives in the `startgate` GitHub Environment (Settings →
	# Environments → startgate → Wait timer).
	# ---------------------------------------------------------------------------

	startgate:
	  # Gate job: it is bound to the `startgate` environment and does nothing but
	  # print a message, so other jobs can list it in `needs:`.
	  name: Start gate (abort window)
	  environment: startgate
	  runs-on: ubuntu-latest
	  steps:
	    - run: echo "Start gate elapsed — proceeding with pipeline."

	# ---------------------------------------------------------------------------
	# Pre-build jobs (code style, shared WebUI) and Linux/Android release builds — the dockcross cross-compiles run no tests; the native aarch64 build runs ctest
	# ---------------------------------------------------------------------------

	code-style:
	  # Formatting gate (spotless:check) followed by an informational jdeps dump
	  # of the internal package graph.
	  name: Code style (spotless) + package graph
	  needs: startgate
	  runs-on: ubuntu-latest
	  steps:
	    - uses: actions/checkout@v7
	    - uses: actions/setup-java@v5
	      with:
	        distribution: 'temurin'
	        # Use the workflow-level pin rather than a second hard-coded '21', so
	        # the JDK cannot drift from the build and test jobs.
	        java-version: ${{ env.JAVA_VERSION }}
	    - name: Spotless check (fail fast on format violations)
	      run: mvn -B --no-transfer-progress spotless:check
	    - name: Print internal package dependency graph (jdeps, informational)
	      # Informational only: a failure of this step must not fail the job.
	      continue-on-error: true
	      run: |
	        mvn -B --no-transfer-progress -DskipTests -Denforcer.skip=true compile
	        echo "=== internal package dependency graph (jdeps, bytecode) ==="
	        # `|| true` keeps an empty grep result from failing the step.
	        jdeps -verbose:package target/classes | grep 'net.ladenthin.llama' || true

	# ---------------------------------------------------------------------------
	# Build the llama.cpp WebUI ONCE, from the same pinned tag CMakeLists.txt fetches,
	# and share it to every native build as the generated, platform-independent
	# ui.cpp/ui.h ("webui-generated" artifact). The native builds embed it into
	# libjllama (CMake's "WebUI assets" block); when this job's artifact is absent the
	# build falls back to the empty-asset stub. npm runs only here, in one controlled
	# job — never in the dockcross cross-compilers (which have no node) or per-platform.
	# ---------------------------------------------------------------------------
	build-webui:
	  # Builds the llama.cpp WebUI at the tag pinned in CMakeLists.txt, embeds the
	  # gzip-compressed assets into ui.cpp / ui.h, and publishes them as the
	  # `webui-generated` artifact that the native build jobs download.
	  name: Build WebUI assets (shared)
	  needs: startgate
	  runs-on: ubuntu-latest
	  steps:
	    - uses: actions/checkout@v7
	    - name: Resolve pinned llama.cpp tag from CMakeLists.txt
	      id: tag
	      shell: bash
	      run: |
	        # First `GIT_TAG b<digits>` occurrence wins.
	        TAG=$(grep -oE 'GIT_TAG[[:space:]]+b[0-9]+' CMakeLists.txt | grep -oE 'b[0-9]+' | head -1)
	        if [ -z "$TAG" ]; then
	          echo "could not resolve llama.cpp GIT_TAG (b<nnnn>) from CMakeLists.txt" >&2
	          exit 1
	        fi
	        echo "tag=$TAG" >> "$GITHUB_OUTPUT"
	        echo "Pinned llama.cpp WebUI tag: $TAG"
	    - name: Checkout llama.cpp tools/ui at the pinned tag
	      uses: actions/checkout@v7
	      with:
	        repository: ggml-org/llama.cpp
	        ref: ${{ steps.tag.outputs.tag }}
	        path: llamacpp-ui
	        sparse-checkout: tools/ui
	        sparse-checkout-cone-mode: true
	    - uses: actions/setup-node@v6
	      with:
	        node-version: '24'
	        cache: npm
	        cache-dependency-path: llamacpp-ui/tools/ui/package-lock.json
	    - name: Build WebUI (Svelte/Vite)
	      working-directory: llamacpp-ui/tools/ui
	      env:
	        HF_UI_VERSION: ${{ steps.tag.outputs.tag }}
	        LLAMA_BUILD_NUMBER: ${{ steps.tag.outputs.tag }}
	      run: |
	        npm ci --ignore-scripts
	        npm run build
	        # Fail the step if the build did not produce an entry page.
	        test -f dist/index.html
	    - name: Embed assets into ui.cpp / ui.h (gzip parity with upstream)
	      working-directory: llamacpp-ui/tools/ui
	      shell: bash
	      run: |
	        set -euo pipefail
	        # gzip every asset into dist/_gzip/<path> so llama-ui-embed embeds the
	        # compressed bytes (LLAMA_UI_GZIP parity); embed auto-detects _gzip.
	        # `IFS=` keeps leading/trailing whitespace in asset paths intact.
	        ( cd dist && find . -type f -not -path './_gzip/*' | while IFS= read -r f; do
	            mkdir -p "_gzip/$(dirname "$f")"
	            gzip -9 -c "$f" > "_gzip/$f"
	          done )
	        # llama-ui-embed is a self-contained C++17 host tool (no npm) — build + run it.
	        g++ -O2 -std=c++17 -o llama-ui-embed embed.cpp
	        mkdir -p "$GITHUB_WORKSPACE/webui-generated"
	        ./llama-ui-embed \
	          "$GITHUB_WORKSPACE/webui-generated/ui.cpp" \
	          "$GITHUB_WORKSPACE/webui-generated/ui.h" \
	          dist
	        echo "=== generated WebUI assets ==="
	        ls -la "$GITHUB_WORKSPACE/webui-generated"
	        # An asset table without this marker means nothing was embedded.
	        if grep -q LLAMA_UI_HAS_ASSETS "$GITHUB_WORKSPACE/webui-generated/ui.h"; then
	          echo "LLAMA_UI_HAS_ASSETS: present (real WebUI embedded)"
	        else
	          echo "ERROR: embed produced an empty asset table" >&2
	          exit 1
	        fi
	    - name: Upload WebUI artifact
	      uses: actions/upload-artifact@v7
	      with:
	        name: webui-generated
	        path: ${{ github.workspace }}/webui-generated/
	        retention-days: 1
	        if-no-files-found: error

	crosscompile-linux-x86_64-cuda:
	  name: Cross-Compile manylinux_2_28 x86_64 (CUDA)
	  needs: [startgate, build-webui]
	  runs-on: ubuntu-latest
	  # CUDA cache. build_cuda_linux.sh execs build.sh, so the same sccache probe guards this job.
	  # build.sh also wraps nvcc (CMAKE_CUDA_COMPILER_LAUNCHER=sccache) for CUDA builds, so the
	  # per-arch .cu device passes — the dominant cost of this job — cache over Depot alongside the
	  # gcc host TUs. Verified on a warm run: 100% hit on CUDA / CUBIN / device-code (139 CUDA hits,
	  # 99.86% overall), cutting the job from ~51 min cold to ~15 min warm. The job therefore always
	  # builds the FULL CMAKE_CUDA_ARCHITECTURES set (no single-arch shortcut) and leans on the warm
	  # cache for speed, so every artifact stays release-safe (runs on every GPU generation) on PR /
	  # push as well as publish. CUDA_FAST_BUILD still exists in build_cuda_linux.sh as a LOCAL-dev
	  # knob, but CI no longer sets it. The first-run sccache debug diagnostics (SCCACHE_LOG /
	  # SCCACHE_ERROR_LOG / RUST_BACKTRACE) were dropped now that caching is confirmed; build.sh still
	  # prints the `sccache --show-stats` hit table at the end of every run. Inert without DEPOT_TOKEN
	  # (fork PRs) or use_cache=false.
	  env:
	    # true on every trigger except a manual dispatch with use_cache unticked.
	    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
	    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
	    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
	    DOCKCROSS_ARGS: "-e SCCACHE_WEBDAV_ENDPOINT -e SCCACHE_WEBDAV_TOKEN -e USE_CACHE"
	  steps:
	    - uses: actions/checkout@v7
	    - name: Download shared WebUI assets
	      uses: actions/download-artifact@v8
	      with:
	        name: webui-generated
	        path: ${{ github.workspace }}/webui-generated/
	    - name: Display CPU Info
	      shell: bash
	      run: |
	        echo "=== Host CPU Information ==="
	        lscpu
	        echo ""
	        echo "=== CPU Details from /proc/cpuinfo ==="
	        cat /proc/cpuinfo
	    - name: Build libraries
	      shell: bash
	      run: |
	        .github/dockcross/dockcross-manylinux_2_28-x64 .github/build_cuda_linux.sh "-DOS_NAME=Linux -DOS_ARCH=x86_64"
	    - name: Upload artifacts
	      uses: actions/upload-artifact@v7
	      with:
	        name: linux-libraries-cuda
	        path: ${{ github.workspace }}/src/main/resources_linux_cuda/net/ladenthin/llama/
	        # Fail here rather than upload nothing and break a downstream job.
	        if-no-files-found: error

	crosscompile-linux-x86_64:
	  name: Cross-Compile manylinux2014 x86_64
	  needs: [startgate, build-webui]
	  runs-on: ubuntu-latest
	  # Phase 2 dockcross cache rollout — job 1, VERIFIED green in CI (PR #245): sccache v0.16.0
	  # probe passed in-container (devtoolset-10 gcc), cache ON over Depot WebDAV (cold run: 275
	  # objects stored). Steady-state env below — the first-run diagnostics (SCCACHE_LOG /
	  # SCCACHE_ERROR_LOG / RUST_BACKTRACE) were dropped now that it is proven. Inert without
	  # DEPOT_TOKEN (fork PRs) or with use_cache=false; a crashing sccache still falls back to a
	  # green uncached build via the build.sh probe.
	  env:
	    # true on every trigger except a manual dispatch with use_cache unticked.
	    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
	    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
	    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
	    DOCKCROSS_ARGS: "-e SCCACHE_WEBDAV_ENDPOINT -e SCCACHE_WEBDAV_TOKEN -e USE_CACHE"
	  steps:
	    - uses: actions/checkout@v7
	    - name: Download shared WebUI assets
	      uses: actions/download-artifact@v8
	      with:
	        name: webui-generated
	        path: ${{ github.workspace }}/webui-generated/
	    - name: Display CPU Info
	      shell: bash
	      run: |
	        echo "=== Host CPU Information ==="
	        lscpu
	        echo ""
	        echo "=== CPU Details from /proc/cpuinfo ==="
	        cat /proc/cpuinfo
	    - name: Build libraries
	      shell: bash
	      run: |
	        .github/dockcross/dockcross-manylinux2014-x64 .github/build.sh "-DOS_NAME=Linux -DOS_ARCH=x86_64"
	    - name: Upload artifacts
	      uses: actions/upload-artifact@v7
	      with:
	        name: Linux-x86_64-libraries
	        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
	        # Fail here rather than upload nothing and break a downstream job.
	        if-no-files-found: error

	crosscompile-linux-aarch64:
	  name: Build and Test Linux aarch64
	  needs: [startgate, build-webui]
	  # Native ARM64 build on GitHub's free arm64 runner, mirroring upstream llama.cpp's
	  # `ubuntu-cpu` aarch64 release job (ubuntu-24.04-arm + GCC 14). Replaces the former dockcross
	  # `linux-arm64-lts` cross-compile (GCC 8.5, glibc 2.17), which can no longer compile llama.cpp
	  # b9739 — its C++17 CTAD-in-`new` needs GCC >= 12. Building natively also lets us run the C++
	  # unit suite (ctest) on real ARM hardware for the first time (the cross build ran no tests).
	  # Trade-off: the glibc floor rises 2.17 -> ~2.39, the same envelope upstream's own ARM binaries
	  # require. GGML_NATIVE=OFF keeps the artifact portable across ARMv8 CPU generations (no
	  # build-host -march baked in). The job id is kept (a `needs:` target downstream); only the
	  # display name changed, so update any branch-protection required-check that pinned the old name.
	  runs-on: ubuntu-24.04-arm
	  env:
	    # true on every trigger except a manual dispatch with use_cache unticked.
	    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
	    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
	    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
	  steps:
	    - uses: actions/checkout@v7
	    - name: Download shared WebUI assets
	      uses: actions/download-artifact@v8
	      with:
	        name: webui-generated
	        path: ${{ github.workspace }}/webui-generated/
	    - uses: actions/setup-java@v5
	      with:
	        distribution: 'temurin'
	        java-version: ${{ env.JAVA_VERSION }}
	    - name: Install toolchain (GCC 14, mirrors upstream llama.cpp ARM release)
	      run: |
	        sudo apt-get update
	        sudo apt-get install -y gcc-14 g++-14
	        # Exported through GITHUB_ENV so the later build step sees CC / CXX.
	        echo "CC=gcc-14" >> "$GITHUB_ENV"
	        echo "CXX=g++-14" >> "$GITHUB_ENV"
	    - name: Display CPU Info
	      shell: bash
	      run: |
	        echo "=== Host CPU Information ==="
	        lscpu
	        echo ""
	        echo "=== CPU Details from /proc/cpuinfo ==="
	        cat /proc/cpuinfo
	    - name: Build libraries
	      shell: bash
	      run: |
	        mvn --no-transfer-progress compile
	        .github/build.sh "-DOS_NAME=Linux -DOS_ARCH=aarch64 -DGGML_NATIVE=OFF -DBUILD_TESTING=ON"
	    - name: Run C++ unit tests
	      run: ctest --test-dir build --output-on-failure
	    - name: Upload artifacts
	      uses: actions/upload-artifact@v7
	      with:
	        name: Linux-aarch64-libraries
	        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
	        # Fail here rather than upload nothing and break a downstream job.
	        if-no-files-found: error

	crosscompile-android-aarch64:
	  name: Cross-Compile Android aarch64
	  needs: [startgate, build-webui]
	  runs-on: ubuntu-latest
	  # Phase 2 dockcross cache rollout — job 4. Same steady-state env as manylinux2014 (job 1);
	  # the build.sh probe makes it safe to enable without a separate verification run. Inert
	  # without DEPOT_TOKEN (fork PRs) or use_cache=false.
	  env:
	    # true on every trigger except a manual dispatch with use_cache unticked.
	    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
	    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
	    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
	    DOCKCROSS_ARGS: "-e SCCACHE_WEBDAV_ENDPOINT -e SCCACHE_WEBDAV_TOKEN -e USE_CACHE"
	  steps:
	    - uses: actions/checkout@v7
	    - name: Download shared WebUI assets
	      uses: actions/download-artifact@v8
	      with:
	        name: webui-generated
	        path: ${{ github.workspace }}/webui-generated/
	    - name: Display CPU Info
	      shell: bash
	      run: |
	        echo "=== Host CPU Information ==="
	        lscpu
	        echo ""
	        echo "=== CPU Details from /proc/cpuinfo ==="
	        cat /proc/cpuinfo
	    - name: Build libraries
	      shell: bash
	      run: |
	        .github/dockcross/dockcross-android-arm64 .github/build.sh "-DOS_NAME=Linux-Android -DOS_ARCH=aarch64"
	    - name: Upload artifacts
	      uses: actions/upload-artifact@v7
	      with:
	        name: Linux-Android-aarch64-libraries
	        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
	        # Fail here rather than upload nothing and break a downstream job.
	        if-no-files-found: error

	crosscompile-android-aarch64-opencl:
	  name: Cross-Compile Android aarch64 (OpenCL/Adreno)
	  needs: [startgate, build-webui]
	  runs-on: ubuntu-latest
	  # Phase 2 dockcross cache rollout — job 5. build_opencl_android.sh stages the OpenCL
	  # headers/loader, then delegates the jllama cmake build to build.sh (which owns the
	  # sccache probe + launcher). Same steady-state env as the other dockcross jobs. Inert
	  # without DEPOT_TOKEN (fork PRs) or use_cache=false.
	  env:
	    # true on every trigger except a manual dispatch with use_cache unticked.
	    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
	    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
	    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
	    DOCKCROSS_ARGS: "-e SCCACHE_WEBDAV_ENDPOINT -e SCCACHE_WEBDAV_TOKEN -e USE_CACHE"
	  steps:
	    - uses: actions/checkout@v7
	    - name: Download shared WebUI assets
	      uses: actions/download-artifact@v8
	      with:
	        name: webui-generated
	        path: ${{ github.workspace }}/webui-generated/
	    - name: Build libraries
	      shell: bash
	      run: |
	        .github/dockcross/dockcross-android-arm64 .github/build_opencl_android.sh "-DOS_NAME=Linux-Android -DOS_ARCH=aarch64 -DGGML_OPENCL=ON -DGGML_OPENCL_EMBED_KERNELS=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON"
	    - name: Upload artifacts
	      uses: actions/upload-artifact@v7
	      with:
	        name: android-libraries-opencl
	        path: ${{ github.workspace }}/src/main/resources_android_opencl/net/ladenthin/llama/
	        # Fail here rather than upload nothing and break a downstream job.
	        if-no-files-found: error

	# ---------------------------------------------------------------------------
	# Native build jobs — produce release artifacts + run C++ unit tests
	# ---------------------------------------------------------------------------

	build-macos-arm64-no-metal:
	  # macOS 15 build with Metal switched off (-DLLAMA_METAL=OFF); runs ctest
	  # and uploads the result as `macos-15-libraries`.
	  name: Build and Test macOS 15 arm64 (no Metal)
	  needs: [startgate, build-webui]
	  runs-on: macos-15
	  env:
	    BUILD_JOBS: 2
	    # true on every trigger except a manual dispatch with use_cache unticked.
	    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
	    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
	    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
	  steps:
	    - uses: actions/checkout@v7
	    - name: Download shared WebUI assets
	      uses: actions/download-artifact@v8
	      with:
	        name: webui-generated
	        path: ${{ github.workspace }}/webui-generated/
	    - uses: actions/setup-java@v5
	      with:
	        distribution: 'temurin'
	        java-version: ${{ env.JAVA_VERSION }}
	    - name: Display CPU Info
	      shell: bash
	      run: |
	        echo "=== CPU Information ==="
	        # Unknown sysctl keys are tolerated: stderr is dropped and the exit code is ignored.
	        sysctl hw.model hw.cachelinesize hw.cpufrequency hw.cachesize hw.physicalcpu hw.logicalcpu hw.packages hw.memsize hw.ncpu 2>/dev/null || true
	        echo ""
	        echo "=== Processor Details ==="
	        system_profiler SPHardwareDataType
	    - name: Install sccache (shared compiler cache)
	      # Skipped when caching is off or no token is available; an install
	      # failure does not fail the job.
	      if: env.USE_CACHE == 'true' && env.SCCACHE_WEBDAV_TOKEN != ''
	      continue-on-error: true
	      run: brew install sccache
	    - name: Build libraries
	      shell: bash
	      run: |
	        mvn --no-transfer-progress compile
	        .github/build.sh -DLLAMA_METAL=OFF -DGGML_NATIVE=OFF -DBUILD_TESTING=ON
	    - name: Run C++ unit tests
	      run: ctest --test-dir build --output-on-failure
	    - name: Upload artifacts
	      uses: actions/upload-artifact@v7
	      with:
	        name: macos-15-libraries
	        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
	        # Fail here rather than upload nothing and break a downstream job.
	        if-no-files-found: error

	build-macos-arm64-metal:
	  # macOS 14 build with the Metal library embedded
	  # (-DLLAMA_METAL_EMBED_LIBRARY=ON); runs ctest and uploads the result as
	  # `macos-14-libraries`.
	  name: Build and Test macOS 14 arm64 (Metal)
	  needs: [startgate, build-webui]
	  runs-on: macos-14
	  env:
	    BUILD_JOBS: 2
	    # true on every trigger except a manual dispatch with use_cache unticked.
	    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
	    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
	    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
	  steps:
	    - uses: actions/checkout@v7
	    - name: Download shared WebUI assets
	      uses: actions/download-artifact@v8
	      with:
	        name: webui-generated
	        path: ${{ github.workspace }}/webui-generated/
	    - uses: actions/setup-java@v5
	      with:
	        distribution: 'temurin'
	        java-version: ${{ env.JAVA_VERSION }}
	    - name: Display CPU Info
	      shell: bash
	      run: |
	        echo "=== CPU Information ==="
	        # Unknown sysctl keys are tolerated: stderr is dropped and the exit code is ignored.
	        sysctl hw.model hw.cachelinesize hw.cpufrequency hw.cachesize hw.physicalcpu hw.logicalcpu hw.packages hw.memsize hw.ncpu 2>/dev/null || true
	        echo ""
	        echo "=== Processor Details ==="
	        system_profiler SPHardwareDataType
	    - name: Install sccache (shared compiler cache)
	      # Skipped when caching is off or no token is available; an install
	      # failure does not fail the job.
	      if: env.USE_CACHE == 'true' && env.SCCACHE_WEBDAV_TOKEN != ''
	      continue-on-error: true
	      run: brew install sccache
	    - name: Build libraries
	      shell: bash
	      run: |
	        mvn --no-transfer-progress compile
	        .github/build.sh -DLLAMA_METAL_EMBED_LIBRARY=ON -DBUILD_TESTING=ON
	    - name: Run C++ unit tests
	      run: ctest --test-dir build --output-on-failure
	    - name: Upload artifacts
	      uses: actions/upload-artifact@v7
	      with:
	        name: macos-14-libraries
	        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
	        # Fail here rather than upload nothing and break a downstream job.
	        if-no-files-found: error

	build-windows-x86_64:
	  # 64-bit Windows build with the Visual Studio generator; runs ctest and
	  # uploads the result as `Windows-x86_64-libraries`.
	  name: Build and Test Windows 2025 x86_64 (VS 2026)
	  needs: [startgate, build-webui]
	  runs-on: windows-2025-vs2026
	  steps:
	    - uses: actions/checkout@v7
	    - name: Download shared WebUI assets
	      uses: actions/download-artifact@v8
	      with:
	        name: webui-generated
	        path: ${{ github.workspace }}/webui-generated/
	    - name: Display CPU Info
	      shell: pwsh
	      run: |
	        Write-Host "=== CPU Information (Get-CimInstance - All Properties) ==="
	        Get-CimInstance Win32_Processor | Select-Object * | Format-List
	        Write-Host ""
	        Write-Host "=== CPU Information (systeminfo) ==="
	        systeminfo | Select-String "Processor"
	        Write-Host ""
	        Write-Host "=== CPU Information (Get-ComputerInfo) ==="
	        Get-ComputerInfo -Property "CsProcessors*" 2>$null || Write-Host "Get-ComputerInfo not available"
	    - name: Build libraries
	      shell: cmd
	      run: |
	        .github\build.bat -G "Visual Studio 18 2026" -A "x64" -DOS_NAME=Windows -DOS_ARCH=x86_64 -DBUILD_TESTING=ON
	    - name: Run C++ unit tests
	      run: ctest --test-dir build --output-on-failure
	    - name: Upload artifacts
	      uses: actions/upload-artifact@v7
	      with:
	        name: Windows-x86_64-libraries
	        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
	        # Fail here rather than upload nothing and break a downstream job.
	        if-no-files-found: error

	build-windows-x86:
	  # 32-bit (Win32) Windows build with the Visual Studio generator; runs ctest
	  # and uploads the result as `Windows-x86-libraries`.
	  name: Build and Test Windows 2025 x86 (VS 2026)
	  needs: [startgate, build-webui]
	  runs-on: windows-2025-vs2026
	  steps:
	    - uses: actions/checkout@v7
	    - name: Download shared WebUI assets
	      uses: actions/download-artifact@v8
	      with:
	        name: webui-generated
	        path: ${{ github.workspace }}/webui-generated/
	    - name: Display CPU Info
	      shell: pwsh
	      run: |
	        Write-Host "=== CPU Information (Get-CimInstance - All Properties) ==="
	        Get-CimInstance Win32_Processor | Select-Object * | Format-List
	        Write-Host ""
	        Write-Host "=== CPU Information (systeminfo) ==="
	        systeminfo | Select-String "Processor"
	        Write-Host ""
	        Write-Host "=== CPU Information (Get-ComputerInfo) ==="
	        Get-ComputerInfo -Property "CsProcessors*" 2>$null || Write-Host "Get-ComputerInfo not available"
	    - name: Build libraries
	      shell: cmd
	      run: |
	        .github\build.bat -G "Visual Studio 18 2026" -A "Win32" -DOS_NAME=Windows -DOS_ARCH=x86 -DBUILD_TESTING=ON
	    - name: Run C++ unit tests
	      run: ctest --test-dir build --output-on-failure
	    - name: Upload artifacts
	      uses: actions/upload-artifact@v7
	      with:
	        name: Windows-x86-libraries
	        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
	        # Fail here rather than upload nothing and break a downstream job.
	        if-no-files-found: error

	# ---------------------------------------------------------------------------
	# Windows Ninja Multi-Config + sccache — EVALUATION jobs (not yet released).
	# The Visual Studio generator ignores CMAKE_{C,CXX}_COMPILER_LAUNCHER, so the two
	# build-windows-* jobs above are the only uncached native builds. Upstream
	# llama.cpp ships its windows-cuda artifact with "Ninja Multi-Config" + MSVC,
	# which proves the combination works on the same tree. These two jobs run that
	# combination in parallel with the trusted VS jobs and front cl.exe with sccache
	# over Depot WebDAV (build.bat probe-guards it). Artifacts are named
	# `Windows-<arch>-ninja` (NOT `-libraries`) so the package job's `pattern: "*-libraries"`
	# does NOT pick them up — they are evaluation-only until cache hits are confirmed,
	# at which point the release path is switched over (see TODO.md). The package job's
	# `needs:` is intentionally left unchanged.
	# ---------------------------------------------------------------------------

	build-windows-x86_64-ninja:
	  # Evaluation build: the "Ninja Multi-Config" generator inside an x64 MSVC
	  # developer environment, with sccache fetched from its release zip. The
	  # artifact is named `Windows-x86_64-ninja`, i.e. without the `-libraries`
	  # suffix.
	  name: Build and Test Windows 2025 x86_64 (Ninja Multi-Config + sccache, eval)
	  needs: [startgate, build-webui]
	  runs-on: windows-2025-vs2026
	  env:
	    # true on every trigger except a manual dispatch with use_cache unticked.
	    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
	    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
	    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
	  steps:
	    - uses: actions/checkout@v7
	    - name: Download shared WebUI assets
	      uses: actions/download-artifact@v8
	      with:
	        name: webui-generated
	        path: ${{ github.workspace }}/webui-generated/
	    - name: Set up MSVC developer environment (x64)
	      uses: ilammy/msvc-dev-cmd@v1
	      with:
	        arch: x64
	    - name: Install sccache (shared compiler cache)
	      # Skipped when caching is off or no token is available; a download
	      # failure does not fail the job.
	      if: env.USE_CACHE == 'true' && env.SCCACHE_WEBDAV_TOKEN != ''
	      continue-on-error: true
	      shell: pwsh
	      run: |
	        $ver = "0.16.0"
	        $rel = "sccache-v$ver-x86_64-pc-windows-msvc"
	        $url = "https://github.com/mozilla/sccache/releases/download/v$ver/$rel.zip"
	        Write-Host "Downloading $url"
	        Invoke-WebRequest -Uri $url -OutFile "$env:RUNNER_TEMP\sccache.zip"
	        Expand-Archive -Path "$env:RUNNER_TEMP\sccache.zip" -DestinationPath "$env:RUNNER_TEMP\sccache" -Force
	        # Appending to GITHUB_PATH puts sccache on PATH for the following steps.
	        Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\sccache\$rel"
	    - name: Display CPU Info
	      shell: pwsh
	      run: |
	        Write-Host "=== CPU Information (Get-CimInstance - All Properties) ==="
	        Get-CimInstance Win32_Processor | Select-Object * | Format-List
	        Write-Host ""
	        Write-Host "=== CPU Information (systeminfo) ==="
	        systeminfo | Select-String "Processor"
	        Write-Host ""
	        Write-Host "=== CPU Information (Get-ComputerInfo) ==="
	        Get-ComputerInfo -Property "CsProcessors*" 2>$null || Write-Host "Get-ComputerInfo not available"
	    - name: Build libraries
	      shell: cmd
	      run: |
	        .github\build.bat -G "Ninja Multi-Config" -DOS_NAME=Windows -DOS_ARCH=x86_64 -DBUILD_TESTING=ON
	    - name: Run C++ unit tests
	      run: ctest --test-dir build --output-on-failure
	    - name: Upload artifacts
	      uses: actions/upload-artifact@v7
	      with:
	        name: Windows-x86_64-ninja
	        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
	        # Fail here rather than upload an empty evaluation artifact.
	        if-no-files-found: error

	build-windows-x86-ninja:
	  # Evaluation build: the "Ninja Multi-Config" generator inside an x86 MSVC
	  # developer environment, with sccache fetched from its release zip. The
	  # artifact is named `Windows-x86-ninja`, i.e. without the `-libraries`
	  # suffix.
	  name: Build and Test Windows 2025 x86 (Ninja Multi-Config + sccache, eval)
	  needs: [startgate, build-webui]
	  runs-on: windows-2025-vs2026
	  env:
	    # true on every trigger except a manual dispatch with use_cache unticked.
	    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
	    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
	    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
	  steps:
	    - uses: actions/checkout@v7
	    - name: Download shared WebUI assets
	      uses: actions/download-artifact@v8
	      with:
	        name: webui-generated
	        path: ${{ github.workspace }}/webui-generated/
	    - name: Set up MSVC developer environment (x86)
	      uses: ilammy/msvc-dev-cmd@v1
	      with:
	        arch: x86
	    - name: Install sccache (shared compiler cache)
	      # Skipped when caching is off or no token is available; a download
	      # failure does not fail the job. The x86_64 sccache release is used
	      # here as well; only the MSVC target environment is x86.
	      if: env.USE_CACHE == 'true' && env.SCCACHE_WEBDAV_TOKEN != ''
	      continue-on-error: true
	      shell: pwsh
	      run: |
	        $ver = "0.16.0"
	        $rel = "sccache-v$ver-x86_64-pc-windows-msvc"
	        $url = "https://github.com/mozilla/sccache/releases/download/v$ver/$rel.zip"
	        Write-Host "Downloading $url"
	        Invoke-WebRequest -Uri $url -OutFile "$env:RUNNER_TEMP\sccache.zip"
	        Expand-Archive -Path "$env:RUNNER_TEMP\sccache.zip" -DestinationPath "$env:RUNNER_TEMP\sccache" -Force
	        # Appending to GITHUB_PATH puts sccache on PATH for the following steps.
	        Add-Content -Path $env:GITHUB_PATH -Value "$env:RUNNER_TEMP\sccache\$rel"
	    - name: Display CPU Info
	      shell: pwsh
	      run: |
	        Write-Host "=== CPU Information (Get-CimInstance - All Properties) ==="
	        Get-CimInstance Win32_Processor | Select-Object * | Format-List
	        Write-Host ""
	        Write-Host "=== CPU Information (systeminfo) ==="
	        systeminfo | Select-String "Processor"
	        Write-Host ""
	        Write-Host "=== CPU Information (Get-ComputerInfo) ==="
	        Get-ComputerInfo -Property "CsProcessors*" 2>$null || Write-Host "Get-ComputerInfo not available"
	    - name: Build libraries
	      shell: cmd
	      run: |
	        .github\build.bat -G "Ninja Multi-Config" -DOS_NAME=Windows -DOS_ARCH=x86 -DBUILD_TESTING=ON
	    - name: Run C++ unit tests
	      run: ctest --test-dir build --output-on-failure
	    - name: Upload artifacts
	      uses: actions/upload-artifact@v7
	      with:
	        name: Windows-x86-ninja
	        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
	        # Fail here rather than upload an empty evaluation artifact.
	        if-no-files-found: error

	# ---------------------------------------------------------------------------
	# CI-only jobs — no release artifact, purely for test coverage
	# ---------------------------------------------------------------------------

	test-cpp-linux-x86_64:
	  # Native x86_64 build on the runner itself, followed by ctest. This job
	  # uploads nothing and does not download the shared WebUI artifact.
	  name: C++ Tests Ubuntu Latest x86_64
	  needs: startgate
	  runs-on: ubuntu-latest
	  steps:
	    - uses: actions/checkout@v7
	    - uses: actions/setup-java@v5
	      with:
	        distribution: 'temurin'
	        java-version: ${{ env.JAVA_VERSION }}
	    - name: Display CPU Info
	      run: |
	        echo "=== CPU Information ==="
	        lscpu
	        echo ""
	        echo "=== CPU Details from /proc/cpuinfo ==="
	        cat /proc/cpuinfo
	    - name: Build libraries
	      run: |
	        mvn -q --no-transfer-progress compile
	        .github/build.sh -DBUILD_TESTING=ON
	    - name: Run C++ unit tests
	      run: ctest --test-dir build --output-on-failure

	build-macos-arm64-metal-15:
	  # macOS 15 build with the Metal library embedded and GGML_NATIVE=OFF; runs
	  # ctest and uploads the result as `macos-15-metal-libraries`.
	  # NOTE(review): this job sits under the "CI-only jobs — no release artifact"
	  # section, yet its artifact name ends in `-libraries`, which is the pattern
	  # the Windows-Ninja section comment says the package job collects. Confirm
	  # whether that pickup is intended.
	  name: Build and Test macOS 15 arm64 (Metal)
	  needs: [startgate, build-webui]
	  runs-on: macos-15
	  env:
	    BUILD_JOBS: 2
	    # true on every trigger except a manual dispatch with use_cache unticked.
	    USE_CACHE: ${{ github.event_name != 'workflow_dispatch' || inputs.use_cache }}
	    SCCACHE_WEBDAV_ENDPOINT: https://cache.depot.dev
	    SCCACHE_WEBDAV_TOKEN: ${{ secrets.DEPOT_TOKEN }}
	  steps:
	    - uses: actions/checkout@v7
	    - name: Download shared WebUI assets
	      uses: actions/download-artifact@v8
	      with:
	        name: webui-generated
	        path: ${{ github.workspace }}/webui-generated/
	    - uses: actions/setup-java@v5
	      with:
	        distribution: 'temurin'
	        java-version: ${{ env.JAVA_VERSION }}
	    - name: Display CPU Info
	      shell: bash
	      run: |
	        echo "=== CPU Information ==="
	        # Unknown sysctl keys are tolerated: stderr is dropped and the exit code is ignored.
	        sysctl hw.model hw.cachelinesize hw.cpufrequency hw.cachesize hw.physicalcpu hw.logicalcpu hw.packages hw.memsize hw.ncpu 2>/dev/null || true
	        echo ""
	        echo "=== Processor Details ==="
	        system_profiler SPHardwareDataType
	    - name: Install sccache (shared compiler cache)
	      # Skipped when caching is off or no token is available; an install
	      # failure does not fail the job.
	      if: env.USE_CACHE == 'true' && env.SCCACHE_WEBDAV_TOKEN != ''
	      continue-on-error: true
	      run: brew install sccache
	    - name: Build libraries
	      shell: bash
	      run: |
	        mvn --no-transfer-progress compile
	        .github/build.sh -DLLAMA_METAL_EMBED_LIBRARY=ON -DGGML_NATIVE=OFF -DBUILD_TESTING=ON
	    - name: Run C++ unit tests
	      run: ctest --test-dir build --output-on-failure
	    - name: Upload artifacts
	      uses: actions/upload-artifact@v7
	      with:
	        name: macos-15-metal-libraries
	        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
	        # Fail here rather than upload an empty artifact.
	        if-no-files-found: error

	# ---------------------------------------------------------------------------
	# Java test jobs — download release artifact, run mvn test
	# ---------------------------------------------------------------------------

	test-java-linux-x86_64:
	name: Java Tests Ubuntu Latest x86_64
	needs: crosscompile-linux-x86_64
	runs-on: ubuntu-latest
	steps:
	- uses: actions/checkout@v7
	- name: Display CPU Info
	shell: bash
	run: \|
	echo "=== CPU Information ==="
	lscpu
	echo ""
	echo "=== CPU Details from /proc/cpuinfo ==="
	cat /proc/cpuinfo
	- uses: actions/download-artifact@v8
	with:
	name: Linux-x86_64-libraries
	path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
	# GGUF model cache — introduced to stop re-downloading ~5 GB of test models from
	# HuggingFace on every run (also dodges HF rate-limits). Complements the sccache compiler
	# cache but is always ON: there is intentionally NO on/off flag for it (it is GitHub's
	# free cache, safe + free), whereas the sccache cache is toggled by the `use_cache`
	# workflow_dispatch input / USE_CACHE env. Not Depot — GB-scale blobs are usage-priced
	# there and its file cache needs Depot-hosted runners. See CLAUDE.md.
	- name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
	uses: actions/cache@v5
	with:
	path: models/
	# GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
	# bump the suffix when the model set / URLs change.
	key: gguf-models-v1
	# Each download is skipped when the file is already present in models/;
	# otherwise curl fetches it over HTTPS only, with retries. Variable
	# expansions are quoted so an unusual URL or file name cannot be word-split.
	- name: Download text generation model
	  run: |
	    test -f "models/${MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors \
	      "${MODEL_URL}" --create-dirs -o "models/${MODEL_NAME}"
	- name: Download reranking model
	  run: |
	    test -f "models/${RERANKING_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors \
	      "${RERANKING_MODEL_URL}" --create-dirs -o "models/${RERANKING_MODEL_NAME}"
	- name: Download draft model
	  run: |
	    test -f "models/${DRAFT_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors \
	      "${DRAFT_MODEL_URL}" --create-dirs -o "models/${DRAFT_MODEL_NAME}"
	- name: Download reasoning model
	  run: |
	    test -f "models/${REASONING_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors \
	      "${REASONING_MODEL_URL}" --create-dirs -o "models/${REASONING_MODEL_NAME}"
	- name: Download tool-calling model
	  run: |
	    test -f "models/${TOOL_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors \
	      "${TOOL_MODEL_URL}" --create-dirs -o "models/${TOOL_MODEL_NAME}"
	# Quoted: an unquoted " #98" would start a YAML comment and cut the name short.
	- name: "Download nomic embedding model (issue #98 regression)"
	  run: |
	    test -f "models/${NOMIC_EMBED_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors \
	      "${NOMIC_EMBED_MODEL_URL}" --create-dirs -o "models/${NOMIC_EMBED_MODEL_NAME}"
	# Quoted for the same reason: " #34" would otherwise be read as a comment.
	- name: "Download vision model (upstream kherud/java-llama.cpp#103 / #34)"
	  run: |
	    test -f "models/${VISION_MODEL_NAME}" || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors \
	      "${VISION_MODEL_URL}" --create-dirs -o "models/${VISION_MODEL_NAME}"
	- name: Download vision mmproj
	  run: |
	    test -f "models/${VISION_MMPROJ_NAME}" || curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors \
	      "${VISION_MMPROJ_URL}" --create-dirs -o "models/${VISION_MMPROJ_NAME}"
	- name: List files in models directory
	  run: ls -l models/
	- name: Validate model files
	  run: bash .github/validate-models.sh
	- uses: actions/setup-java@v5
	with:
	distribution: 'temurin'
	java-version: ${{ env.JAVA_VERSION }}
	- name: Memory before tests
	run: free -h
	- name: Enable core dumps
	run: \|
	ulimit -c unlimited
	echo "${{ github.workspace }}/core.%e.%p" \| sudo tee /proc/sys/kernel/core_pattern
	- name: Download TTS model (OuteTTS)
	run: test -f models/${TTS_MODEL_NAME} \|\| curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TTS_MODEL_URL} --create-dirs -o models/${TTS_MODEL_NAME}
	- name: Download TTS vocoder (WavTokenizer)
	run: test -f models/${TTS_VOCODER_NAME} \|\| curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TTS_VOCODER_URL} --create-dirs -o models/${TTS_VOCODER_NAME}
	- name: Run tests
	run: \|
	mvn -e --no-transfer-progress -P jcstress test \
	-Dnet.ladenthin.llama.tool.model=models/${TOOL_MODEL_NAME} \
	-Dnet.ladenthin.llama.nomic.path=models/${NOMIC_EMBED_MODEL_NAME} \
	-Dnet.ladenthin.llama.vision.model=models/${VISION_MODEL_NAME} \
	-Dnet.ladenthin.llama.vision.mmproj=models/${VISION_MMPROJ_NAME} \
	-Dnet.ladenthin.llama.vision.image=${VISION_IMAGE_PATH} \
	-Dnet.ladenthin.llama.tts.ttc.model=models/${TTS_MODEL_NAME} \
	-Dnet.ladenthin.llama.tts.vocoder.model=models/${TTS_VOCODER_NAME}
	- uses: actions/upload-artifact@v7
	if: success()
	with:
	name: jacoco-report
	path: target/site/jacoco/jacoco.xml
	if-no-files-found: ignore
	- name: Run PIT mutation tests
	run: mvn --batch-mode --no-transfer-progress test-compile org.pitest:pitest-maven:mutationCoverage
	- name: Extract PIT survivors
	if: always()
	run: \|
	echo "=== PIT Survived Mutations ==="
	for html_file in $(find target/pit-reports -name "*.html" -type f 2>/dev/null \| sort); do
	if grep -q "SURVIVED" "$html_file"; then
	echo "Found survivors in $html_file:"
	grep -B 2 -A 3 "SURVIVED" "$html_file"
	echo ""
	fi
	done
	- uses: actions/upload-artifact@v7
	if: always()
	with: { name: pit-reports, path: target/pit-reports/ }
	- name: Memory after tests
	if: always()
	run: free -h
	- if: failure()
	uses: actions/upload-artifact@v7
	with:
	name: error-log-linux-x86_64
	path: \|
	${{ github.workspace }}/hs_err_pid*.log
	${{ github.workspace }}/core.*
	${{ github.workspace }}/*.hprof
	${{ github.workspace }}/target/surefire-reports/*.dump
	${{ github.workspace }}/target/surefire-reports/*.dumpstream
	${{ github.workspace }}/target/surefire-reports/*.txt
	${{ github.workspace }}/target/surefire-reports/TEST-*.xml
	if-no-files-found: warn

	# ---------------------------------------------------------------------------
	# vmlens interleaving analysis — pure-Java, needs no native library or models.
	# Staged to a single smoke test for now (see the `vmlens` profile in pom.xml).
	# ---------------------------------------------------------------------------
	vmlens:
	name: Test (vmlens interleavings)
	needs: startgate
	runs-on: ubuntu-latest
	steps:
	- uses: actions/checkout@v7
	- uses: actions/setup-java@v5
	with:
	distribution: 'temurin'
	java-version: ${{ env.JAVA_VERSION }}
	cache: maven
	- name: Test under vmlens (interleaving analysis)
	# Add each new test in the `vmlens` package to this -Dtest list (surefire
	# -Dtest matches simple class names, not package globs; the default suite is
	# excluded from the vmlens package via pom.xml managed surefire <excludes>).
	run: >-
	mvn --batch-mode --no-transfer-progress -Pvmlens test
	-Dtest=VmlensInterleavingSmokeTest,SessionStateInterleavingTest -DfailIfNoTests=false
	- uses: actions/upload-artifact@v7
	if: always()
	with:
	name: vmlens-report
	path: target/vmlens-report/
	if-no-files-found: ignore

	test-java-macos-arm64-metal:
	name: Java Tests macOS 14 arm64 (Metal)
	needs: build-macos-arm64-metal
	runs-on: macos-14
	steps:
	- uses: actions/checkout@v7
	- name: Display CPU Info
	shell: bash
	run: \|
	echo "=== CPU Information ==="
	sysctl hw.model hw.cachelinesize hw.cpufrequency hw.cachesize hw.physicalcpu hw.logicalcpu hw.packages hw.memsize hw.ncpu 2>/dev/null \|\| true
	echo ""
	echo "=== Processor Details ==="
	system_profiler SPHardwareDataType
	- uses: actions/download-artifact@v8
	with:
	name: macos-14-libraries
	path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
	# GGUF model cache — introduced to stop re-downloading ~5 GB of test models from
	# HuggingFace on every run (also dodges HF rate-limits). Complements the sccache compiler
	# cache but is always ON: there is intentionally NO on/off flag for it (it is GitHub's
	# free cache, safe + free), whereas the sccache cache is toggled by the `use_cache`
	# workflow_dispatch input / USE_CACHE env. Not Depot — GB-scale blobs are usage-priced
	# there and its file cache needs Depot-hosted runners. See CLAUDE.md.
	- name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
	uses: actions/cache@v5
	with:
	path: models/
	# GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
	# bump the suffix when the model set / URLs change.
	key: gguf-models-v1
	- name: Download text generation model
	run: test -f models/${MODEL_NAME} \|\| curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
	- name: Download reranking model
	run: test -f models/${RERANKING_MODEL_NAME} \|\| curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${RERANKING_MODEL_URL} --create-dirs -o models/${RERANKING_MODEL_NAME}
	- name: Download draft model
	run: test -f models/${DRAFT_MODEL_NAME} \|\| curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${DRAFT_MODEL_URL} --create-dirs -o models/${DRAFT_MODEL_NAME}
	- name: Download reasoning model
	run: test -f models/${REASONING_MODEL_NAME} \|\| curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
	- name: Download tool-calling model
	run: test -f models/${TOOL_MODEL_NAME} \|\| curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
	- name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
	run: test -f models/${VISION_MODEL_NAME} \|\| curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
	- name: Download vision mmproj
	run: test -f models/${VISION_MMPROJ_NAME} \|\| curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
	- name: List files in models directory
	run: ls -l models/
	- name: Validate model files
	run: bash .github/validate-models.sh
	- uses: actions/setup-java@v5
	with:
	distribution: 'temurin'
	java-version: ${{ env.JAVA_VERSION }}
	- name: Memory before tests
	run: vm_stat && sysctl hw.memsize hw.physmem
	- name: Enable core dumps
	run: ulimit -c unlimited
	- name: Run tests
	run: \|
	mvn -e --no-transfer-progress -Dnet.ladenthin.llama.test.ngl=0 test \
	-Dnet.ladenthin.llama.tool.model=models/${TOOL_MODEL_NAME} \
	-Dnet.ladenthin.llama.vision.model=models/${VISION_MODEL_NAME} \
	-Dnet.ladenthin.llama.vision.mmproj=models/${VISION_MMPROJ_NAME} \
	-Dnet.ladenthin.llama.vision.image=${VISION_IMAGE_PATH}
	- name: Memory after tests
	if: always()
	run: vm_stat && sysctl hw.memsize hw.physmem
	- if: failure()
	uses: actions/upload-artifact@v7
	with:
	name: error-log-macos-14-metal
	path: \|
	${{ github.workspace }}/hs_err_pid*.log
	${{ github.workspace }}/*.hprof
	${{ github.workspace }}/target/surefire-reports/*.dump
	${{ github.workspace }}/target/surefire-reports/*.dumpstream
	${{ github.workspace }}/target/surefire-reports/*.txt
	${{ github.workspace }}/target/surefire-reports/TEST-*.xml
	if-no-files-found: warn

	test-java-macos-arm64-no-metal:
	name: Java Tests macOS 15 arm64 (no Metal)
	needs: build-macos-arm64-no-metal
	runs-on: macos-15
	steps:
	- uses: actions/checkout@v7
	- name: Display CPU Info
	shell: bash
	run: \|
	echo "=== CPU Information ==="
	sysctl hw.model hw.cachelinesize hw.cpufrequency hw.cachesize hw.physicalcpu hw.logicalcpu hw.packages hw.memsize hw.ncpu 2>/dev/null \|\| true
	echo ""
	echo "=== Processor Details ==="
	system_profiler SPHardwareDataType
	- uses: actions/download-artifact@v8
	with:
	name: macos-15-libraries
	path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
	# GGUF model cache — introduced to stop re-downloading ~5 GB of test models from
	# HuggingFace on every run (also dodges HF rate-limits). Complements the sccache compiler
	# cache but is always ON: there is intentionally NO on/off flag for it (it is GitHub's
	# free cache, safe + free), whereas the sccache cache is toggled by the `use_cache`
	# workflow_dispatch input / USE_CACHE env. Not Depot — GB-scale blobs are usage-priced
	# there and its file cache needs Depot-hosted runners. See CLAUDE.md.
	- name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
	uses: actions/cache@v5
	with:
	path: models/
	# GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
	# bump the suffix when the model set / URLs change.
	key: gguf-models-v1
	- name: Download text generation model
	run: test -f models/${MODEL_NAME} \|\| curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
	- name: Download reranking model
	run: test -f models/${RERANKING_MODEL_NAME} \|\| curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${RERANKING_MODEL_URL} --create-dirs -o models/${RERANKING_MODEL_NAME}
	- name: Download draft model
	run: test -f models/${DRAFT_MODEL_NAME} \|\| curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${DRAFT_MODEL_URL} --create-dirs -o models/${DRAFT_MODEL_NAME}
	- name: Download reasoning model
	run: test -f models/${REASONING_MODEL_NAME} \|\| curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
	- name: Download tool-calling model
	run: test -f models/${TOOL_MODEL_NAME} \|\| curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
	- name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
	run: test -f models/${VISION_MODEL_NAME} \|\| curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
	- name: Download vision mmproj
	run: test -f models/${VISION_MMPROJ_NAME} \|\| curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
	- name: List files in models directory
	run: ls -l models/
	- name: Validate model files
	run: bash .github/validate-models.sh
	- uses: actions/setup-java@v5
	with:
	distribution: 'temurin'
	java-version: ${{ env.JAVA_VERSION }}
	- name: Memory before tests
	run: vm_stat && sysctl hw.memsize hw.physmem
	- name: Enable core dumps
	run: ulimit -c unlimited
	- name: Run tests
	run: \|
	mvn -e --no-transfer-progress test \
	-Dnet.ladenthin.llama.tool.model=models/${TOOL_MODEL_NAME} \
	-Dnet.ladenthin.llama.vision.model=models/${VISION_MODEL_NAME} \
	-Dnet.ladenthin.llama.vision.mmproj=models/${VISION_MMPROJ_NAME} \
	-Dnet.ladenthin.llama.vision.image=${VISION_IMAGE_PATH}
	- name: Memory after tests
	if: always()
	run: vm_stat && sysctl hw.memsize hw.physmem
	- if: failure()
	uses: actions/upload-artifact@v7
	with:
	name: error-log-macos-15-no-metal
	path: \|
	${{ github.workspace }}/hs_err_pid*.log
	${{ github.workspace }}/*.hprof
	${{ github.workspace }}/target/surefire-reports/*.dump
	${{ github.workspace }}/target/surefire-reports/*.dumpstream
	${{ github.workspace }}/target/surefire-reports/*.txt
	${{ github.workspace }}/target/surefire-reports/TEST-*.xml
	if-no-files-found: warn

	test-java-macos-arm64-metal-15:
	name: Java Tests macOS 15 arm64 (Metal)
	needs: build-macos-arm64-metal-15
	runs-on: macos-15
	steps:
	- uses: actions/checkout@v7
	- name: Display CPU Info
	shell: bash
	run: \|
	echo "=== CPU Information ==="
	sysctl hw.model hw.cachelinesize hw.cpufrequency hw.cachesize hw.physicalcpu hw.logicalcpu hw.packages hw.memsize hw.ncpu 2>/dev/null \|\| true
	echo ""
	echo "=== Processor Details ==="
	system_profiler SPHardwareDataType
	- uses: actions/download-artifact@v8
	with:
	name: macos-15-metal-libraries
	path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
	# GGUF model cache — introduced to stop re-downloading ~5 GB of test models from
	# HuggingFace on every run (also dodges HF rate-limits). Complements the sccache compiler
	# cache but is always ON: there is intentionally NO on/off flag for it (it is GitHub's
	# free cache, safe + free), whereas the sccache cache is toggled by the `use_cache`
	# workflow_dispatch input / USE_CACHE env. Not Depot — GB-scale blobs are usage-priced
	# there and its file cache needs Depot-hosted runners. See CLAUDE.md.
	- name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
	uses: actions/cache@v5
	with:
	path: models/
	# GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
	# bump the suffix when the model set / URLs change.
	key: gguf-models-v1
	- name: Download text generation model
	run: test -f models/${MODEL_NAME} \|\| curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${MODEL_URL} --create-dirs -o models/${MODEL_NAME}
	- name: Download reranking model
	run: test -f models/${RERANKING_MODEL_NAME} \|\| curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${RERANKING_MODEL_URL} --create-dirs -o models/${RERANKING_MODEL_NAME}
	- name: Download draft model
	run: test -f models/${DRAFT_MODEL_NAME} \|\| curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${DRAFT_MODEL_URL} --create-dirs -o models/${DRAFT_MODEL_NAME}
	- name: Download reasoning model
	run: test -f models/${REASONING_MODEL_NAME} \|\| curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${REASONING_MODEL_URL} --create-dirs -o models/${REASONING_MODEL_NAME}
	- name: Download tool-calling model
	run: test -f models/${TOOL_MODEL_NAME} \|\| curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${TOOL_MODEL_URL} --create-dirs -o models/${TOOL_MODEL_NAME}
	- name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
	run: test -f models/${VISION_MODEL_NAME} \|\| curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MODEL_URL} --create-dirs -o models/${VISION_MODEL_NAME}
	- name: Download vision mmproj
	run: test -f models/${VISION_MMPROJ_NAME} \|\| curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors ${VISION_MMPROJ_URL} --create-dirs -o models/${VISION_MMPROJ_NAME}
	- name: List files in models directory
	run: ls -l models/
	- name: Validate model files
	run: bash .github/validate-models.sh
	- uses: actions/setup-java@v5
	with:
	distribution: 'temurin'
	java-version: ${{ env.JAVA_VERSION }}
	- name: Memory before tests
	run: vm_stat && sysctl hw.memsize hw.physmem
	- name: Enable core dumps
	run: ulimit -c unlimited
	- name: Run tests
	run: \|
	mvn -e --no-transfer-progress test \
	-Dnet.ladenthin.llama.tool.model=models/${TOOL_MODEL_NAME} \
	-Dnet.ladenthin.llama.vision.model=models/${VISION_MODEL_NAME} \
	-Dnet.ladenthin.llama.vision.mmproj=models/${VISION_MMPROJ_NAME} \
	-Dnet.ladenthin.llama.vision.image=${VISION_IMAGE_PATH}
	- name: Memory after tests
	if: always()
	run: vm_stat && sysctl hw.memsize hw.physmem
	- if: failure()
	uses: actions/upload-artifact@v7
	with:
	name: error-log-macos-15-metal
	path: \|
	${{ github.workspace }}/hs_err_pid*.log
	${{ github.workspace }}/*.hprof
	${{ github.workspace }}/target/surefire-reports/*.dump
	${{ github.workspace }}/target/surefire-reports/*.dumpstream
	${{ github.workspace }}/target/surefire-reports/*.txt
	${{ github.workspace }}/target/surefire-reports/TEST-*.xml
	if-no-files-found: warn

	test-java-windows-x86_64:
	name: Java Tests Windows 2025 x86_64 (VS 2026)
	needs: build-windows-x86_64
	runs-on: windows-2025-vs2026
	steps:
	- uses: actions/checkout@v7
	- name: Display CPU Info
	shell: pwsh
	run: \|
	Write-Host "=== CPU Information (Get-CimInstance - All Properties) ==="
	Get-CimInstance Win32_Processor \| Select-Object * \| Format-List
	Write-Host ""
	Write-Host "=== CPU Information (systeminfo) ==="
	systeminfo \| Select-String "Processor"
	Write-Host ""
	Write-Host "=== CPU Information (Get-ComputerInfo) ==="
	Get-ComputerInfo -Property "CsProcessors*" 2>$null \|\| Write-Host "Get-ComputerInfo not available"
	- uses: actions/download-artifact@v8
	with:
	name: Windows-x86_64-libraries
	path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
	- name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
	uses: actions/cache@v5
	with:
	path: models/
	# GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
	# bump the suffix when the model set / URLs change.
	key: gguf-models-v1
	- name: Download text generation model
	run: if (-not (Test-Path "models/$env:MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME }
	- name: Download reranking model
	run: if (-not (Test-Path "models/$env:RERANKING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME }
	- name: Download draft model
	run: if (-not (Test-Path "models/$env:DRAFT_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME }
	- name: Download reasoning model
	run: if (-not (Test-Path "models/$env:REASONING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME }
	- name: Download tool-calling model
	run: if (-not (Test-Path "models/$env:TOOL_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME }
	- name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
	run: if (-not (Test-Path "models/$env:VISION_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME }
	- name: Download vision mmproj
	run: if (-not (Test-Path "models/$env:VISION_MMPROJ_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME }
	- name: List files in models directory
	run: ls -l models/
	- name: Validate model files
	run: .github\validate-models.bat
	- uses: actions/setup-java@v5
	with:
	distribution: 'temurin'
	java-version: ${{ env.JAVA_VERSION }}
	- name: Memory before tests
	run: Get-CimInstance Win32_OperatingSystem \| Select-Object FreePhysicalMemory,TotalVisibleMemorySize \| Format-List
	shell: pwsh
	- name: Enable WER LocalDumps for java.exe
	# Windows Error Reporting writes minidumps when java.exe (or any other
	# registered process) crashes via __fastfail / abort / unhandled SEH.
	# We use it as the Windows analogue of Linux core dumps so that a JVM
	# crash inside the JNI layer leaves us a real native callstack instead
	# of just surefire's "VM terminated without saying goodbye" line.
	# DumpType=2 == MiniDumpWithFullMemory; the workspace dumps/ folder is
	# globbed by the failure-upload step below.
	shell: pwsh
	run: \|
	$key = 'HKLM:\SOFTWARE\Microsoft\Windows\Windows Error Reporting\LocalDumps\java.exe'
	New-Item -Path $key -Force \| Out-Null
	New-Item -Path "${{ github.workspace }}\dumps" -ItemType Directory -Force \| Out-Null
	New-ItemProperty -Path $key -Name 'DumpFolder' -Value "${{ github.workspace }}\dumps" -PropertyType ExpandString -Force \| Out-Null
	New-ItemProperty -Path $key -Name 'DumpType' -Value 2 -PropertyType DWord -Force \| Out-Null
	New-ItemProperty -Path $key -Name 'DumpCount' -Value 5 -PropertyType DWord -Force \| Out-Null
	Get-ItemProperty -Path $key \| Format-List
	- name: Run tests
	run: \|
	mvn -e --no-transfer-progress test `
	"-Dnet.ladenthin.llama.tool.model=models/$env:TOOL_MODEL_NAME" `
	"-Dnet.ladenthin.llama.vision.model=models/$env:VISION_MODEL_NAME" `
	"-Dnet.ladenthin.llama.vision.mmproj=models/$env:VISION_MMPROJ_NAME" `
	"-Dnet.ladenthin.llama.vision.image=$env:VISION_IMAGE_PATH"
	- name: Memory after tests
	if: always()
	run: Get-CimInstance Win32_OperatingSystem \| Select-Object FreePhysicalMemory,TotalVisibleMemorySize \| Format-List
	shell: pwsh
	- if: failure()
	uses: actions/upload-artifact@v7
	with:
	name: windows-output
	path: \|
	${{ github.workspace }}\hs_err_pid*.log
	${{ github.workspace }}\*.hprof
	${{ github.workspace }}\dumps\*.dmp
	${{ github.workspace }}\target\surefire-reports\*.dump
	${{ github.workspace }}\target\surefire-reports\*.dumpstream
	${{ github.workspace }}\target\surefire-reports\*.txt
	${{ github.workspace }}\target\surefire-reports\TEST-*.xml
	${{ github.workspace }}/src/main/resources/net/ladenthin/llama/*/
	if-no-files-found: warn

	# Java/inference validation of the Ninja-built x86_64 DLL (the analogue of
	# test-java-windows-x86_64 for the MSVC build). Loads the Ninja jllama.dll via
	# JNI and runs the full model-backed suite, so both Windows generators are
	# validated end-to-end before the `ninja-windows` classifier JAR ships.
	test-java-windows-x86_64-ninja:
	name: Java Tests Windows 2025 x86_64 (Ninja, eval)
	needs: build-windows-x86_64-ninja
	runs-on: windows-2025-vs2026
	steps:
	- uses: actions/checkout@v7
	- name: Display CPU Info
	shell: pwsh
	run: \|
	Write-Host "=== CPU Information (Get-CimInstance - All Properties) ==="
	Get-CimInstance Win32_Processor \| Select-Object * \| Format-List
	Write-Host ""
	Write-Host "=== CPU Information (systeminfo) ==="
	systeminfo \| Select-String "Processor"
	Write-Host ""
	Write-Host "=== CPU Information (Get-ComputerInfo) ==="
	Get-ComputerInfo -Property "CsProcessors*" 2>$null \|\| Write-Host "Get-ComputerInfo not available"
	- uses: actions/download-artifact@v8
	with:
	name: Windows-x86_64-ninja
	path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
	- name: Cache GGUF models (GitHub Actions cache; avoids re-downloading from HuggingFace)
	uses: actions/cache@v5
	with:
	path: models/
	# GGUF is platform-independent, so ubuntu + macOS + Windows share one entry;
	# bump the suffix when the model set / URLs change.
	key: gguf-models-v1
	- name: Download text generation model
	run: if (-not (Test-Path "models/$env:MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:MODEL_URL --create-dirs -o models/$env:MODEL_NAME }
	- name: Download reranking model
	run: if (-not (Test-Path "models/$env:RERANKING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:RERANKING_MODEL_URL --create-dirs -o models/$env:RERANKING_MODEL_NAME }
	- name: Download draft model
	run: if (-not (Test-Path "models/$env:DRAFT_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:DRAFT_MODEL_URL --create-dirs -o models/$env:DRAFT_MODEL_NAME }
	- name: Download reasoning model
	run: if (-not (Test-Path "models/$env:REASONING_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:REASONING_MODEL_URL --create-dirs -o models/$env:REASONING_MODEL_NAME }
	- name: Download tool-calling model
	run: if (-not (Test-Path "models/$env:TOOL_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:TOOL_MODEL_URL --create-dirs -o models/$env:TOOL_MODEL_NAME }
	- name: Download vision model (upstream kherud/java-llama.cpp#103 / #34)
	run: if (-not (Test-Path "models/$env:VISION_MODEL_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MODEL_URL --create-dirs -o models/$env:VISION_MODEL_NAME }
	- name: Download vision mmproj
	run: if (-not (Test-Path "models/$env:VISION_MMPROJ_NAME")) { curl -L --proto =https --proto-redir =https --fail --retry 5 --retry-all-errors $env:VISION_MMPROJ_URL --create-dirs -o models/$env:VISION_MMPROJ_NAME }
	- name: List files in models directory
	run: ls -l models/
	- name: Validate model files
	run: .github\validate-models.bat
	- uses: actions/setup-java@v5
	with:
	distribution: 'temurin'
	java-version: ${{ env.JAVA_VERSION }}
	- name: Memory before tests
	run: Get-CimInstance Win32_OperatingSystem \| Select-Object FreePhysicalMemory,TotalVisibleMemorySize \| Format-List
	shell: pwsh
	- name: Enable WER LocalDumps for java.exe
	# Windows Error Reporting writes minidumps when java.exe (or any other
	# registered process) crashes via __fastfail / abort / unhandled SEH.
	# We use it as the Windows analogue of Linux core dumps so that a JVM
	# crash inside the JNI layer leaves us a real native callstack instead
	# of just surefire's "VM terminated without saying goodbye" line.
	# DumpType=2 == MiniDumpWithFullMemory; the workspace dumps/ folder is
	# globbed by the failure-upload step below.
	shell: pwsh
	run: \|
	$key = 'HKLM:\SOFTWARE\Microsoft\Windows\Windows Error Reporting\LocalDumps\java.exe'
	New-Item -Path $key -Force \| Out-Null
	New-Item -Path "${{ github.workspace }}\dumps" -ItemType Directory -Force \| Out-Null
	New-ItemProperty -Path $key -Name 'DumpFolder' -Value "${{ github.workspace }}\dumps" -PropertyType ExpandString -Force \| Out-Null
	New-ItemProperty -Path $key -Name 'DumpType' -Value 2 -PropertyType DWord -Force \| Out-Null
	New-ItemProperty -Path $key -Name 'DumpCount' -Value 5 -PropertyType DWord -Force \| Out-Null
	Get-ItemProperty -Path $key \| Format-List
	- name: Run tests
	run: \|
	mvn -e --no-transfer-progress test `
	"-Dnet.ladenthin.llama.tool.model=models/$env:TOOL_MODEL_NAME" `
	"-Dnet.ladenthin.llama.vision.model=models/$env:VISION_MODEL_NAME" `
	"-Dnet.ladenthin.llama.vision.mmproj=models/$env:VISION_MMPROJ_NAME" `
	"-Dnet.ladenthin.llama.vision.image=$env:VISION_IMAGE_PATH"
	- name: Memory after tests
	if: always()
	run: Get-CimInstance Win32_OperatingSystem \| Select-Object FreePhysicalMemory,TotalVisibleMemorySize \| Format-List
	shell: pwsh
	- if: failure()
	uses: actions/upload-artifact@v7
	with:
	name: windows-output-ninja
	path: \|
	${{ github.workspace }}\hs_err_pid*.log
	${{ github.workspace }}\*.hprof
	${{ github.workspace }}\dumps\*.dmp
	${{ github.workspace }}\target\surefire-reports\*.dump
	${{ github.workspace }}\target\surefire-reports\*.dumpstream
	${{ github.workspace }}\target\surefire-reports\*.txt
	${{ github.workspace }}\target\surefire-reports\TEST-*.xml
	${{ github.workspace }}/src/main/resources/net/ladenthin/llama/*/
	if-no-files-found: warn

	# ---------------------------------------------------------------------------
	# Package and publish
	# ---------------------------------------------------------------------------

	package:
	name: Package JARs
	needs:
	- crosscompile-linux-x86_64-cuda
	- crosscompile-linux-aarch64
	- crosscompile-android-aarch64
	- crosscompile-android-aarch64-opencl
	- build-windows-x86
	- build-windows-x86_64-ninja
	- build-windows-x86-ninja
	- test-cpp-linux-x86_64
	- build-macos-arm64-metal-15
	- test-java-linux-x86_64
	- test-java-macos-arm64-metal
	- test-java-macos-arm64-no-metal
	- test-java-macos-arm64-metal-15
	- test-java-windows-x86_64
	- test-java-windows-x86_64-ninja
	runs-on: ubuntu-latest
	steps:
	- uses: actions/checkout@v7
	- uses: actions/download-artifact@v8
	with:
	pattern: "*-libraries"
	merge-multiple: true
	path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
	- uses: actions/download-artifact@v8
	with:
	name: linux-libraries-cuda
	path: ${{ github.workspace }}/src/main/resources_linux_cuda/net/ladenthin/llama/
	- uses: actions/download-artifact@v8
	with:
	name: android-libraries-opencl
	path: ${{ github.workspace }}/src/main/resources_android_opencl/net/ladenthin/llama/
	# Ninja-built Windows natives -> separate tree consumed by the `windows-ninja`
	# Maven profile (the `ninja-windows` classifier JAR). The default JAR keeps the
	# MSVC `*-libraries` natives downloaded above.
	- uses: actions/download-artifact@v8
	with:
	name: Windows-x86_64-ninja
	path: ${{ github.workspace }}/src/main/resources_windows_ninja/net/ladenthin/llama/
	- uses: actions/download-artifact@v8
	with:
	name: Windows-x86-ninja
	path: ${{ github.workspace }}/src/main/resources_windows_ninja/net/ladenthin/llama/
	- uses: actions/setup-java@v5
	with:
	distribution: 'temurin'
	java-version: ${{ env.JAVA_VERSION }}
	- name: Build JARs
	# `assembly` additionally produces the fat jar-with-dependencies uber JAR
	# (llama-<version>-jar-with-dependencies.jar: library classes + Java runtime deps +
	# default-platform native libs in one drop-on-classpath JAR, runnable via its
	# OpenAiCompatServer Main-Class). It lands in target/ and is uploaded in the `llama-jars`
	# artifact below - a CI run artifact only, not a Maven Central / GitHub-Release asset.
	# `windows-ninja` attaches the `ninja-windows` classifier JAR (Ninja-built Windows natives).
	run: mvn --batch-mode --no-transfer-progress -P release,cuda,opencl-android,windows-ninja,assembly -Dmaven.test.skip=true -Dgpg.skip=true package
	- name: Upload JARs
	uses: actions/upload-artifact@v7
	with:
	name: llama-jars
	path: target/*.jar

	report:
	name: Report
	needs: [package]
	runs-on: ubuntu-latest
	permissions:
	contents: write
	steps:
	- uses: actions/checkout@v7
	- uses: actions/setup-java@v5
	with: { java-version: '${{ env.JAVA_VERSION }}', distribution: temurin }
	- uses: actions/download-artifact@v8
	with: { name: jacoco-report, path: target/site/jacoco/ }
	continue-on-error: true
	- uses: advanced-security/maven-dependency-submission-action@v5
	- name: Coveralls
	uses: coverallsapp/github-action@v2
	with:
	github-token: ${{ secrets.GITHUB_TOKEN }}
	file: target/site/jacoco/jacoco.xml
	format: jacoco
	continue-on-error: true
	- name: Codecov
	uses: codecov/codecov-action@v7
	with:
	token: ${{ secrets.CODECOV_TOKEN }}
	files: target/site/jacoco/jacoco.xml
	continue-on-error: true

	check-snapshot:
	name: "Check: main branch / SNAPSHOT"
	needs: [report]
	runs-on: ubuntu-latest
	if: >-
	(github.event_name == 'push' && github.ref == 'refs/heads/main') \|\|
	(github.event_name == 'workflow_dispatch' && !startsWith(github.ref, 'refs/tags/v'))
	steps:
	- name: Confirm snapshot ref
	run: echo "Confirmed on snapshot ref ${{ github.ref }}"

	check-tag:
	name: "Check: v* tag"
	needs: [report]
	runs-on: ubuntu-latest
	if: startsWith(github.ref, 'refs/tags/v')
	steps:
	- name: Confirm tag ref
	run: echo "Confirmed on tag ${{ github.ref }}"

	publish-snapshot:
	  # Deploys a -SNAPSHOT build with Maven and keeps the resulting JARs and
	  # signatures as the `signed-snapshot-assets` artifact for the
	  # `github-snapshot` job. Runs only when the snapshot gate succeeded and the
	  # `publish_to_central` input is true.
	  name: Publish Snapshot to Central
	  needs: [check-snapshot, crosscompile-linux-x86_64-cuda, crosscompile-android-aarch64-opencl, code-style]
	  if: needs.check-snapshot.result == 'success' && inputs.publish_to_central
	  runs-on: ubuntu-latest
	  environment: maven-central
	  permissions:
	    contents: write
	  steps:
	    - uses: actions/checkout@v7
	    # Default-platform natives: every artifact named `*-libraries` is merged
	    # into the main resources tree.
	    - uses: actions/download-artifact@v8
	      with:
	        pattern: "*-libraries"
	        merge-multiple: true
	        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
	    # Classifier-specific natives each go into their own resources root.
	    - uses: actions/download-artifact@v8
	      with:
	        name: linux-libraries-cuda
	        path: ${{ github.workspace }}/src/main/resources_linux_cuda/net/ladenthin/llama/
	    - uses: actions/download-artifact@v8
	      with:
	        name: android-libraries-opencl
	        path: ${{ github.workspace }}/src/main/resources_android_opencl/net/ladenthin/llama/
	    # Both Windows Ninja artifacts are unpacked into the same resources root.
	    - uses: actions/download-artifact@v8
	      with:
	        name: Windows-x86_64-ninja
	        path: ${{ github.workspace }}/src/main/resources_windows_ninja/net/ladenthin/llama/
	    - uses: actions/download-artifact@v8
	      with:
	        name: Windows-x86-ninja
	        path: ${{ github.workspace }}/src/main/resources_windows_ninja/net/ladenthin/llama/
	    # server-username / server-password / gpg-passphrase take the NAMES of
	    # environment variables; the values are supplied on the deploy step.
	    - name: Set up Maven Central Repository
	      uses: actions/setup-java@v5
	      with:
	        java-version: ${{ env.JAVA_VERSION }}
	        distribution: 'temurin'
	        server-id: central
	        server-username: MAVEN_USERNAME
	        server-password: MAVEN_PASSWORD
	        gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }}
	        gpg-passphrase: MAVEN_GPG_PASSPHRASE
	    # Abort before deploying unless the resolved project version ends in
	    # -SNAPSHOT. `tail -n1` keeps only the last line of Maven's output.
	    - name: Guard - require a -SNAPSHOT version
	      shell: bash
	      run: |
	        VERSION=$(mvn -q -DforceStdout help:evaluate -Dexpression=project.version | tail -n1)
	        echo "Resolved project version: $VERSION"
	        case "$VERSION" in
	          *-SNAPSHOT) echo "OK: -SNAPSHOT version, continuing snapshot deploy." ;;
	          *) echo "::error::Refusing to publish non-SNAPSHOT version '$VERSION' from the snapshot job. Snapshot publishing requires a -SNAPSHOT version; releases go through the v* tag path."; exit 1 ;;
	        esac
	    - name: Publish snapshot
	      run: mvn --batch-mode --no-transfer-progress -P release,cuda,opencl-android,windows-ninja -Dmaven.test.skip=true deploy
	      env:
	        MAVEN_USERNAME: ${{ secrets.CENTRAL_USERNAME }}
	        MAVEN_PASSWORD: ${{ secrets.CENTRAL_TOKEN }}
	        MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
	    # Best-effort copy: a glob that matches nothing is deliberately ignored.
	    - name: Collect signed artifacts
	      run: |
	        mkdir -p signed-snapshot-assets
	        cp target/*.jar signed-snapshot-assets/ 2>/dev/null || true
	        cp target/*.jar.asc signed-snapshot-assets/ 2>/dev/null || true
	    - uses: actions/upload-artifact@v7
	      with:
	        name: signed-snapshot-assets
	        path: signed-snapshot-assets/

	github-snapshot:
	  # Mirrors the files collected by `publish-snapshot` onto the GitHub
	  # release tagged `snapshot`, creating that pre-release first if it does
	  # not exist yet.
	  name: Update Snapshot Pre-release on GitHub
	  needs: [publish-snapshot]
	  if: needs.publish-snapshot.result == 'success'
	  runs-on: ubuntu-latest
	  permissions:
	    contents: write
	  steps:
	    - uses: actions/download-artifact@v8
	      with:
	        name: signed-snapshot-assets
	        path: snapshot-assets/
	    - name: Update snapshot pre-release
	      env:
	        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	      # `gh release view` fails when the release is missing, which triggers
	      # the `gh release create` fallback. `--clobber` overwrites assets that
	      # already exist under the same name.
	      run: |
	        gh release view snapshot --repo ${{ github.repository }} 2>/dev/null \
	          || gh release create snapshot \
	            --repo ${{ github.repository }} \
	            --prerelease \
	            --title "Snapshot (latest)" \
	            --notes "Latest snapshot build from the main branch."
	        gh release upload snapshot snapshot-assets/* \
	          --repo ${{ github.repository }} \
	          --clobber
	publish-release:
	  # Deploys a release build with Maven and keeps the resulting JARs and
	  # signatures as the `signed-release-assets` artifact for the
	  # `github-release-signed` job. Runs only when the `v*` tag gate succeeded
	  # and the `publish_to_central` input is true.
	  name: Publish Release to Central
	  needs: [check-tag, crosscompile-linux-x86_64-cuda, crosscompile-android-aarch64-opencl, code-style]
	  if: needs.check-tag.result == 'success' && inputs.publish_to_central
	  runs-on: ubuntu-latest
	  environment: maven-central
	  permissions:
	    contents: write
	  steps:
	    - uses: actions/checkout@v7
	    # Default-platform natives: every artifact named `*-libraries` is merged
	    # into the main resources tree.
	    - uses: actions/download-artifact@v8
	      with:
	        pattern: "*-libraries"
	        merge-multiple: true
	        path: ${{ github.workspace }}/src/main/resources/net/ladenthin/llama/
	    # Classifier-specific natives each go into their own resources root.
	    - uses: actions/download-artifact@v8
	      with:
	        name: linux-libraries-cuda
	        path: ${{ github.workspace }}/src/main/resources_linux_cuda/net/ladenthin/llama/
	    - uses: actions/download-artifact@v8
	      with:
	        name: android-libraries-opencl
	        path: ${{ github.workspace }}/src/main/resources_android_opencl/net/ladenthin/llama/
	    # Both Windows Ninja artifacts are unpacked into the same resources root.
	    - uses: actions/download-artifact@v8
	      with:
	        name: Windows-x86_64-ninja
	        path: ${{ github.workspace }}/src/main/resources_windows_ninja/net/ladenthin/llama/
	    - uses: actions/download-artifact@v8
	      with:
	        name: Windows-x86-ninja
	        path: ${{ github.workspace }}/src/main/resources_windows_ninja/net/ladenthin/llama/
	    # server-username / server-password / gpg-passphrase take the NAMES of
	    # environment variables; the values are supplied on the deploy step.
	    - name: Set up Maven Central Repository
	      uses: actions/setup-java@v5
	      with:
	        java-version: ${{ env.JAVA_VERSION }}
	        distribution: 'temurin'
	        server-id: central
	        server-username: MAVEN_USERNAME
	        server-password: MAVEN_PASSWORD
	        gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }}
	        gpg-passphrase: MAVEN_GPG_PASSPHRASE
	    - name: Publish release
	      run: mvn --batch-mode --no-transfer-progress -P release,cuda,opencl-android,windows-ninja -Dmaven.test.skip=true deploy
	      env:
	        MAVEN_USERNAME: ${{ secrets.CENTRAL_USERNAME }}
	        MAVEN_PASSWORD: ${{ secrets.CENTRAL_TOKEN }}
	        MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
	    # Best-effort copy: a glob that matches nothing is deliberately ignored.
	    - name: Collect signed artifacts
	      run: |
	        mkdir -p signed-release-assets
	        cp target/*.jar signed-release-assets/ 2>/dev/null || true
	        cp target/*.jar.asc signed-release-assets/ 2>/dev/null || true
	    - uses: actions/upload-artifact@v7
	      with:
	        name: signed-release-assets
	        path: signed-release-assets/

	github-release-signed:
	  # Final release step: pulls the `signed-release-assets` artifact produced
	  # by `publish-release` and hands every file in it to the GitHub release
	  # action. Skipped unless `publish-release` succeeded.
	  name: Attach Signed Binaries to GitHub Release
	  runs-on: ubuntu-latest
	  needs:
	    - publish-release
	  if: needs.publish-release.result == 'success'
	  permissions:
	    contents: write
	  steps:
	    - uses: actions/download-artifact@v8
	      with:
	        path: release-assets/
	        name: signed-release-assets
	    # NOTE(review): the action presumably targets the release for the
	    # triggering tag - confirm against the action's documentation.
	    - uses: softprops/action-gh-release@v3
	      name: Upload release assets
	      with:
	        files: release-assets/*

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Merge pull request #270 from bernardladenthin/claude/laughing-albatta… #454

Workflow file

Merge pull request #270 from bernardladenthin/claude/laughing-albatta… #454

Uh oh!

Workflow file for this run