forked from kherud/java-llama.cpp
-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathOpenAiServerExample.java
More file actions
47 lines (40 loc) · 2.23 KB
/
Copy pathOpenAiServerExample.java
File metadata and controls
47 lines (40 loc) · 2.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
// SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
// SPDX-FileCopyrightText: 2023-2025 Konstantin Herud
//
// SPDX-License-Identifier: MIT
package examples;
import java.io.IOException;
import net.ladenthin.llama.LlamaModel;
import net.ladenthin.llama.parameters.ModelParameters;
import net.ladenthin.llama.server.OpenAiCompatServer;
import net.ladenthin.llama.server.OpenAiServerConfig;
import org.junit.jupiter.api.Disabled;
// Runnable demo (no @Test): starts a local OpenAI-compatible HTTP endpoint over a GGUF model so an
// editor such as VS Code Copilot (Custom Endpoint) can drive it. Point the model path at a local
// GGUF via -Dnet.ladenthin.llama.server.model=... ; @Disabled keeps it out of `mvn test`.
@Disabled
public class OpenAiServerExample {
public static void main(String... args) throws IOException, InterruptedException {
String modelPath = System.getProperty("net.ladenthin.llama.server.model", "models/codellama-7b.Q2_K.gguf");
int port = Integer.getInteger("net.ladenthin.llama.server.port", 8080);
// Two parallel slots let the editor's chat and its background title/summary requests run
// concurrently instead of serializing behind one another.
ModelParameters modelParams =
new ModelParameters().setModel(modelPath).setCtxSize(8192).setParallel(2);
OpenAiServerConfig config = OpenAiServerConfig.builder()
.port(port)
.modelId("local-model")
.maxInputTokens(6144)
.maxOutputTokens(2048)
.build();
try (LlamaModel model = new LlamaModel(modelParams);
OpenAiCompatServer server = new OpenAiCompatServer(model, config).start()) {
String url = "http://127.0.0.1:" + server.getPort() + OpenAiCompatServer.PATH_CHAT_COMPLETIONS;
System.out.println("OpenAI-compatible endpoint ready: " + url);
System.out.println("In VS Code: Chat: Manage Language Models -> Add Models -> Custom Endpoint ->");
System.out.println(" API type 'Chat Completions', then set the model 'url' to: " + url);
System.out.println("Press Ctrl+C to stop.");
Thread.currentThread().join();
}
}
}