# LLM Test Bench API Server

The LLM Test Bench API Server provides comprehensive REST, GraphQL, and WebSocket APIs for interacting with the benchmarking system.
## Table of Contents

- Overview
- Getting Started
- Authentication
- REST API
- GraphQL API
- WebSocket API
- Error Handling
- Rate Limiting
- Examples
## Overview

### Architecture

```text
┌──────────────────────────────────────────────────────────────┐
│                      API Server (Axum)                       │
│                                                              │
│  ┌────────────────┐ ┌────────────────┐ ┌────────────────┐   │
│  │    REST API    │ │  GraphQL API   │ │   WebSocket    │   │
│  │   (OpenAPI)    │ │  (async-gql)   │ │  (real-time)   │   │
│  └────────┬───────┘ └────────┬───────┘ └────────┬───────┘   │
│           │                  │                  │            │
│           └──────────────────┼──────────────────┘            │
│                              ▼                               │
│  ┌───────────────────────────────────────────────────────┐  │
│  │                   Middleware Layer                     │  │
│  │   ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐     │  │
│  │   │  Auth   │ │  CORS   │ │  Rate   │ │ Logging │     │  │
│  │   │  (JWT)  │ │         │ │  Limit  │ │         │     │  │
│  │   └─────────┘ └─────────┘ └─────────┘ └─────────┘     │  │
│  └───────────────────────────────────────────────────────┘  │
│                              ▼                               │
│  ┌───────────────────────────────────────────────────────┐  │
│  │                  Core Business Logic                   │  │
│  │     Providers | Benchmarks | Evaluators | Plugins      │  │
│  └───────────────────────────────────────────────────────┘  │
└──────────────────────────────────────────────────────────────┘
```
### Features

- REST API: OpenAPI-documented RESTful endpoints
- GraphQL API: Flexible querying with GraphiQL playground
- WebSocket API: Real-time updates and event streaming
- Authentication: JWT tokens and API keys with role-based access
- Rate Limiting: Per-user request throttling
- CORS: Configurable cross-origin support
- Swagger UI: Interactive API documentation
- API Versioning: Backward-compatible versioning
## Getting Started

Start the server with the default configuration:

```rust
use llm_test_bench_core::api::{ApiServer, ApiConfig};

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let config = ApiConfig::default();
    let server = ApiServer::new(config);
    server.start().await?;
    Ok(())
}
```

### Custom Configuration

```rust
use llm_test_bench_core::api::{ApiConfig, CorsConfig};

let config = ApiConfig::builder()
    .bind_address("0.0.0.0:3000".parse()?)
    .enable_rest(true)
    .enable_graphql(true)
    .enable_websocket(true)
    .jwt_secret(std::env::var("JWT_SECRET")?)
    .jwt_expiration(3600)
    .rate_limit(100, 50) // 100 rps, burst of 50
    .cors(CorsConfig {
        allowed_origins: vec!["https://myapp.com".to_string()],
        ..Default::default()
    })
    .build();
```

### Environment Variables

The server also reads its configuration from the environment:

```bash
JWT_SECRET=your-secret-key-change-in-production
BIND_ADDRESS=0.0.0.0:3000
ENABLE_SWAGGER=true
CORS_ORIGINS=https://myapp.com,https://staging.myapp.com
```

## Authentication

### Roles

- Admin: Full access to all endpoints
- User: Standard access to most endpoints
- Viewer: Read-only access
- Service: API-to-API access
### Login with Username and Password

```bash
curl -X POST http://localhost:3000/v1/auth/login \
  -H "Content-Type: application/json" \
  -d '{
    "username": "user@example.com",
    "password": "password"
  }'
```

Response:

```json
{
  "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...",
  "expires_at": "2024-03-20T15:30:00Z"
}
```

Use the token in the `Authorization` header:

```bash
curl -X GET http://localhost:3000/v1/benchmarks \
  -H "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9..."
```

### API Keys

Create an API key (requires an admin token):

```bash
curl -X POST http://localhost:3000/v1/auth/api-keys \
  -H "Authorization: Bearer <admin-token>" \
  -H "Content-Type: application/json" \
  -d '{
    "name": "My API Key",
    "role": "service"
  }'
```

Response:

```json
{
  "key": "ltb_a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6",
  "id": "550e8400-e29b-41d4-a716-446655440000",
  "created_at": "2024-03-20T10:00:00Z"
}
```

Use the key in the `X-API-Key` header:

```bash
curl -X GET http://localhost:3000/v1/benchmarks \
  -H "X-API-Key: ltb_a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6"
```
## REST API

All REST endpoints are served under the base URL `http://localhost:3000/v1`.

### Health Check

`GET /health`

Response:

```json
{
  "status": "healthy",
  "version": "1.0.0",
  "timestamp": "2024-03-20T10:00:00Z",
  "uptime_seconds": 3600
}
```

### Create Completion

`POST /v1/completions`

Request:

```json
{
  "provider": "openai",
  "model": "gpt-4",
  "prompt": "What is the capital of France?",
  "max_tokens": 100,
  "temperature": 0.7
}
```

Response:

```json
{
  "text": "The capital of France is Paris.",
  "model": "gpt-4",
  "usage": {
    "prompt_tokens": 10,
    "completion_tokens": 8,
    "total_tokens": 18
  },
  "request_id": "550e8400-e29b-41d4-a716-446655440000",
  "created_at": "2024-03-20T10:00:00Z"
}
```
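The same request from Python, as a minimal sketch (assumes `requests` and a valid JWT in `TOKEN`; field names follow the request/response shown above):

```python
import requests

TOKEN = "your-jwt-token"

resp = requests.post(
    "http://localhost:3000/v1/completions",
    headers={"Authorization": f"Bearer {TOKEN}"},
    json={
        "provider": "openai",
        "model": "gpt-4",
        "prompt": "What is the capital of France?",
        "max_tokens": 100,
        "temperature": 0.7,
    },
)
resp.raise_for_status()
body = resp.json()
print(body["text"], body["usage"]["total_tokens"])
```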
### Create Evaluation

`POST /v1/evaluations`

Request:

```json
{
  "provider": "openai",
  "model": "gpt-4",
  "input": "What is 2+2?",
  "output": "The answer is 4.",
  "expected": "4",
  "metrics": ["faithfulness", "relevance", "coherence"]
}
```

Response:

```json
{
  "score": 0.95,
  "metrics": {
    "faithfulness": 1.0,
    "relevance": 0.9,
    "coherence": 0.95
  },
  "evaluation_id": "550e8400-e29b-41d4-a716-446655440000",
  "created_at": "2024-03-20T10:00:00Z"
}
```

### Create Benchmark

`POST /v1/benchmarks`

Request:

```json
{
  "name": "GPT-4 vs Claude Performance",
  "providers": [
    {
      "provider": "openai",
      "model": "gpt-4"
    },
    {
      "provider": "anthropic",
      "model": "claude-3-opus-20240229"
    }
  ],
  "dataset": "mmlu",
  "metrics": ["accuracy", "latency", "cost"],
  "iterations": 100
}
```

Response:

```json
{
  "benchmark_id": "550e8400-e29b-41d4-a716-446655440000",
  "status": "pending",
  "progress": 0.0,
  "results": null
}
```

### Get Benchmark Status

`GET /v1/benchmarks/:id`

Response:

```json
{
  "benchmark_id": "550e8400-e29b-41d4-a716-446655440000",
  "status": "running",
  "progress": 0.45,
  "results": null
}
```

### List Plugins

`GET /v1/plugins?page=1&page_size=20`

Response:

```json
{
  "items": [
    {
      "id": "550e8400-e29b-41d4-a716-446655440000",
      "name": "custom-evaluator",
      "version": "1.0.0",
      "plugin_type": "evaluator",
      "status": "ready"
    }
  ],
  "total": 1,
  "page": 1,
  "page_size": 20,
  "total_pages": 1
}
```

### Execute Plugin

`POST /v1/plugins/:id/execute`

Request:

```json
{
  "input": {
    "text": "Sample input",
    "parameters": {
      "threshold": 0.8
    }
  }
}
```

Response:

```json
{
  "output": {
    "score": 0.92,
    "details": "Evaluation completed successfully"
  },
  "execution_time_ms": 150,
  "request_id": "550e8400-e29b-41d4-a716-446655440000"
}
```

### Pagination

Most list endpoints support pagination:

`GET /v1/benchmarks?page=2&page_size=50`

Parameters:

- `page`: Page number (default: 1)
- `page_size`: Items per page (default: 20, max: 100)
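A small helper sketch for walking every page (the `iter_pages` function is hypothetical; assumes `requests` and the `items`/`total_pages` response fields shown above):

```python
import requests

def iter_pages(url, headers, page_size=50):
    """Yield items from a paginated list endpoint, page by page."""
    page = 1
    while True:
        resp = requests.get(
            url,
            headers=headers,
            params={"page": page, "page_size": page_size},
        )
        resp.raise_for_status()
        body = resp.json()
        yield from body["items"]
        if page >= body["total_pages"]:
            break
        page += 1

# Usage:
# for item in iter_pages("http://localhost:3000/v1/benchmarks",
#                        {"Authorization": f"Bearer {TOKEN}"}):
#     print(item)
```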
## GraphQL API

The GraphQL endpoint is `http://localhost:3000/graphql`.
Open http://localhost:3000/graphql in a browser to access the interactive GraphiQL playground.
### Schema

```graphql
type Query {
  # Get API version
  version: String!

  # Health check
  health: Boolean!

  # List plugins
  plugins: [PluginNode!]!

  # Get plugin by ID
  plugin(id: String!): PluginNode
}

type Mutation {
  # Create completion
  createCompletion(input: CompletionInput!): CompletionResult!

  # Create evaluation
  createEvaluation(input: EvaluationInput!): EvaluationResult!
}

input CompletionInput {
  provider: String!
  model: String!
  prompt: String!
  maxTokens: Int
  temperature: Float
}

type CompletionResult {
  text: String!
  model: String!
  requestId: String!
  createdAt: DateTime!
}

input EvaluationInput {
  provider: String!
  model: String!
  input: String!
  output: String!
  expected: String
  metrics: [String!]!
}

type EvaluationResult {
  score: Float!
  evaluationId: String!
  createdAt: DateTime!
}

type PluginNode {
  id: String!
  name: String!
  version: String!
  pluginType: String!
  status: String!
}
```

### Example Queries

Check version and health:

```graphql
query {
  version
  health
}
```

List plugins:

```graphql
query {
  plugins {
    id
    name
    version
    pluginType
    status
  }
}
```

### Example Mutations

Create a completion:

```graphql
mutation {
  createCompletion(input: {
    provider: "openai"
    model: "gpt-4"
    prompt: "What is the capital of France?"
    maxTokens: 100
  }) {
    text
    model
    requestId
    createdAt
  }
}
```

Create an evaluation:

```graphql
mutation {
  createEvaluation(input: {
    provider: "openai"
    model: "gpt-4"
    input: "What is 2+2?"
    output: "The answer is 4."
    expected: "4"
    metrics: ["faithfulness", "relevance"]
  }) {
    score
    evaluationId
    createdAt
  }
}
```

## WebSocket API

Connect to the WebSocket endpoint at `ws://localhost:3000/ws`.
### Message Format

All WebSocket messages are JSON objects with a `type` field:

```json
{
  "type": "message_type",
  ...
}
```

### Subscribe

Subscribe to event topics:

```json
{
  "type": "subscribe",
  "topics": ["benchmark.progress", "evaluation.results"]
}
```

### Unsubscribe

Unsubscribe from topics:

```json
{
  "type": "unsubscribe",
  "topics": ["benchmark.progress"]
}
```

### Events

The server sends events for subscribed topics:

```json
{
  "type": "event",
  "topic": "benchmark.progress",
  "data": {
    "benchmark_id": "550e8400-e29b-41d4-a716-446655440000",
    "progress": 0.45,
    "status": "running"
  },
  "timestamp": 1710936000000
}
```

### Ping/Pong

Keep-alive messages:

```json
{
  "type": "ping",
  "timestamp": 1710936000000
}
```

```json
{
  "type": "pong",
  "timestamp": 1710936000000
}
```

### Acknowledgments

Server acknowledgment:

```json
{
  "type": "ack",
  "request_id": "req_123",
  "message": "Subscribed to 2 topics"
}
```

### Errors

Server error:

```json
{
  "type": "error",
  "code": "INVALID_MESSAGE",
  "message": "Failed to parse message"
}
```

### Available Topics

- `benchmark.progress` - Benchmark execution updates
- `evaluation.results` - Evaluation completions
- `system.metrics` - System performance metrics
- `plugin.events` - Plugin lifecycle events
- `request.logs` - API request logs
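Real clients should expect dropped connections: reconnect, resubscribe, and answer keep-alive pings. A sketch using Python's `websockets` package (whether the server initiates pings is an assumption here, hedged in the comment):

```python
import asyncio
import json
import websockets

async def subscribe_forever(uri="ws://localhost:3000/ws",
                            topics=("benchmark.progress",)):
    """Reconnect on drop, resubscribe, and print incoming events."""
    while True:
        try:
            async with websockets.connect(uri) as ws:
                await ws.send(json.dumps({"type": "subscribe",
                                          "topics": list(topics)}))
                async for raw in ws:
                    msg = json.loads(raw)
                    if msg["type"] == "event":
                        print(msg["topic"], msg["data"])
                    elif msg["type"] == "ping":
                        # Answer keep-alive pings (assumes server-initiated).
                        await ws.send(json.dumps(
                            {"type": "pong", "timestamp": msg["timestamp"]}))
        except (websockets.ConnectionClosed, OSError):
            await asyncio.sleep(2)  # back off, then reconnect

# asyncio.run(subscribe_forever())
```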
### JavaScript Client Example

```javascript
const ws = new WebSocket('ws://localhost:3000/ws');

ws.onopen = () => {
  console.log('Connected');

  // Subscribe to topics
  ws.send(JSON.stringify({
    type: 'subscribe',
    topics: ['benchmark.progress']
  }));
};

ws.onmessage = (event) => {
  const msg = JSON.parse(event.data);
  if (msg.type === 'event') {
    console.log(`Event on ${msg.topic}:`, msg.data);
  }
};

ws.onerror = (error) => {
  console.error('WebSocket error:', error);
};

ws.onclose = () => {
  console.log('Disconnected');
};
```

### Testing with wscat

```bash
# Install wscat
npm install -g wscat
# Connect
wscat -c ws://localhost:3000/ws
# Subscribe
> {"type": "subscribe", "topics": ["benchmark.progress"]}
# Receive events
< {"type":"event","topic":"benchmark.progress","data":{...},"timestamp":1710936000000}{
"error": {
"code": "ERROR_CODE",
"message": "Human-readable error message",
"details": {
"field": "additional context"
}
},
"request_id": "550e8400-e29b-41d4-a716-446655440000",
"timestamp": "2024-03-20T10:00:00Z"
}200- Success201- Created400- Bad Request (invalid input)401- Unauthorized (missing/invalid auth)403- Forbidden (insufficient permissions)404- Not Found409- Conflict (resource already exists)429- Too Many Requests (rate limit exceeded)500- Internal Server Error503- Service Unavailable
### Error Codes

- `BAD_REQUEST` - Invalid request parameters
- `UNAUTHORIZED` - Authentication required
- `FORBIDDEN` - Insufficient permissions
- `NOT_FOUND` - Resource not found
- `CONFLICT` - Resource conflict
- `RATE_LIMIT_EXCEEDED` - Too many requests
- `INTERNAL_ERROR` - Server error
- `SERVICE_UNAVAILABLE` - Service temporarily unavailable
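One way to surface this envelope in a client is to convert it into a typed exception. A Python sketch (the `ApiError` class and `check` helper are hypothetical; field names follow the envelope above):

```python
import requests

class ApiError(Exception):
    """Carries the structured error envelope returned by the API."""
    def __init__(self, status, code, message, details=None, request_id=None):
        super().__init__(f"{status} {code}: {message}")
        self.status, self.code = status, code
        self.details, self.request_id = details, request_id

def check(resp: requests.Response) -> dict:
    """Return the parsed body on success; raise ApiError otherwise."""
    if resp.ok:
        return resp.json()
    try:
        body = resp.json()
    except ValueError:  # non-JSON error body
        body = {}
    err = body.get("error", {})
    raise ApiError(
        resp.status_code,
        err.get("code", "UNKNOWN"),
        err.get("message", resp.reason),
        err.get("details"),
        body.get("request_id"),
    )
```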
## Rate Limiting

Default rate limits per user:
- 100 requests per second
- Burst of 50 requests
Rate limit information is returned in response headers:

```text
X-RateLimit-Limit: 100
X-RateLimit-Remaining: 95
X-RateLimit-Reset: 1710936060
```
When the rate limit is exceeded, you'll receive a `429` response:

```json
{
  "error": {
    "code": "RATE_LIMIT_EXCEEDED",
    "message": "Rate limit exceeded. Try again in 5 seconds.",
    "retry_after": 5
  }
}
```
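Clients can honor the `retry_after` hint with a simple backoff loop. A minimal sketch, assuming Python's `requests` (the `request_with_retry` helper is hypothetical):

```python
import time
import requests

def request_with_retry(method, url, max_retries=5, **kwargs):
    """Retry on 429, sleeping for the server-suggested interval."""
    for _ in range(max_retries):
        resp = requests.request(method, url, **kwargs)
        if resp.status_code != 429:
            return resp
        # Prefer the retry_after field from the error body, if present.
        try:
            delay = resp.json()["error"].get("retry_after", 1)
        except (ValueError, KeyError):
            delay = 1
        time.sleep(delay)
    return resp
```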
## Examples

### Complete Workflow (bash)

An end-to-end session with curl (assumes a valid JWT in `$TOKEN`):

```bash
# 1. Create completion
COMPLETION=$(curl -s -X POST http://localhost:3000/v1/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $TOKEN" \
-d '{
"provider": "openai",
"model": "gpt-4",
"prompt": "What is AI?",
"max_tokens": 100
}')
echo "$COMPLETION" | jq .
# 2. Create evaluation
EVAL=$(curl -s -X POST http://localhost:3000/v1/evaluations \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $TOKEN" \
-d "{
\"provider\": \"openai\",
\"model\": \"gpt-4\",
\"input\": \"What is AI?\",
\"output\": $(echo $COMPLETION | jq -r .text),
\"metrics\": [\"coherence\", \"relevance\"]
}")
echo "$EVAL" | jq .
# 3. Create benchmark
BENCHMARK=$(curl -s -X POST http://localhost:3000/v1/benchmarks \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $TOKEN" \
-d '{
"name": "Model Comparison",
"providers": [
{"provider": "openai", "model": "gpt-4"},
{"provider": "anthropic", "model": "claude-3-opus-20240229"}
],
"dataset": "mmlu",
"metrics": ["accuracy"],
"iterations": 10
}')
BENCHMARK_ID=$(echo "$BENCHMARK" | jq -r .benchmark_id)
# 4. Poll for completion
while true; do
STATUS=$(curl -s http://localhost:3000/v1/benchmarks/$BENCHMARK_ID \
-H "Authorization: Bearer $TOKEN")
PROGRESS=$(echo "$STATUS" | jq -r .progress)
echo "Progress: $PROGRESS"
if [ $(echo "$PROGRESS >= 1.0" | bc) -eq 1 ]; then
break
fi
sleep 2
done
echo "Benchmark complete!"
echo "$STATUS" | jq .
```

### GraphQL Client (Python)

```python
import requests
import json

GRAPHQL_URL = "http://localhost:3000/graphql"
TOKEN = "your-jwt-token"

def graphql_query(query, variables=None):
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {TOKEN}"
    }
    payload = {"query": query}
    if variables:
        payload["variables"] = variables
    response = requests.post(
        GRAPHQL_URL,
        headers=headers,
        json=payload
    )
    return response.json()

# Query example
result = graphql_query("""
query {
  version
  health
  plugins {
    id
    name
    version
  }
}
""")
print(json.dumps(result, indent=2))

# Mutation example
result = graphql_query("""
mutation($input: CompletionInput!) {
  createCompletion(input: $input) {
    text
    model
    requestId
  }
}
""", {
    "input": {
        "provider": "openai",
        "model": "gpt-4",
        "prompt": "Hello, world!",
        "maxTokens": 50
    }
})
print(json.dumps(result, indent=2))
```

### WebSocket Client (Python)

```python
import asyncio
import websockets
import json
async def websocket_client():
    uri = "ws://localhost:3000/ws"
    async with websockets.connect(uri) as websocket:
        # Subscribe to topics
        await websocket.send(json.dumps({
            "type": "subscribe",
            "topics": ["benchmark.progress", "evaluation.results"]
        }))

        # Receive messages
        async for message in websocket:
            data = json.loads(message)
            if data["type"] == "event":
                topic = data["topic"]
                event_data = data["data"]
                print(f"Event on {topic}: {event_data}")
            elif data["type"] == "ack":
                print(f"Ack: {data['message']}")

asyncio.run(websocket_client())
```

## OpenAPI Specification

Interactive API documentation (Swagger UI) is available at:
http://localhost:3000/swagger-ui
You can also access the raw OpenAPI JSON:
http://localhost:3000/api-docs/openapi.json
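As a quick sketch, you can download the raw spec and enumerate the documented endpoints from Python (assumes `requests` and the standard OpenAPI `info`/`paths` fields):

```python
import requests

# Fetch the raw OpenAPI document served by the API.
spec = requests.get("http://localhost:3000/api-docs/openapi.json").json()
print(spec["info"]["title"], spec["info"]["version"])

# List every documented path and its HTTP methods.
for path, ops in spec["paths"].items():
    print(path, sorted(ops.keys()))
```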
For issues, questions, or contributions, please visit:
- GitHub: https://github.com/yourusername/llm-test-bench
- Documentation: https://docs.llm-test-bench.dev
Licensed under either the Apache 2.0 or MIT license.