Skip to content
Draft
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
58f6c51
chore: initialize pre-release configuration for canary builds
minpeter Nov 3, 2025
df62ec5
Update dependency packages to v6 (beta channel) (#99)
minpeter Nov 3, 2025
c96c293
feat: upgrade language model interfaces to V3 (#101)
minpeter Nov 3, 2025
90f8384
feat: update changeset workflow to use dynamic branch reference
minpeter Nov 3, 2025
5c46b85
Version Packages (canary) (#104)
github-actions[bot] Nov 3, 2025
4c992c3
feat: configure Biome linter file patterns
minpeter Nov 3, 2025
977676f
refactor: update package naming convention and dictionary
minpeter Nov 4, 2025
bb75fec
Revert "refactor: update package naming convention and dictionary"
minpeter Nov 4, 2025
717f944
refactor: reorganize community middleware into separate files
minpeter Nov 4, 2025
754a818
feat: upgrade to AI SDK v3 and add middleware specification
minpeter Nov 4, 2025
be0734b
chore: upgrade ai package from peer to direct dependency
minpeter Nov 4, 2025
a9d1a4f
chore: switch JSONC formatter to VS Code built-in
minpeter Nov 4, 2025
5e31402
feat: typescript references (#106)
minpeter Nov 4, 2025
f558b93
refactor: reorganize test files and add reasoning extraction middleware
minpeter Nov 7, 2025
946668a
refactor: extract helper functions in reasoning middleware
minpeter Nov 7, 2025
59f9f4b
refactor: update mock location data in test file
minpeter Nov 7, 2025
3461fc1
feat: adjust model parameters and add Korean system prompt
minpeter Nov 7, 2025
2778586
feat: improve system prompt structure and tool usage guidelines
minpeter Nov 7, 2025
f529ac0
refactor: remove emoji characters from prompt examples
minpeter Nov 7, 2025
fe90395
feat: openai proxy server (#107)
minpeter Nov 10, 2025
e7089c2
chore: upgrade dependencies to latest versions
minpeter Nov 10, 2025
93c6a8c
Adopt Ultracite code standards and strictNullChecks
minpeter Dec 23, 2025
5c5564c
Add Ultracite code standards and pre-commit formatting hooks
minpeter Dec 23, 2025
fbadc14
Upgrade turbo to version 2.7.1
minpeter Dec 23, 2025
b48924c
refactor: remove internal barrel files and enable noBarrelFile lintin…
minpeter Dec 23, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/late-ducks-think.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@ai-sdk-tool/parser": major
---

upgrade language model interfaces to V3
16 changes: 16 additions & 0 deletions .changeset/pre.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"mode": "pre",
"tag": "canary",
"initialVersions": {
"@ai-sdk-tool/examples-eval-core": "0.0.0",
"@ai-sdk-tool/examples-parser-core": "0.0.0",
"@ai-sdk-tool/examples-rxml-core": "0.0.0",
"@ai-sdk-tool/eval": "0.1.8",
"@ai-sdk-tool/parser": "2.1.7",
"@ai-sdk-tool/rxml": "0.1.1"
},
"changesets": [
"late-ducks-think",
"spicy-ants-scream"
]
}
6 changes: 6 additions & 0 deletions .changeset/spicy-ants-scream.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
"@ai-sdk-tool/parser": major
"@ai-sdk-tool/eval": major
---

Bump `ai` to v6 (the middleware has not yet been migrated to V3).
43 changes: 43 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
version: 2
updates:
# Root package dependencies (pnpm workspace)
# This covers all workspace packages in apps/*, packages/*, and internals/*
- package-ecosystem: "npm"
directory: "/"
schedule:
interval: "daily"
time: "09:00"
timezone: "Asia/Seoul"
open-pull-requests-limit: 5
versioning-strategy: increase
groups:
npm-all-updates:
patterns:
- "*"
labels:
- "dependencies"
- "automated"
- "npm"
commit-message:
prefix: "dependabot:"
include: "scope"

# GitHub Actions
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "daily"
time: "09:00"
timezone: "Asia/Seoul"
open-pull-requests-limit: 5
groups:
github-actions:
patterns:
- "*"
labels:
- "dependencies"
- "automated"
- "github-actions"
commit-message:
prefix: "dependabot:"
include: "scope"
2 changes: 1 addition & 1 deletion .github/workflows/release-changeset.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ jobs:
id: changesets
uses: changesets/action@v1
with:
branch: main
branch: ${{ github.ref_name }}
# This expects a script called `ci:release` that builds your packages and calls `changeset publish`
version: pnpm ci:version
publish: pnpm ci:release
Expand Down
4 changes: 3 additions & 1 deletion docs/concepts/middleware.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# [dev] Middleware Architecture

The middleware composes with AI SDK `LanguageModelV2Middleware` to provide tool calling for models without native support.
The middleware composes with AI SDK `LanguageModelV3Middleware` to provide tool calling for models without native support.

## Responsibilities

Expand All @@ -22,6 +22,7 @@ See `packages/parser/src/tool-call-middleware.ts` and `packages/parser/src/proto
## How it works (end-to-end)

1. `transformParams`

- Extracts custom function tools (`type: "function"`) and renders a system prompt via `protocol.formatTools` and a provided `toolSystemPromptTemplate`.
- Normalizes the existing prompt:
- Assistant tool-call parts are converted to provider-friendly text with `protocol.formatToolCall`.
Expand All @@ -34,6 +35,7 @@ See `packages/parser/src/tool-call-middleware.ts` and `packages/parser/src/proto
- `toolChoice: { type: "none" }`: not supported (throws). Use `auto` (default) instead.

2. `wrapStream`

- If tool-choice fast-path is active, performs a single `generate` call and emits a synthetic `tool-call` followed by `finish`.
- Otherwise, pipes provider stream through `protocol.createStreamParser`, emitting normalized `tool-call` parts as they arrive.

Expand Down
6 changes: 3 additions & 3 deletions docs/evaluation.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,16 @@ See runnable examples in `examples/eval-core/src/*`.

## [dev] Create a Custom Benchmark

Implement `LanguageModelV2Benchmark` and pass it to `evaluate`.
Implement `LanguageModelV3Benchmark` and pass it to `evaluate`.

```ts
import { generateText } from "ai";
import type {
LanguageModelV2Benchmark,
LanguageModelV3Benchmark,
BenchmarkResult,
} from "@ai-sdk-tool/eval";

export const myBenchmark: LanguageModelV2Benchmark = {
export const myBenchmark: LanguageModelV3Benchmark = {
name: "my-benchmark",
version: "1.0.0",
description: "Minimal example",
Expand Down
8 changes: 4 additions & 4 deletions examples/eval-core/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@
"dependencies": {
"@ai-sdk-tool/eval": "workspace:*",
"@ai-sdk-tool/parser": "workspace:*",
"@ai-sdk/openai": "2.0.56",
"@ai-sdk/openai-compatible": "1.0.23",
"ai": "5.0.81",
"@ai-sdk/openai": "3.0.0-beta.48",
"@ai-sdk/openai-compatible": "2.0.0-beta.31",
"ai": "6.0.0-beta.92",
"tsx": "^4.20.6"
},
"devDependencies": {
"@types/node": "^24.9.1"
"@types/node": "^24.10.0"
}
}
8 changes: 4 additions & 4 deletions examples/parser-core/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@
"license": "Apache-2.0",
"dependencies": {
"@ai-sdk-tool/parser": "workspace:*",
"@ai-sdk/openai": "2.0.56",
"@ai-sdk/openai-compatible": "1.0.23",
"ai": "5.0.81",
"@ai-sdk/openai": "3.0.0-beta.48",
"@ai-sdk/openai-compatible": "2.0.0-beta.31",
"ai": "6.0.0-beta.92",
"tsx": "^4.20.6",
"zod": "^4.1.12"
},
"devDependencies": {
"@ai-sdk/provider": "2.0.0",
"@types/node": "^24.9.1"
"@types/node": "^24.10.0"
}
}
25 changes: 20 additions & 5 deletions examples/parser-core/src/00-stream-tool-call.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,28 @@ import { z } from "zod";
const MAX_STEPS = 4;
const MAX_TEMPERATURE = 100;

const openrouter = createOpenAICompatible({
name: "openrouter",
apiKey: process.env.OPENROUTER_API_KEY,
baseURL: "https://openrouter.ai/api/v1",
// const openrouter = createOpenAICompatible({
// name: "openrouter",
// apiKey: process.env.OPENROUTER_API_KEY,
// baseURL: "https://openrouter.ai/api/v1",
// });

const friendli = createOpenAICompatible({
name: "friendli",
apiKey: process.env.FRIENDLI_TOKEN,
baseURL: "https://api.friendli.ai/serverless/v1",
includeUsage: true,
fetch: (url, options) => {
const body = options?.body ? JSON.parse(options.body as string) : {};
body.parse_reasoning = true;
return fetch(url, { ...options, body: JSON.stringify(body) });
},
});

async function main() {
const result = streamText({
model: wrapLanguageModel({
model: openrouter("z-ai/glm-4.5-air"),
model: friendli("zai-org/GLM-4.6"),
middleware: sijawaraDetailedXmlToolMiddleware,
}),

Expand Down Expand Up @@ -62,6 +74,9 @@ async function main() {
for await (const part of result.fullStream) {
if (part.type === "text-delta") {
process.stdout.write(part.text);
} else if (part.type === "reasoning-delta") {
// Print reasoning text in yellow (ANSI SGR 33) so it stands out from regular text output
process.stdout.write(`\x1b[33m${part.text}\x1b[0m`);
} else if (part.type === "tool-result") {
console.log({
name: part.toolName,
Expand Down
10 changes: 5 additions & 5 deletions examples/parser-core/src/logging-middleware.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import type {
LanguageModelV2Middleware,
LanguageModelV2StreamPart,
LanguageModelV3Middleware,
LanguageModelV3StreamPart,
} from "@ai-sdk/provider";

const INV = "\x1b[7m"; // ANSI SGR: reverse video
Expand Down Expand Up @@ -39,7 +39,7 @@ function invLog(...args: unknown[]) {
}
}

export const loggingMiddleware: LanguageModelV2Middleware = {
export const loggingMiddleware: LanguageModelV3Middleware = {
wrapGenerate: async ({ doGenerate, params }) => {
invLog("doGenerate called");
invLog(`params: ${JSON.stringify(params, null, 2)}`);
Expand All @@ -62,8 +62,8 @@ export const loggingMiddleware: LanguageModelV2Middleware = {
const textBlocks = new Map<string, string>();

const transformStream = new TransformStream<
LanguageModelV2StreamPart,
LanguageModelV2StreamPart
LanguageModelV3StreamPart,
LanguageModelV3StreamPart
>({
transform(chunk, controller) {
switch (chunk.type) {
Expand Down
2 changes: 1 addition & 1 deletion examples/parser-core/src/xx-debug-file-write.ts
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ async function main() {
detail: string;
first?: FirstToolCall;
}>;
for (let i = 1; i <= MAX_RUNS; i++) {
for (let i = 1; i <= MAX_RUNS; i += 1) {
console.log(`\n--- Run ${i} ---`);
const outcome = await runOnce(i);
results.push(outcome);
Expand Down
2 changes: 1 addition & 1 deletion examples/rxml-core/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"tsx": "^4.20.6"
},
"devDependencies": {
"@types/node": "^24.9.1",
"@types/node": "^24.10.0",
"zod": "^4.1.12"
}
}
14 changes: 7 additions & 7 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,17 @@
"ci:version": "changeset version"
},
"devDependencies": {
"@biomejs/biome": "2.3.1",
"@biomejs/biome": "2.3.3",
"@changesets/cli": "2.29.7",
"@types/node": "^24.9.1",
"@vitest/coverage-v8": "^4.0.4",
"vitest": "^4.0.4",
"globals": "^16.4.0",
"@types/node": "^24.10.0",
"@vitest/coverage-v8": "^4.0.6",
"globals": "^16.5.0",
"jiti": "^2.6.1",
"npm-run-all": "^4.1.5",
"turbo": "^2.5.8",
"turbo": "^2.6.0",
"typescript": "5.9.3",
"ultracite": "6.0.4"
"ultracite": "6.2.0",
"vitest": "^4.0.6"
},
"packageManager": "[email protected]",
"engines": {
Expand Down
6 changes: 6 additions & 0 deletions packages/eval/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# @ai-sdk-tool/eval

## 1.0.0-canary.0

### Major Changes

- df62ec5: Bump `ai` to v6 (the middleware has not yet been migrated to V3).

## 0.1.8

### Patch Changes
Expand Down
8 changes: 4 additions & 4 deletions packages/eval/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ It allows developers to:

## Core Concepts

- **Benchmark (`LanguageModelV2Benchmark`)**: A standardized interface for creating an evaluation task. It has a `run` method that takes a `LanguageModel` and returns a `BenchmarkResult`.
- **Benchmark (`LanguageModelV3Benchmark`)**: A standardized interface for creating an evaluation task. It has a `run` method that takes a `LanguageModel` and returns a `BenchmarkResult`.
- **`evaluate` function**: The core function that runs a set of benchmarks against one or more models and provides a report on the results.
- **Reporter**: Formats the evaluation results into different outputs, such as a human-readable console report or a machine-readable JSON object.

Expand Down Expand Up @@ -77,20 +77,20 @@ cd examples/eval-core && pnpm dlx tsx src/json-generation.ts

## Creating a Custom Benchmark

You can easily create your own benchmark by implementing the `LanguageModelV2Benchmark` interface. This is useful for testing model performance on tasks specific to your application.
You can easily create your own benchmark by implementing the `LanguageModelV3Benchmark` interface. This is useful for testing model performance on tasks specific to your application.

**Example: A custom benchmark to test politeness.**

```typescript
import {
LanguageModelV2Benchmark,
LanguageModelV3Benchmark,
BenchmarkResult,
EvaluateOptions,
} from "@ai-sdk-tool/eval";
import { LanguageModel, generateText } from "ai";

// Define the benchmark object
export const politenessBenchmark: LanguageModelV2Benchmark = {
export const politenessBenchmark: LanguageModelV3Benchmark = {
name: "politeness-check",
version: "1.0.0",
description: "Checks if the model's response is polite.",
Expand Down
6 changes: 3 additions & 3 deletions packages/eval/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@ai-sdk-tool/eval",
"version": "0.1.8",
"version": "1.0.0-canary.0",
"license": "Apache-2.0",
"type": "module",
"main": "./dist/index.js",
Expand Down Expand Up @@ -28,12 +28,12 @@
"test:e2e": "tsup src/run-test.ts --format esm --clean --out-dir dist --no-config && node dist/run-test.js"
},
"dependencies": {
"@ai-sdk/provider": "2.0.0",
"@ai-sdk/provider": "3.0.0-beta.14",
"ajv": "^8.17.1",
"zod": "^4.1.12"
},
"devDependencies": {
"@types/node": "^24.9.1",
"@types/node": "^24.10.0",
"tsup": "^8.5.0",
"typescript": "^5.9.3"
},
Expand Down
9 changes: 5 additions & 4 deletions packages/eval/src/benchmarks/bfcl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import {
tool,
} from "ai";

import type { BenchmarkResult, LanguageModelV2Benchmark } from "@/interfaces";
import type { BenchmarkResult, LanguageModelV3Benchmark } from "@/interfaces";
import { resolveDataDir } from "@/utils/paths";

import {
Expand Down Expand Up @@ -124,7 +124,7 @@ function createBfclBenchmark(
description: string,
testDataFile: string,
answerDataFile: string
): LanguageModelV2Benchmark {
): LanguageModelV3Benchmark {
return {
name,
version: "1.0.0",
Expand Down Expand Up @@ -563,7 +563,7 @@ function createBfclBenchmark(
restoredCalls: Record<string, unknown>[],
usedActual: Set<number>
): number => {
for (let i = 0; i < restoredCalls.length; i++) {
for (let i = 0; i < restoredCalls.length; i += 1) {
if (usedActual.has(i)) {
continue;
}
Expand Down Expand Up @@ -1149,7 +1149,8 @@ function createBfclBenchmark(
.fill(0)
.map(async () => {
while (true) {
const current = idx++;
const current = idx;
idx += 1;
if (current >= items.length) {
break;
}
Expand Down
2 changes: 1 addition & 1 deletion packages/eval/src/benchmarks/bfcl/ast-checker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ export function parallelFunctionCheckerNoOrder(
}

let foundMatch = false;
for (let i = 0; i < modelToolCalls.length; i++) {
for (let i = 0; i < modelToolCalls.length; i += 1) {
if (matchedModelCallIndices.has(i)) {
continue;
}
Expand Down
Loading
Loading