Skip to content

Commit cb94c17

Browse files
feat(restate): add Soniox STT provider support (#2186)
* feat(restate): add Soniox STT provider support
  - Add SONIOX_API_KEY to env schema
  - Create soniox.ts with webhook-based transcription
  - Update SttFileInput to include optional provider param (defaults to deepgram)
  - Update run handler to branch on provider
  - Update onTranscript to handle both Deepgram and Soniox callbacks

  Co-Authored-By: yujonglee <[email protected]>

* refactor(restate): use ctx.request().extraArgs for Soniox API key instead of workflow state

  Co-Authored-By: yujonglee <[email protected]>

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: yujonglee <[email protected]>
1 parent 446b7e4 commit cb94c17

File tree

3 files changed

+136
-8
lines changed

3 files changed

+136
-8
lines changed

apps/restate/src/env.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ export const envSchema = z.object({
44
RESTATE_IDENTITY_KEY: z.string().optional(),
55
RESTATE_INGRESS_URL: z.url(),
66
DEEPGRAM_API_KEY: z.string().min(1),
7+
SONIOX_API_KEY: z.string().min(1),
78
OPENROUTER_API_KEY: z.string().min(1),
89
SUPABASE_URL: z.url(),
910
SUPABASE_SERVICE_ROLE_KEY: z.string().min(1),

apps/restate/src/services/stt-file.ts

Lines changed: 51 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,26 @@ import {
66
DeepgramCallback,
77
DeepgramCallbackType,
88
extractTranscript,
9-
transcribeWithCallback,
9+
transcribeWithCallback as transcribeWithDeepgram,
1010
} from "../deepgram";
1111
import { type Env } from "../env";
12+
import {
13+
fetchTranscript as fetchSonioxTranscript,
14+
SonioxCallback,
15+
SonioxCallbackType,
16+
transcribeWithCallback as transcribeWithSoniox,
17+
} from "../soniox";
1218
import { createSignedUrl, deleteFile } from "../supabase";
1319
import { limiter } from "./rate-limit";
1420

21+
const SttProvider = z.enum(["deepgram", "soniox"]);
22+
23+
export type SttProviderType = z.infer<typeof SttProvider>;
24+
1525
const SttFileInput = z.object({
1626
userId: z.string(),
1727
fileId: z.string(),
28+
provider: SttProvider.optional().default("deepgram"),
1829
});
1930

2031
export type SttFileInputType = z.infer<typeof SttFileInput>;
@@ -31,6 +42,7 @@ export const sttFile = restate.workflow({
3142
async (ctx: restate.WorkflowContext, input: SttFileInputType) => {
3243
ctx.set("status", "QUEUED" as SttStatusType);
3344
ctx.set("fileId", input.fileId);
45+
ctx.set("provider", input.provider);
3446

3547
const env = ctx.request().extraArgs[0] as Env;
3648

@@ -48,10 +60,21 @@ export const sttFile = restate.workflow({
4860

4961
const callbackUrl = `${env.RESTATE_INGRESS_URL.replace(/\/+$/, "")}/SttFile/${encodeURIComponent(ctx.key)}/onTranscript`;
5062

51-
const requestId = await ctx.run("transcribe", () =>
52-
transcribeWithCallback(audioUrl, callbackUrl, env.DEEPGRAM_API_KEY),
53-
);
54-
ctx.set("deepgramRequestId", requestId);
63+
if (input.provider === "soniox") {
64+
const requestId = await ctx.run("transcribe", () =>
65+
transcribeWithSoniox(audioUrl, callbackUrl, env.SONIOX_API_KEY),
66+
);
67+
ctx.set("providerRequestId", requestId);
68+
} else {
69+
const requestId = await ctx.run("transcribe", () =>
70+
transcribeWithDeepgram(
71+
audioUrl,
72+
callbackUrl,
73+
env.DEEPGRAM_API_KEY,
74+
),
75+
);
76+
ctx.set("providerRequestId", requestId);
77+
}
5578

5679
const transcript = await ctx.promise<string>("transcript");
5780
ctx.set("transcript", transcript);
@@ -72,15 +95,35 @@ export const sttFile = restate.workflow({
7295
),
7396

7497
onTranscript: restate.handlers.workflow.shared(
75-
{ input: serde.zod(DeepgramCallback) },
98+
{ input: serde.zod(z.union([DeepgramCallback, SonioxCallback])) },
7699
async (
77100
ctx: restate.WorkflowSharedContext,
78-
payload: DeepgramCallbackType,
101+
payload: DeepgramCallbackType | SonioxCallbackType,
79102
): Promise<void> => {
80103
const existing = await ctx.get<string>("transcript");
81104
if (existing !== undefined) return;
82105

83-
ctx.promise<string>("transcript").resolve(extractTranscript(payload));
106+
const provider = await ctx.get<SttProviderType>("provider");
107+
108+
if (provider === "soniox" && "id" in payload && "status" in payload) {
109+
const sonioxPayload = payload as SonioxCallbackType;
110+
if (sonioxPayload.status === "error") {
111+
ctx
112+
.promise<string>("transcript")
113+
.reject("Soniox transcription failed");
114+
return;
115+
}
116+
const env = ctx.request().extraArgs[0] as Env;
117+
const transcript = await fetchSonioxTranscript(
118+
sonioxPayload.id,
119+
env.SONIOX_API_KEY,
120+
);
121+
ctx.promise<string>("transcript").resolve(transcript);
122+
} else {
123+
ctx
124+
.promise<string>("transcript")
125+
.resolve(extractTranscript(payload as DeepgramCallbackType));
126+
}
84127
},
85128
),
86129

apps/restate/src/soniox.ts

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
import { z } from "zod";
2+
3+
const SONIOX_API_HOST = "https://api.soniox.com";
4+
5+
/**
 * Zod schema for a Soniox callback payload: a string `id` plus a `status`
 * that must be either "completed" or "error".
 */
export const SonioxCallback = z.object({
6+
id: z.string(),
7+
status: z.enum(["completed", "error"]),
8+
});
9+
10+
/** Static type inferred from the `SonioxCallback` schema. */
export type SonioxCallbackType = z.infer<typeof SonioxCallback>;
11+
12+
/** One token of a Soniox transcript; only `text` is required, every other field is optional. */
type SonioxToken = {
13+
text: string;
14+
// NOTE(review): presumably start/end offsets in milliseconds — confirm against the Soniox API docs.
start_ms?: number;
15+
end_ms?: number;
16+
confidence?: number;
17+
// NOTE(review): looks like a diarization speaker label — confirm against the Soniox API docs.
speaker?: number | string;
18+
};
19+
20+
/** Shape the transcript endpoint's JSON body is cast to: the full text plus its tokens. */
type SonioxTranscriptResponse = {
21+
text: string;
22+
tokens: SonioxToken[];
23+
};
24+
25+
/**
 * Starts an asynchronous Soniox transcription of a remote audio file.
 *
 * Sends a POST to `/v1/transcriptions` with the `stt-async-v3` model, speaker
 * diarization and language identification enabled, and `callbackUrl` as the
 * webhook URL.
 *
 * @param audioUrl - URL of the audio, sent as `audio_url`.
 * @param callbackUrl - URL sent as `webhook_url`.
 * @param apiKey - Soniox API key, sent as a bearer token.
 * @returns The transcription id reported by Soniox.
 * @throws Error when the HTTP response is not OK, or when the response body
 *   does not contain a non-empty string `id`.
 */
export async function transcribeWithCallback(
  audioUrl: string,
  callbackUrl: string,
  apiKey: string,
): Promise<string> {
  const response = await fetch(`${SONIOX_API_HOST}/v1/transcriptions`, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${apiKey}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model: "stt-async-v3",
      audio_url: audioUrl,
      webhook_url: callbackUrl,
      enable_speaker_diarization: true,
      enable_language_identification: true,
    }),
  });

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`Soniox: ${response.status} - ${errorText}`);
  }

  // Treat the body as unknown and narrow it: a `null` body or a non-string
  // `id` must surface as the same descriptive error instead of an opaque
  // TypeError or a value that violates the declared return type.
  const result: unknown = await response.json();
  const id =
    typeof result === "object" && result !== null
      ? (result as { id?: unknown }).id
      : undefined;
  if (typeof id !== "string" || id === "") {
    throw new Error("Soniox: missing transcription id");
  }

  return id;
}
57+
58+
/**
 * Downloads the transcript of a Soniox transcription.
 *
 * Sends a bearer-authenticated GET to
 * `/v1/transcriptions/{transcriptionId}/transcript`.
 *
 * @param transcriptionId - Id of the transcription to fetch.
 * @param apiKey - Soniox API key, sent as a bearer token.
 * @returns The response's `text` when it is a non-empty string; otherwise the
 *   concatenated token texts when `tokens` is an array; otherwise the (empty)
 *   `text`.
 * @throws Error when the HTTP response is not OK, or when the body carries
 *   neither a string `text` nor a `tokens` array.
 */
export async function fetchTranscript(
  transcriptionId: string,
  apiKey: string,
): Promise<string> {
  // The id may come from an externally supplied payload, so escape it before
  // placing it in the path; otherwise characters such as "/" or "?" could
  // redirect this authenticated request to a different endpoint.
  const transcriptUrl = `${SONIOX_API_HOST}/v1/transcriptions/${encodeURIComponent(transcriptionId)}/transcript`;

  const response = await fetch(transcriptUrl, {
    headers: {
      Authorization: `Bearer ${apiKey}`,
    },
  });

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(
      `Soniox fetch transcript: ${response.status} - ${errorText}`,
    );
  }

  // The body is external data: narrow it rather than trusting the cast.
  const result = (await response.json()) as
    | Partial<SonioxTranscriptResponse>
    | null;
  const text = typeof result?.text === "string" ? result.text : undefined;
  const tokens = Array.isArray(result?.tokens) ? result.tokens : undefined;

  if (text) {
    return text;
  }
  if (tokens !== undefined) {
    // Empty `text`: fall back to rebuilding the transcript from the tokens.
    return renderTokens(tokens);
  }
  if (text !== undefined) {
    return text;
  }
  throw new Error("Soniox fetch transcript: malformed response body");
}
81+
82+
/**
 * Builds a transcript string from tokens by concatenating each token's
 * `text` in order, with no separator.
 *
 * @param tokens - Tokens to concatenate.
 * @returns The joined text; an empty string for an empty array.
 */
function renderTokens(tokens: SonioxToken[]): string {
  const pieces: string[] = [];
  for (const token of tokens) {
    pieces.push(token.text);
  }
  return pieces.join("");
}

0 commit comments

Comments
 (0)