Skip to content

Commit 170a407

Browse files
lochmueller authored and chr-hertel committed
[Platform][Gemini] Generate single and multi voice audio
1 parent b9a7258 commit 170a407

File tree

3 files changed

+109
-0
lines changed

3 files changed

+109
-0
lines changed
examples/gemini/multi-speaker-voice.php

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

use Symfony\AI\Platform\Bridge\Gemini\PlatformFactory;
use Symfony\AI\Platform\Message\Message;
use Symfony\AI\Platform\Message\MessageBag;

require_once dirname(__DIR__).'/bootstrap.php';

// Example: multi-speaker text-to-speech with a Gemini TTS model.
// The API key is read from the GEMINI_API_KEY environment variable.
$platform = PlatformFactory::create(env('GEMINI_API_KEY'), http_client());

// The prompt names the two speakers ("Joe" and "Jane"); the same names are
// used below to assign a voice to each of them.
$messages = new MessageBag(
    Message::ofUser(<<<'PROMPT'
        TTS the following conversation between Joe and Jane:
        Joe: Hows it going today Jane?
        Jane: Not too bad, how about you?
        PROMPT),
);

// Request audio output and map each speaker name to a prebuilt voice.
$result = $platform->invoke('gemini-2.5-flash-preview-tts', $messages, [
    'responseModalities' => ['AUDIO'],
    'speechConfig' => [
        'multiSpeakerVoiceConfig' => [
            'speakerVoiceConfigs' => [
                [
                    'speaker' => 'Joe',
                    'voiceConfig' => [
                        'prebuiltVoiceConfig' => [
                            'voiceName' => 'Kore',
                        ],
                    ],
                ],
                [
                    'speaker' => 'Jane',
                    'voiceConfig' => [
                        'prebuiltVoiceConfig' => [
                            'voiceName' => 'Puck',
                        ],
                    ],
                ],
            ],
        ],
    ],
]);

// Example call
// php examples/gemini/multi-speaker-voice.php > out.pcm
// ffmpeg -f s16le -ar 24000 -ac 1 -i out.pcm out.wav

// Write the audio bytes to stdout exactly as received. No line terminator is
// appended: the output is redirected into a raw PCM file (see the example call
// above), and any extra byte would become part of the sample data.
echo $result->asBinary();
examples/gemini/single-speaker-voice.php

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

use Symfony\AI\Platform\Bridge\Gemini\PlatformFactory;
use Symfony\AI\Platform\Message\Message;
use Symfony\AI\Platform\Message\MessageBag;

require_once dirname(__DIR__).'/bootstrap.php';

// Example: single-speaker text-to-speech with a Gemini TTS model.
// The API key is read from the GEMINI_API_KEY environment variable.
$platform = PlatformFactory::create(env('GEMINI_API_KEY'), http_client());

$messages = new MessageBag(
    Message::ofUser('Say cheerfully: Have a wonderful day!'),
);

// Request audio output rendered with a single prebuilt voice.
$result = $platform->invoke('gemini-2.5-flash-preview-tts', $messages, [
    'responseModalities' => ['AUDIO'],
    'speechConfig' => [
        'voiceConfig' => [
            'prebuiltVoiceConfig' => [
                'voiceName' => 'Kore',
            ],
        ],
    ],
]);

// Example call
// php examples/gemini/single-speaker-voice.php > out.pcm
// ffmpeg -f s16le -ar 24000 -ac 1 -i out.pcm out.wav

// Write the audio bytes to stdout exactly as received. No line terminator is
// appended: the output is redirected into a raw PCM file (see the example call
// above), and any extra byte would become part of the sample data.
echo $result->asBinary();

src/platform/src/Bridge/Gemini/ModelCatalog.php

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,22 @@ public function __construct(array $additionalModels = [])
146146
Capability::TOOL_CALLING,
147147
],
148148
],
149+
'gemini-2.5-flash-preview-tts' => [
150+
'class' => Gemini::class,
151+
'capabilities' => [
152+
Capability::INPUT_MESSAGES,
153+
Capability::OUTPUT_AUDIO,
154+
Capability::TEXT_TO_SPEECH,
155+
],
156+
],
157+
'gemini-2.5-pro-preview-tts' => [
158+
'class' => Gemini::class,
159+
'capabilities' => [
160+
Capability::INPUT_MESSAGES,
161+
Capability::OUTPUT_AUDIO,
162+
Capability::TEXT_TO_SPEECH,
163+
],
164+
],
149165
'gemini-embedding-exp-03-07' => [
150166
'class' => Embeddings::class,
151167
'capabilities' => [Capability::INPUT_MULTIPLE],

0 commit comments

Comments
 (0)