[Platform][Gemini] Generate single and multi voice audio

lochmueller · chr-hertel · commit 170a407ad2ed · 2025-12-13T00:25:12.000+01:00
diff --git a/examples/gemini/multi-speaker-voice.php b/examples/gemini/multi-speaker-voice.php
@@ -0,0 +1,55 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+use Symfony\AI\Platform\Bridge\Gemini\PlatformFactory;
+use Symfony\AI\Platform\Message\Message;
+use Symfony\AI\Platform\Message\MessageBag;
+
+// Multi-speaker text-to-speech example: sends a short dialogue to Gemini and echoes the returned audio.
+require_once dirname(__DIR__).'/bootstrap.php';
+
+$platform = PlatformFactory::create(env('GEMINI_API_KEY'), http_client());
+
+$messages = new MessageBag(
+    Message::ofUser('TTS the following conversation between Joe and Jane:
+Joe: Hows it going today Jane?
+Jane: Not too bad, how about you?'),
+);
+// Ask for audio output only; each speaker named in the prompt (Joe, Jane) gets its own prebuilt voice.
+$result = $platform->invoke('gemini-2.5-flash-preview-tts', $messages, [
+    'responseModalities' => ['AUDIO'],
+    'speechConfig' => [
+        'multiSpeakerVoiceConfig' => [
+            'speakerVoiceConfigs' => [
+                [
+                    'speaker' => 'Joe',
+                    'voiceConfig' => [
+                        'prebuiltVoiceConfig' => ['voiceName' => 'Kore'],
+                    ],
+                ],
+                [
+                    'speaker' => 'Jane',
+                    'voiceConfig' => [
+                        'prebuiltVoiceConfig' => ['voiceName' => 'Puck'],
+                    ],
+                ],
+            ],
+        ],
+    ],
+]);
+
+// Example call (the ffmpeg flags read the file as signed 16-bit little-endian PCM, 24 kHz, mono):
+// php examples/gemini/multi-speaker-voice.php > out.pcm
+// ffmpeg -f s16le -ar 24000 -ac 1 -i out.pcm out.wav
+
+// Write the audio bytes untouched: a trailing PHP_EOL would append non-audio byte(s)
+// to the stream that the example call above redirects into out.pcm.
+echo $result->asBinary();
diff --git a/examples/gemini/single-speaker-voice.php b/examples/gemini/single-speaker-voice.php
@@ -0,0 +1,38 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+use Symfony\AI\Platform\Bridge\Gemini\PlatformFactory;
+use Symfony\AI\Platform\Message\Message;
+use Symfony\AI\Platform\Message\MessageBag;
+
+// Single-speaker text-to-speech example: sends one sentence to Gemini and echoes the returned audio.
+require_once dirname(__DIR__).'/bootstrap.php';
+
+$platform = PlatformFactory::create(env('GEMINI_API_KEY'), http_client());
+
+$messages = new MessageBag(Message::ofUser('Say cheerfully: Have a wonderful day!'));
+// Ask for audio output only, spoken with a single prebuilt voice.
+$result = $platform->invoke('gemini-2.5-flash-preview-tts', $messages, [
+    'responseModalities' => ['AUDIO'],
+    'speechConfig' => [
+        'voiceConfig' => [
+            'prebuiltVoiceConfig' => ['voiceName' => 'Kore'],
+        ],
+    ],
+]);
+
+// Example call (the ffmpeg flags read the file as signed 16-bit little-endian PCM, 24 kHz, mono):
+// php examples/gemini/single-speaker-voice.php > out.pcm
+// ffmpeg -f s16le -ar 24000 -ac 1 -i out.pcm out.wav
+
+// Write the audio bytes untouched: a trailing PHP_EOL would append non-audio byte(s)
+// to the stream that the example call above redirects into out.pcm.
+echo $result->asBinary();
diff --git a/src/platform/src/Bridge/Gemini/ModelCatalog.php b/src/platform/src/Bridge/Gemini/ModelCatalog.php
@@ -146,6 +146,22 @@ public function __construct(array $additionalModels = [])
                     Capability::TOOL_CALLING,
                 ],
             ],
+            'gemini-2.5-flash-preview-tts' => [
+                'class' => Gemini::class,
+                'capabilities' => [
+                    Capability::INPUT_MESSAGES,
+                    Capability::OUTPUT_AUDIO,
+                    Capability::TEXT_TO_SPEECH,
+                ],
+            ],
+            'gemini-2.5-pro-preview-tts' => [
+                'class' => Gemini::class,
+                'capabilities' => [
+                    Capability::INPUT_MESSAGES,
+                    Capability::OUTPUT_AUDIO,
+                    Capability::TEXT_TO_SPEECH,
+                ],
+            ],
             'gemini-embedding-exp-03-07' => [
                 'class' => Embeddings::class,
                 'capabilities' => [Capability::INPUT_MULTIPLE],