1- // Copyright 2025 Google LLC
1+ // Copyright 2026 Google LLC
22//
33// Licensed under the Apache License, Version 2.0 (the "License");
44// you may not use this file except in compliance with the License.
@@ -161,18 +161,85 @@ message Voice {
161161
162162// Used for advanced voice options.
163163message AdvancedVoiceOptions {
164+ // Pairs a single harm category with the block threshold to apply to it.
165+ message SafetySetting {
166+ // The harm category that this safety setting applies to.
167+ HarmCategory category = 1 ;
168+
169+ // The harm block threshold to use for `category`.
170+ HarmBlockThreshold threshold = 2 ;
171+ }
172+
173+ // Wrapper message holding the list of safety settings for the request.
174+ message SafetySettings {
175+ // The individual safety settings that make up this collection.
176+ repeated SafetySetting settings = 1 ;
177+ }
178+
179+ // Harm categories that a safety setting can target.
180+ enum HarmCategory {
181+ // Default value. This value is unused.
182+ HARM_CATEGORY_UNSPECIFIED = 0 ;
183+
184+ // Content that promotes violence or incites hatred against individuals or
185+ // groups based on certain attributes.
186+ HARM_CATEGORY_HATE_SPEECH = 1 ;
187+
188+ // Content that promotes, facilitates, or enables dangerous activities.
189+ HARM_CATEGORY_DANGEROUS_CONTENT = 2 ;
190+
191+ // Content that is abusive, threatening, or intended to bully, torment, or ridicule.
192+ HARM_CATEGORY_HARASSMENT = 3 ;
193+
194+ // Content that contains sexually explicit material.
195+ HARM_CATEGORY_SEXUALLY_EXPLICIT = 4 ;
196+ }
197+
198+ // Thresholds at which content in a harm category is blocked.
199+ enum HarmBlockThreshold {
200+ // The harm block threshold is unspecified.
201+ HARM_BLOCK_THRESHOLD_UNSPECIFIED = 0 ;
202+
203+ // Block content with a low harm probability or higher.
204+ BLOCK_LOW_AND_ABOVE = 1 ;
205+
206+ // Block content with a medium harm probability or higher.
207+ BLOCK_MEDIUM_AND_ABOVE = 2 ;
208+
209+ // Block only content with a high harm probability.
210+ BLOCK_ONLY_HIGH = 3 ;
211+
212+ // Do not block any content, regardless of its harm probability.
213+ BLOCK_NONE = 4 ;
214+
215+ // Turn off the safety filter entirely.
216+ OFF = 5 ;
217+ }
218+
164219 // Only for Journey voices. If false, the synthesis is context-aware
165220 // and has a higher latency.
166221 optional bool low_latency_journey_synthesis = 1 ;
167222
168- // Optional. Input only. If true, relaxes safety filters for Gemini TTS. Only
169- // supported for accounts linked to Invoiced (Offline) Cloud billing accounts.
170- // Otherwise, will return result
171- // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
223+ // Optional. Input only. Deprecated: use `safety_settings` instead.
224+ // If true, relaxes safety filters for Gemini TTS.
172225 bool relax_safety_filters = 8 [
226+ deprecated = true ,
173227 (google.api.field_behavior ) = INPUT_ONLY ,
174228 (google.api.field_behavior ) = OPTIONAL
175229 ];
230+
231+ // Optional. Input only. Applies to Gemini TTS only. If set, content in the
232+ // category named by a safety setting is blocked when its harm probability is
233+ // above that setting's threshold. Otherwise, the safety filter will be
234+ // disabled by default.
235+ SafetySettings safety_settings = 9 [
236+ (google.api.field_behavior ) = INPUT_ONLY ,
237+ (google.api.field_behavior ) = OPTIONAL
238+ ];
239+
240+ // Optional. If true, textnorm will be applied to text input. This feature is
241+ // enabled by default. Applies only to Gemini TTS.
242+ optional bool enable_textnorm = 2 [(google.api.field_behavior ) = OPTIONAL ];
176243}
177244
178245// The top-level message sent by the client for the `SynthesizeSpeech` method.
@@ -186,8 +253,9 @@ message SynthesizeSpeechRequest {
186253 // Required. The configuration of the synthesized audio.
187254 AudioConfig audio_config = 3 [(google.api.field_behavior ) = REQUIRED ];
188255
189- // Advanced voice options.
190- optional AdvancedVoiceOptions advanced_voice_options = 8 ;
256+ // Optional. Advanced voice options.
257+ optional AdvancedVoiceOptions advanced_voice_options = 8
258+ [(google.api.field_behavior ) = OPTIONAL ];
191259}
192260
193261// Pronunciation customization for a phrase.
@@ -296,8 +364,8 @@ message SynthesisInput {
296364 // The raw text to be synthesized.
297365 string text = 1 ;
298366
299- // Markup for HD voices specifically. This field may not be used with any
300- // other voices.
367+ // Markup for Chirp 3: HD voices specifically. This field may not be used
368+ // with any other voices.
301369 string markup = 5 ;
302370
303371 // The SSML document to be synthesized. The SSML document must be valid
@@ -522,6 +590,10 @@ message StreamingSynthesizeConfig {
522590 // be inside a phoneme tag.
523591 CustomPronunciations custom_pronunciations = 5
524592 [(google.api.field_behavior ) = OPTIONAL ];
593+
594+ // Optional. Advanced voice options.
595+ optional AdvancedVoiceOptions advanced_voice_options = 7
596+ [(google.api.field_behavior ) = OPTIONAL ];
525597}
526598
527599// Input to be synthesized.
@@ -532,8 +604,8 @@ message StreamingSynthesisInput {
532604 // in the output audio.
533605 string text = 1 ;
534606
535- // Markup for HD voices specifically. This field may not be used with any
536- // other voices.
607+ // Markup for Chirp 3: HD voices specifically. This field may not be used
608+ // with any other voices.
537609 string markup = 5 ;
538610
539611 // Multi-speaker markup for Gemini TTS. This field may not
0 commit comments