Skip to content

Commit a481d8b

Browse files
Google APIs authored and copybara-github committed
feat: Support safety settings for Gemini voices and deprecate relax_safety_filters
feat: Support `enable_textnorm` for Gemini voices.
feat: Mark `advanced_voice_options` as optional.
docs: A comment for field `relax_safety_filters` in message `.google.cloud.texttospeech.v1beta1.AdvancedVoiceOptions` is changed
docs: A comment for field `advanced_voice_options` in message `.google.cloud.texttospeech.v1beta1.SynthesizeSpeechRequest` is changed
docs: A comment for field `markup` in message `.google.cloud.texttospeech.v1beta1.SynthesisInput` is changed
docs: A comment for field `markup` in message `.google.cloud.texttospeech.v1beta1.StreamingSynthesisInput` is changed

PiperOrigin-RevId: 892460024
1 parent ebd1d23 commit a481d8b

2 files changed

Lines changed: 84 additions & 12 deletions

File tree

google/cloud/texttospeech/v1/cloud_tts.proto

Lines changed: 83 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2025 Google LLC
1+
// Copyright 2026 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -161,18 +161,85 @@ message Voice {
161161

162162
// Used for advanced voice options.
163163
message AdvancedVoiceOptions {
164+
// Safety setting for a single harm category.
165+
message SafetySetting {
166+
// The harm category to apply the safety setting to.
167+
HarmCategory category = 1;
168+
169+
// The harm block threshold for the safety setting.
170+
HarmBlockThreshold threshold = 2;
171+
}
172+
173+
// Safety settings for the request.
174+
message SafetySettings {
175+
// The safety settings for the request.
176+
repeated SafetySetting settings = 1;
177+
}
178+
179+
// Harm categories that will block the content.
180+
enum HarmCategory {
181+
// Default value. This value is unused.
182+
HARM_CATEGORY_UNSPECIFIED = 0;
183+
184+
// Content that promotes violence or incites hatred against individuals or
185+
// groups based on certain attributes.
186+
HARM_CATEGORY_HATE_SPEECH = 1;
187+
188+
// Content that promotes, facilitates, or enables dangerous activities.
189+
HARM_CATEGORY_DANGEROUS_CONTENT = 2;
190+
191+
// Content that is abusive, threatening, or intended to bully, torment, or ridicule.
192+
HARM_CATEGORY_HARASSMENT = 3;
193+
194+
// Content that contains sexually explicit material.
195+
HARM_CATEGORY_SEXUALLY_EXPLICIT = 4;
196+
}
197+
198+
// Harm block thresholds for the safety settings.
199+
enum HarmBlockThreshold {
200+
// The harm block threshold is unspecified.
201+
HARM_BLOCK_THRESHOLD_UNSPECIFIED = 0;
202+
203+
// Block content with a low harm probability or higher.
204+
BLOCK_LOW_AND_ABOVE = 1;
205+
206+
// Block content with a medium harm probability or higher.
207+
BLOCK_MEDIUM_AND_ABOVE = 2;
208+
209+
// Block content with a high harm probability.
210+
BLOCK_ONLY_HIGH = 3;
211+
212+
// Do not block any content, regardless of its harm probability.
213+
BLOCK_NONE = 4;
214+
215+
// Turn off the safety filter entirely.
216+
OFF = 5;
217+
}
218+
164219
// Only for Journey voices. If false, the synthesis is context aware
165220
// and has a higher latency.
166221
optional bool low_latency_journey_synthesis = 1;
167222

168-
// Optional. Input only. If true, relaxes safety filters for Gemini TTS. Only
169-
// supported for accounts linked to Invoiced (Offline) Cloud billing accounts.
170-
// Otherwise, will return result
171-
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
223+
// Optional. Input only. Deprecated, use safety_settings instead.
224+
// If true, relaxes safety filters for Gemini TTS.
172225
bool relax_safety_filters = 8 [
226+
deprecated = true,
173227
(google.api.field_behavior) = INPUT_ONLY,
174228
(google.api.field_behavior) = OPTIONAL
175229
];
230+
231+
// Optional. Input only. This applies to Gemini TTS only. If set, content in
232+
// each listed harm category is blocked when its harm probability is at or
233+
// above that category's threshold. If unset, the safety filter is disabled
234+
// by default.
235+
SafetySettings safety_settings = 9 [
236+
(google.api.field_behavior) = INPUT_ONLY,
237+
(google.api.field_behavior) = OPTIONAL
238+
];
239+
240+
// Optional. If true, textnorm will be applied to text input. This feature is
241+
// enabled by default. Only applies to Gemini TTS.
242+
optional bool enable_textnorm = 2 [(google.api.field_behavior) = OPTIONAL];
176243
}
177244

178245
// The top-level message sent by the client for the `SynthesizeSpeech` method.
@@ -186,8 +253,9 @@ message SynthesizeSpeechRequest {
186253
// Required. The configuration of the synthesized audio.
187254
AudioConfig audio_config = 3 [(google.api.field_behavior) = REQUIRED];
188255

189-
// Advanced voice options.
190-
optional AdvancedVoiceOptions advanced_voice_options = 8;
256+
// Optional. Advanced voice options.
257+
optional AdvancedVoiceOptions advanced_voice_options = 8
258+
[(google.api.field_behavior) = OPTIONAL];
191259
}
192260

193261
// Pronunciation customization for a phrase.
@@ -296,8 +364,8 @@ message SynthesisInput {
296364
// The raw text to be synthesized.
297365
string text = 1;
298366

299-
// Markup for HD voices specifically. This field may not be used with any
300-
// other voices.
367+
// Markup for Chirp 3: HD voices specifically. This field may not be used
368+
// with any other voices.
301369
string markup = 5;
302370

303371
// The SSML document to be synthesized. The SSML document must be valid
@@ -522,6 +590,10 @@ message StreamingSynthesizeConfig {
522590
// be inside a phoneme tag.
523591
CustomPronunciations custom_pronunciations = 5
524592
[(google.api.field_behavior) = OPTIONAL];
593+
594+
// Optional. Advanced voice options.
595+
optional AdvancedVoiceOptions advanced_voice_options = 7
596+
[(google.api.field_behavior) = OPTIONAL];
525597
}
526598

527599
// Input to be synthesized.
@@ -532,8 +604,8 @@ message StreamingSynthesisInput {
532604
// in the output audio.
533605
string text = 1;
534606

535-
// Markup for HD voices specifically. This field may not be used with any
536-
// other voices.
607+
// Markup for Chirp 3: HD voices specifically. This field may not be used
608+
// with any other voices.
537609
string markup = 5;
538610

539611
// Multi-speaker markup for Gemini TTS. This field may not

google/cloud/texttospeech/v1/cloud_tts_lrs.proto

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2025 Google LLC
1+
// Copyright 2026 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.

0 commit comments

Comments
 (0)