Skip to content

Commit 47e412b

Browse files
committed
fix(review): preserve talk directive overrides
1 parent 4a0341e commit 47e412b

File tree

7 files changed: 70 additions and 4 deletions

7 files changed: 70 additions and 4 deletions

apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -723,6 +723,9 @@ class TalkModeManager(
723723
TalkModeRuntime.validatedLanguage(directive?.language)?.let {
724724
put("language", JsonPrimitive(it))
725725
}
726+
directive?.outputFormat?.trim()?.takeIf { it.isNotEmpty() }?.let {
727+
put("outputFormat", JsonPrimitive(it))
728+
}
726729
}
727730
val res = session.request("talk.speak", params.toString())
728731
val root = json.parseToJsonElement(res).asObjectOrNull() ?: error("talk.speak returned invalid JSON")

src/gateway/protocol/schema/channels.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ export const TalkSpeakParamsSchema = Type.Object(
2121
text: NonEmptyString,
2222
voiceId: Type.Optional(Type.String()),
2323
modelId: Type.Optional(Type.String()),
24+
outputFormat: Type.Optional(Type.String()),
2425
speed: Type.Optional(Type.Number()),
2526
stability: Type.Optional(Type.Number()),
2627
similarity: Type.Optional(Type.Number()),

src/gateway/server-methods/talk.ts

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,13 @@ function resolveTalkVoiceId(
6969
if (!aliases) {
7070
return requested;
7171
}
72-
return aliases[normalizeAliasKey(requested)] ?? requested;
72+
const normalizedRequested = normalizeAliasKey(requested);
73+
for (const [alias, voiceId] of Object.entries(aliases)) {
74+
if (normalizeAliasKey(alias) === normalizedRequested) {
75+
return voiceId;
76+
}
77+
}
78+
return requested;
7379
}
7480

7581
function readTalkVoiceSettings(
@@ -189,6 +195,7 @@ function buildTalkSpeakOverrides(
189195
): TtsDirectiveOverrides {
190196
const voiceId = resolveTalkVoiceId(providerConfig, trimString(params.voiceId));
191197
const modelId = trimString(params.modelId);
198+
const outputFormat = trimString(params.outputFormat);
192199
const speed = finiteNumber(params.speed);
193200
const seed = finiteNumber(params.seed);
194201
const normalize = normalizeTextNormalization(params.normalize);
@@ -212,6 +219,7 @@ function buildTalkSpeakOverrides(
212219
overrides.elevenlabs = {
213220
...(voiceId == null ? {} : { voiceId }),
214221
...(modelId == null ? {} : { modelId }),
222+
...(outputFormat == null ? {} : { outputFormat }),
215223
...(seed == null ? {} : { seed }),
216224
...(normalize == null ? {} : { applyTextNormalization: normalize }),
217225
...(language == null ? {} : { languageCode: language }),
@@ -230,7 +238,10 @@ function buildTalkSpeakOverrides(
230238
}
231239

232240
if (provider === "microsoft") {
233-
overrides.microsoft = voiceId == null ? undefined : { voice: voiceId };
241+
overrides.microsoft = {
242+
...(voiceId == null ? {} : { voice: voiceId }),
243+
...(outputFormat == null ? {} : { outputFormat }),
244+
};
234245
}
235246

236247
return overrides;

src/gateway/server.talk-config.test.ts

Lines changed: 47 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -301,4 +301,51 @@ describe("gateway talk.config", () => {
301301
globalThis.fetch = originalFetch;
302302
}
303303
});
304+
305+
it("resolves talk voice aliases case-insensitively and forwards output format", async () => {
306+
const { writeConfigFile } = await import("../config/config.js");
307+
await writeConfigFile({
308+
talk: {
309+
provider: "elevenlabs",
310+
providers: {
311+
elevenlabs: {
312+
apiKey: "elevenlabs-talk-key", // pragma: allowlist secret
313+
voiceId: "voice-default",
314+
voiceAliases: {
315+
Clawd: "EXAVITQu4vr4xnSDxMaL",
316+
},
317+
},
318+
},
319+
},
320+
});
321+
322+
const originalFetch = globalThis.fetch;
323+
let fetchUrl: string | undefined;
324+
const fetchMock = vi.fn(async (input: RequestInfo | URL) => {
325+
fetchUrl = typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
326+
return new Response(new Uint8Array([4, 5, 6]), { status: 200 });
327+
});
328+
globalThis.fetch = fetchMock as typeof fetch;
329+
330+
try {
331+
await withServer(async (ws) => {
332+
await connectOperator(ws, ["operator.read", "operator.write"]);
333+
const res = await fetchTalkSpeak(ws, {
334+
text: "Hello from talk mode.",
335+
voiceId: "clawd",
336+
outputFormat: "pcm_44100",
337+
});
338+
expect(res.ok).toBe(true);
339+
expect(res.payload?.provider).toBe("elevenlabs");
340+
expect(res.payload?.outputFormat).toBe("pcm_44100");
341+
expect(res.payload?.audioBase64).toBe(Buffer.from([4, 5, 6]).toString("base64"));
342+
});
343+
344+
expect(fetchMock).toHaveBeenCalled();
345+
expect(fetchUrl).toContain("/v1/text-to-speech/EXAVITQu4vr4xnSDxMaL");
346+
expect(fetchUrl).toContain("output_format=pcm_44100");
347+
} finally {
348+
globalThis.fetch = originalFetch;
349+
}
350+
});
304351
});

src/tts/providers/elevenlabs.ts

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,9 @@ export function buildElevenLabsSpeechProvider(): SpeechProviderPlugin {
7272
if (!apiKey) {
7373
throw new Error("ElevenLabs API key missing");
7474
}
75-
const outputFormat = req.target === "voice-note" ? "opus_48000_64" : "mp3_44100_128";
75+
const outputFormat =
76+
req.overrides?.elevenlabs?.outputFormat ??
77+
(req.target === "voice-note" ? "opus_48000_64" : "mp3_44100_128");
7678
const audioBuffer = await elevenLabsTTS({
7779
text: req.text,
7880
apiKey,

src/tts/providers/microsoft.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,7 @@ export function buildMicrosoftSpeechProvider(): SpeechProviderPlugin {
8383
const tempRoot = resolvePreferredOpenClawTmpDir();
8484
mkdirSync(tempRoot, { recursive: true, mode: 0o700 });
8585
const tempDir = mkdtempSync(path.join(tempRoot, "tts-microsoft-"));
86-
let outputFormat = req.config.edge.outputFormat;
86+
let outputFormat = req.overrides?.microsoft?.outputFormat ?? req.config.edge.outputFormat;
8787
const fallbackOutputFormat =
8888
outputFormat !== DEFAULT_EDGE_OUTPUT_FORMAT ? DEFAULT_EDGE_OUTPUT_FORMAT : undefined;
8989

src/tts/tts.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -167,13 +167,15 @@ export type TtsDirectiveOverrides = {
167167
elevenlabs?: {
168168
voiceId?: string;
169169
modelId?: string;
170+
outputFormat?: string;
170171
seed?: number;
171172
applyTextNormalization?: "auto" | "on" | "off";
172173
languageCode?: string;
173174
voiceSettings?: Partial<ResolvedTtsConfig["elevenlabs"]["voiceSettings"]>;
174175
};
175176
microsoft?: {
176177
voice?: string;
178+
outputFormat?: string;
177179
};
178180
};
179181

0 commit comments

Comments
 (0)