Skip to content

Commit dd88886

Browse files
mbelinky and ngutman committed
fix(ios): guard talk TTS callbacks to active utterance
Co-authored-by: Nimrod Gutman <[email protected]>
1 parent a8dd9ff commit dd88886

File tree

2 files changed

+34
-5
lines changed

2 files changed

+34
-5
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ Docs: https://docs.openclaw.ai
1414
### Fixes
1515

1616
- Docs/security hardening guidance: document Docker `DOCKER-USER` + UFW policy and add cross-linking from Docker install docs for VPS/public-host setups. (#27613) thanks @dorukardahan.
17+
- iOS/Voice timing safety: guard system speech start/finish callbacks to the active utterance to avoid misattributed start events during rapid stop/restart cycles. (#33304) thanks @mbelinky; original implementation direction by @ngutman.
1718
- Docs/tool-loop detection config keys: align `docs/tools/loop-detection.md` examples and field names with the current `tools.loopDetection` schema to prevent copy-paste validation failures from outdated keys. (#33182) Thanks @Mylszd.
1819
- Gateway/session agent discovery: include disk-scanned agent IDs in `listConfiguredAgentIds` even when `agents.list` is configured, so disk-only/ACP agent sessions remain visible in gateway session aggregation and listings. (#32831) thanks @Sid-Qin.
1920
- Discord/inbound debouncer: skip bot-own MESSAGE_CREATE events before they reach the debounce queue to avoid self-triggered slowdowns in busy servers. Thanks @thewilloftheshadow.

apps/shared/OpenClawKit/Sources/OpenClawKit/TalkSystemSpeechSynthesizer.swift

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ public final class TalkSystemSpeechSynthesizer: NSObject {
1212
private let synth = AVSpeechSynthesizer()
1313
private var speakContinuation: CheckedContinuation<Void, Error>?
1414
private var currentUtterance: AVSpeechUtterance?
15+
private var didStartCallback: (() -> Void)?
1516
private var currentToken = UUID()
1617
private var watchdog: Task<Void, Never>?
1718

@@ -26,17 +27,23 @@ public final class TalkSystemSpeechSynthesizer: NSObject {
2627
self.currentToken = UUID()
2728
self.watchdog?.cancel()
2829
self.watchdog = nil
30+
self.didStartCallback = nil
2931
self.synth.stopSpeaking(at: .immediate)
3032
self.finishCurrent(with: SpeakError.canceled)
3133
}
3234

33-
public func speak(text: String, language: String? = nil) async throws {
35+
public func speak(
36+
text: String,
37+
language: String? = nil,
38+
onStart: (() -> Void)? = nil
39+
) async throws {
3440
let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
3541
guard !trimmed.isEmpty else { return }
3642

3743
self.stop()
3844
let token = UUID()
3945
self.currentToken = token
46+
self.didStartCallback = onStart
4047

4148
let utterance = AVSpeechUtterance(string: trimmed)
4249
if let language, let voice = AVSpeechSynthesisVoice(language: language) {
@@ -76,15 +83,21 @@ public final class TalkSystemSpeechSynthesizer: NSObject {
7683
}
7784
}
7885

79-
private func handleFinish(error: Error?) {
80-
guard self.currentUtterance != nil else { return }
86+
/// Reports whether `utteranceID` identifies the utterance currently stored in `currentUtterance`.
///
/// - Parameter utteranceID: Object identity of the utterance a delegate callback was delivered for.
/// - Returns: `false` when no utterance is stored, otherwise whether the identities are equal.
private func matchesCurrentUtterance(_ utteranceID: ObjectIdentifier) -> Bool {
87+
guard let currentUtterance = self.currentUtterance else { return false }
88+
return ObjectIdentifier(currentUtterance) == utteranceID
89+
}
90+
91+
/// Handles a finish/cancel notification for the utterance identified by `utteranceID`.
///
/// Notifications for any utterance other than the stored `currentUtterance` are ignored.
/// Otherwise the watchdog task is cancelled and cleared, and `error` is forwarded to
/// `finishCurrent(with:)`.
///
/// - Parameters:
///   - utteranceID: Object identity of the utterance the notification refers to.
///   - error: Passed through unchanged to `finishCurrent(with:)`; `nil` is passed by the `didFinish` delegate callback.
private func handleFinish(utteranceID: ObjectIdentifier, error: Error?) {
92+
// Drop callbacks that belong to an utterance that is no longer the current one.
guard self.matchesCurrentUtterance(utteranceID) else { return }
8193
self.watchdog?.cancel()
8294
self.watchdog = nil
8395
self.finishCurrent(with: error)
8496
}
8597

8698
private func finishCurrent(with error: Error?) {
8799
self.currentUtterance = nil
100+
self.didStartCallback = nil
88101
let cont = self.speakContinuation
89102
self.speakContinuation = nil
90103
if let error {
@@ -96,21 +109,36 @@ public final class TalkSystemSpeechSynthesizer: NSObject {
96109
}
97110

98111
// MARK: - AVSpeechSynthesizerDelegate
//
// Each delegate method is `nonisolated`; it records the utterance's `ObjectIdentifier`
// up front and then does its work inside a `Task { @MainActor in ... }`.
extension TalkSystemSpeechSynthesizer: AVSpeechSynthesizerDelegate {
112+
/// Start notification: runs the stored `didStartCallback` if `utterance` is the current one.
public nonisolated func speechSynthesizer(
113+
_ synthesizer: AVSpeechSynthesizer,
114+
didStart utterance: AVSpeechUtterance)
115+
{
116+
// Capture only the identity; the utterance object itself is not used inside the task.
let utteranceID = ObjectIdentifier(utterance)
117+
Task { @MainActor in
118+
guard self.matchesCurrentUtterance(utteranceID) else { return }
119+
// Take the callback and clear the stored property before invoking it,
// so a given stored callback is invoked at most once from here.
let callback = self.didStartCallback
120+
self.didStartCallback = nil
121+
callback?()
122+
}
123+
}
124+
99125
/// Finish notification: forwards to `handleFinish` with no error.
public nonisolated func speechSynthesizer(
100126
_ synthesizer: AVSpeechSynthesizer,
101127
didFinish utterance: AVSpeechUtterance)
102128
{
129+
let utteranceID = ObjectIdentifier(utterance)
103130
Task { @MainActor in
104-
self.handleFinish(error: nil)
131+
self.handleFinish(utteranceID: utteranceID, error: nil)
105132
}
106133
}
107134

108135
/// Cancel notification: forwards to `handleFinish` with `SpeakError.canceled`.
public nonisolated func speechSynthesizer(
109136
_ synthesizer: AVSpeechSynthesizer,
110137
didCancel utterance: AVSpeechUtterance)
111138
{
139+
let utteranceID = ObjectIdentifier(utterance)
112140
Task { @MainActor in
113-
self.handleFinish(error: SpeakError.canceled)
141+
self.handleFinish(utteranceID: utteranceID, error: SpeakError.canceled)
114142
}
115143
}
116144
}

0 commit comments

Comments
 (0)