@@ -131,6 +131,41 @@ private let defaultOperatorConnectScopes: [String] = [
131131 " operator.pairing " ,
132132]
133133
// Caseless enum used as a namespace for the string codes that are compared
// against `GatewayConnectAuthError.detailCode` elsewhere in this file.
// NOTE(review): the literals look padded with a leading/trailing space in this
// view; presumably an artifact of how the diff was rendered - confirm against
// the real source before relying on the exact bytes.
134+ private enum GatewayConnectErrorCodes {
// Checked by shouldRetryWithStoredDeviceToken and
// shouldPauseReconnectAfterAuthFailure.
135+ static let authTokenMismatch = " AUTH_TOKEN_MISMATCH "
// Checked by shouldClearStoredDeviceTokenAfterRetry.
136+ static let authDeviceTokenMismatch = " AUTH_DEVICE_TOKEN_MISMATCH "
// The remaining codes are the ones GatewayConnectAuthError.isNonRecoverable
// reports as non-recoverable.
137+ static let authTokenMissing = " AUTH_TOKEN_MISSING "
138+ static let authPasswordMissing = " AUTH_PASSWORD_MISSING "
139+ static let authPasswordMismatch = " AUTH_PASSWORD_MISMATCH "
140+ static let authRateLimited = " AUTH_RATE_LIMITED "
141+ static let pairingRequired = " PAIRING_REQUIRED "
142+ static let controlUiDeviceIdentityRequired = " CONTROL_UI_DEVICE_IDENTITY_REQUIRED "
143+ static let deviceIdentityRequired = " DEVICE_IDENTITY_REQUIRED "
144+ }
145+
/// Error describing a rejected gateway connect request.
///
/// Carries the server-provided message together with an optional detail code
/// and a retry hint so callers can decide between retrying and pausing.
private struct GatewayConnectAuthError: LocalizedError {
    /// Failure text; also returned from `errorDescription`.
    let message: String
    /// Optional code compared against the `GatewayConnectErrorCodes` constants.
    let detailCode: String?
    /// Retry hint stored alongside the error.
    let canRetryWithDeviceToken: Bool

    /// `LocalizedError` description; always the stored `message`.
    var errorDescription: String? {
        return self.message
    }

    /// `true` when `detailCode` is one of the codes listed below.
    /// A missing code, or any other code, yields `false`.
    var isNonRecoverable: Bool {
        guard let code = self.detailCode else {
            return false
        }
        let nonRecoverableCodes: [String] = [
            GatewayConnectErrorCodes.authTokenMissing,
            GatewayConnectErrorCodes.authPasswordMissing,
            GatewayConnectErrorCodes.authPasswordMismatch,
            GatewayConnectErrorCodes.authRateLimited,
            GatewayConnectErrorCodes.pairingRequired,
            GatewayConnectErrorCodes.controlUiDeviceIdentityRequired,
            GatewayConnectErrorCodes.deviceIdentityRequired,
        ]
        return nonRecoverableCodes.contains(code)
    }
}
168+
134169public actor GatewayChannelActor {
135170 private let logger = Logger ( subsystem: " ai.openclaw " , category: " gateway " )
136171 private var task : WebSocketTaskBox ?
@@ -160,6 +195,9 @@ public actor GatewayChannelActor {
160195 private var watchdogTask : Task < Void , Never > ?
161196 private var tickTask : Task < Void , Never > ?
162197 private var keepaliveTask : Task < Void , Never > ?
198+ private var pendingDeviceTokenRetry = false
199+ private var deviceTokenRetryBudgetUsed = false
200+ private var reconnectPausedForAuthFailure = false
163201 private let defaultRequestTimeoutMs : Double = 15000
164202 private let pushHandler : ( @Sendable ( GatewayPush) async -> Void ) ?
165203 private let connectOptions : GatewayConnectOptions ?
@@ -232,10 +270,19 @@ public actor GatewayChannelActor {
232270 while self . shouldReconnect {
233271 guard await self . sleepUnlessCancelled ( nanoseconds: 30 * 1_000_000_000 ) else { return } // 30s cadence
234272 guard self . shouldReconnect else { return }
273+ if self . reconnectPausedForAuthFailure { continue }
235274 if self . connected { continue }
236275 do {
237276 try await self . connect ( )
238277 } catch {
278+ if self . shouldPauseReconnectAfterAuthFailure ( error) {
279+ self . reconnectPausedForAuthFailure = true
280+ self . logger. error (
281+ " gateway watchdog reconnect paused for non-recoverable auth failure " +
282+ " \( error. localizedDescription, privacy: . public) "
283+ )
284+ continue
285+ }
239286 let wrapped = self . wrap ( error, context: " gateway watchdog reconnect " )
240287 self . logger. error ( " gateway watchdog reconnect failed \( wrapped. localizedDescription, privacy: . public) " )
241288 }
@@ -267,7 +314,12 @@ public actor GatewayChannelActor {
267314 } ,
268315 operation: { try await self . sendConnect ( ) } )
269316 } catch {
270- let wrapped = self . wrap ( error, context: " connect to gateway @ \( self . url. absoluteString) " )
317+ let wrapped : Error
318+ if let authError = error as? GatewayConnectAuthError {
319+ wrapped = authError
320+ } else {
321+ wrapped = self . wrap ( error, context: " connect to gateway @ \( self . url. absoluteString) " )
322+ }
271323 self . connected = false
272324 self . task? . cancel ( with: . goingAway, reason: nil )
273325 await self . disconnectHandler ? ( " connect failed: \( wrapped. localizedDescription) " )
@@ -281,6 +333,7 @@ public actor GatewayChannelActor {
281333 }
282334 self . listen ( )
283335 self . connected = true
336+ self . reconnectPausedForAuthFailure = false
284337 self . backoffMs = 500
285338 self . lastSeq = nil
286339 self . startKeepalive ( )
@@ -371,11 +424,18 @@ public actor GatewayChannelActor {
371424 ( includeDeviceIdentity && identity != nil )
372425 ? DeviceAuthStore . loadToken ( deviceId: identity!. deviceId, role: role) ? . token
373426 : nil
374- // If we're not sending a device identity, a device token can't be validated server-side.
375- // In that mode we always use the shared gateway token/password.
376- let authToken = includeDeviceIdentity ? ( storedToken ?? self . token) : self . token
427+ let shouldUseDeviceRetryToken =
428+ includeDeviceIdentity && self . pendingDeviceTokenRetry &&
429+ storedToken != nil && self . token != nil && self . isTrustedDeviceRetryEndpoint ( )
430+ if shouldUseDeviceRetryToken {
431+ self . pendingDeviceTokenRetry = false
432+ }
433+ // Keep shared credentials explicit when provided. Device token retry is attached
434+ // only on a bounded second attempt after token mismatch.
435+ let authToken = self . token ?? ( includeDeviceIdentity ? storedToken : nil )
436+ let authDeviceToken = shouldUseDeviceRetryToken ? storedToken : nil
377437 let authSource : GatewayAuthSource
378- if storedToken != nil {
438+ if authDeviceToken != nil || ( self . token == nil && storedToken != nil ) {
379439 authSource = . deviceToken
380440 } else if authToken != nil {
381441 authSource = . sharedToken
@@ -386,9 +446,12 @@ public actor GatewayChannelActor {
386446 }
387447 self . lastAuthSource = authSource
388448 self . logger. info ( " gateway connect auth= \( authSource. rawValue, privacy: . public) " )
389- let canFallbackToShared = includeDeviceIdentity && storedToken != nil && self . token != nil
390449 if let authToken {
391- params [ " auth " ] = ProtoAnyCodable ( [ " token " : ProtoAnyCodable ( authToken) ] )
450+ var auth : [ String : ProtoAnyCodable ] = [ " token " : ProtoAnyCodable ( authToken) ]
451+ if let authDeviceToken {
452+ auth [ " deviceToken " ] = ProtoAnyCodable ( authDeviceToken)
453+ }
454+ params [ " auth " ] = ProtoAnyCodable ( auth)
392455 } else if let password = self . password {
393456 params [ " auth " ] = ProtoAnyCodable ( [ " password " : ProtoAnyCodable ( password) ] )
394457 }
@@ -426,11 +489,24 @@ public actor GatewayChannelActor {
426489 do {
427490 let response = try await self . waitForConnectResponse ( reqId: reqId)
428491 try await self . handleConnectResponse ( response, identity: identity, role: role)
492+ self . pendingDeviceTokenRetry = false
493+ self . deviceTokenRetryBudgetUsed = false
429494 } catch {
430- if canFallbackToShared {
431- if let identity {
432- DeviceAuthStore . clearToken ( deviceId: identity. deviceId, role: role)
433- }
495+ let shouldRetryWithDeviceToken = self . shouldRetryWithStoredDeviceToken (
496+ error: error,
497+ explicitGatewayToken: self . token,
498+ storedToken: storedToken,
499+ attemptedDeviceTokenRetry: authDeviceToken != nil )
500+ if shouldRetryWithDeviceToken {
501+ self . pendingDeviceTokenRetry = true
502+ self . deviceTokenRetryBudgetUsed = true
503+ self . backoffMs = min ( self . backoffMs, 250 )
504+ } else if authDeviceToken != nil ,
505+ let identity,
506+ self . shouldClearStoredDeviceTokenAfterRetry ( error)
507+ {
508+ // Retry failed with an explicit device-token mismatch; clear stale local token.
509+ DeviceAuthStore . clearToken ( deviceId: identity. deviceId, role: role)
434510 }
435511 throw error
436512 }
@@ -443,7 +519,13 @@ public actor GatewayChannelActor {
443519 ) async throws {
444520 if res. ok == false {
445521 let msg = ( res. error ? [ " message " ] ? . value as? String ) ?? " gateway connect failed "
446- throw NSError ( domain: " Gateway " , code: 1008 , userInfo: [ NSLocalizedDescriptionKey: msg] )
522+ let details = res. error ? [ " details " ] ? . value as? [ String : ProtoAnyCodable ]
523+ let detailCode = details ? [ " code " ] ? . value as? String
524+ let canRetryWithDeviceToken = details ? [ " canRetryWithDeviceToken " ] ? . value as? Bool ?? false
525+ throw GatewayConnectAuthError (
526+ message: msg,
527+ detailCode: detailCode,
528+ canRetryWithDeviceToken: canRetryWithDeviceToken)
447529 }
448530 guard let payload = res. payload else {
449531 throw NSError (
@@ -616,19 +698,91 @@ public actor GatewayChannelActor {
616698
// Performs one delayed reconnect attempt and, if it fails with an error that
// does not pause reconnecting, calls itself again for the next attempt.
// Does nothing when `shouldReconnect` is false or when reconnecting has been
// paused for an auth failure.
617699 private func scheduleReconnect( ) async {
618700 guard self . shouldReconnect else { return }
701+ guard !self . reconnectPausedForAuthFailure else { return }
// The delay is taken from the current backoff (ms / 1000, later scaled to
// nanoseconds) before the backoff is doubled, capped at 30000.
619702 let delay = self . backoffMs / 1000
620703 self . backoffMs = min ( self . backoffMs * 2 , 30000 )
// NOTE(review): presumably returns false when the sleep was cancelled - the
// helper's catch branch is not visible here; confirm.
621704 guard await self . sleepUnlessCancelled ( nanoseconds: UInt64 ( delay * 1_000_000_000 ) ) else { return }
// Both flags are checked again after the suspension point above.
622705 guard self . shouldReconnect else { return }
706+ guard !self . reconnectPausedForAuthFailure else { return }
623707 do {
624708 try await self . connect ( )
625709 } catch {
// Errors classified by shouldPauseReconnectAfterAuthFailure set the pause
// flag and end the retry chain without scheduling another attempt.
710+ if self . shouldPauseReconnectAfterAuthFailure ( error) {
711+ self . reconnectPausedForAuthFailure = true
712+ self . logger. error (
713+ " gateway reconnect paused for non-recoverable auth failure " +
714+ " \( error. localizedDescription, privacy: . public) "
715+ )
716+ return
717+ }
// Any other failure is logged and followed by another scheduled attempt,
// which uses the already-doubled backoff.
626718 let wrapped = self . wrap ( error, context: " gateway reconnect " )
627719 self . logger. error ( " gateway reconnect failed \( wrapped. localizedDescription, privacy: . public) " )
628720 await self . scheduleReconnect ( )
629721 }
630722 }
631723
/// Decides whether a failed connect attempt qualifies for a retry that also
/// carries the stored device token.
///
/// - Parameters:
///   - error: The failure raised by the connect attempt.
///   - explicitGatewayToken: The explicitly configured gateway token, if any.
///   - storedToken: The stored device token, if any.
///   - attemptedDeviceTokenRetry: Whether the failed attempt already carried the device token.
/// - Returns: `true` only when the retry budget is unused, the failed attempt
///   was not itself a device-token retry, both tokens exist, the endpoint is
///   trusted, and the error is a `GatewayConnectAuthError` that either sets
///   `canRetryWithDeviceToken` or has the `authTokenMismatch` detail code.
private func shouldRetryWithStoredDeviceToken(
    error: Error,
    explicitGatewayToken: String?,
    storedToken: String?,
    attemptedDeviceTokenRetry: Bool
) -> Bool {
    let retryAlreadySpent = self.deviceTokenRetryBudgetUsed || attemptedDeviceTokenRetry
    if retryAlreadySpent {
        return false
    }
    let hasBothTokens = explicitGatewayToken != nil && storedToken != nil
    guard hasBothTokens,
          self.isTrustedDeviceRetryEndpoint(),
          let rejection = error as? GatewayConnectAuthError
    else {
        return false
    }
    if rejection.canRetryWithDeviceToken {
        return true
    }
    return rejection.detailCode == GatewayConnectErrorCodes.authTokenMismatch
}
748+
/// Reports whether automatic reconnecting should be paused after `error`.
///
/// Only `GatewayConnectAuthError` values can pause reconnecting. They do so
/// when the error is non-recoverable, or when it is a token mismatch and the
/// device-token retry budget is used with no retry still pending.
private func shouldPauseReconnectAfterAuthFailure(_ error: Error) -> Bool {
    guard let rejection = error as? GatewayConnectAuthError else {
        return false
    }
    let isTokenMismatch = rejection.detailCode == GatewayConnectErrorCodes.authTokenMismatch
    let retryExhausted = self.deviceTokenRetryBudgetUsed && !self.pendingDeviceTokenRetry
    return rejection.isNonRecoverable || (isTokenMismatch && retryExhausted)
}
763+
/// Reports whether `error` is a `GatewayConnectAuthError` whose detail code is
/// `authDeviceTokenMismatch`. Any other error, or a missing code, yields `false`.
private func shouldClearStoredDeviceTokenAfterRetry(_ error: Error) -> Bool {
    let detailCode = (error as? GatewayConnectAuthError)?.detailCode
    return detailCode == GatewayConnectErrorCodes.authDeviceTokenMismatch
}
770+
/// Reports whether `self.url` targets a loopback host, the only kind of
/// endpoint this client trusts for a device-token retry.
///
/// Unlike the Node gateway client, this client does not yet expose a pinned
/// TLS-fingerprint trust path for remote retry, so remote fallback stays
/// disabled by default.
///
/// - Returns: `true` for `localhost`, `::1`, or a dotted-quad IPv4 literal in
///   127.0.0.0/8; `false` for everything else, including a missing or empty host.
private func isTrustedDeviceRetryEndpoint() -> Bool {
    guard let host = self.url.host?.trimmingCharacters(in: .whitespacesAndNewlines).lowercased(),
          !host.isEmpty
    else {
        return false
    }
    if host == "localhost" || host == "::1" {
        return true
    }
    // A bare `hasPrefix("127.")` also accepts DNS names such as
    // "127.0.0.1.attacker.example", which resolve to arbitrary remote hosts.
    // Require a complete IPv4 literal instead: exactly four decimal octets,
    // each within 0...255, with the first equal to 127. Abbreviated forms such
    // as "127.1" are rejected on purpose so the check fails closed.
    let octets = host.split(separator: ".", omittingEmptySubsequences: false)
    guard octets.count == 4, octets.first == "127" else {
        return false
    }
    return octets.allSatisfy { octet in
        // Digits only (no sign or whitespace), then range-checked via UInt8.
        !octet.isEmpty
            && octet.allSatisfy { ("0"..."9").contains($0) }
            && UInt8(octet) != nil
    }
}
785+
632786 private nonisolated func sleepUnlessCancelled( nanoseconds: UInt64 ) async -> Bool {
633787 do {
634788 try await Task . sleep ( nanoseconds: nanoseconds)
@@ -756,7 +910,8 @@ public actor GatewayChannelActor {
756910 return ( id: id, data: data)
757911 } catch {
758912 self . logger. error (
759- " gateway \( kind) encode failed \( method, privacy: . public) error= \( error. localizedDescription, privacy: . public) " )
913+ " gateway \( kind) encode failed \( method, privacy: . public) " +
914+ " error= \( error. localizedDescription, privacy: . public) " )
760915 throw error
761916 }
762917 }
0 commit comments