Skip to content

Commit ffa39f6

Browse files
authored
feat(ai): Implement session resumption and context window compression for live api (#9795)
1 parent f0813ce commit ffa39f6

20 files changed

Lines changed: 697 additions & 111 deletions

.changeset/little-apples-rush.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
'@firebase/ai': minor
3+
'firebase': minor
4+
---
5+
6+
Added `LiveSession.resumeSession()` to allow resuming a previous `LiveSession`. Also added the `contextWindowCompression` option to `LiveGenerationConfig`.

common/api-review/ai.api.md

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,12 @@ export interface Content {
264264
role: Role;
265265
}
266266

267+
// @beta
268+
export interface ContextWindowCompressionConfig {
269+
slidingWindow?: SlidingWindow;
270+
triggerTokens?: number;
271+
}
272+
267273
// @public
268274
export interface CountTokensRequest {
269275
// (undocumented)
@@ -983,6 +989,7 @@ export interface LanguageModelPromptOptions {
983989

984990
// @beta
985991
export interface LiveGenerationConfig {
992+
contextWindowCompression?: ContextWindowCompressionConfig;
986993
frequencyPenalty?: number;
987994
inputAudioTranscription?: AudioTranscriptionConfig;
988995
maxOutputTokens?: number;
@@ -1001,8 +1008,8 @@ export class LiveGenerativeModel extends AIModel {
10011008
//
10021009
// @internal
10031010
constructor(ai: AI, modelParams: LiveModelParams,
1004-
_webSocketHandler: WebSocketHandler);
1005-
connect(): Promise<LiveSession>;
1011+
_webSocketHandler?: WebSocketHandler | undefined);
1012+
connect(sessionResumption?: SessionResumptionConfig): Promise<LiveSession>;
10061013
// (undocumented)
10071014
generationConfig: LiveGenerationConfig;
10081015
// (undocumented)
@@ -1033,6 +1040,7 @@ export const LiveResponseType: {
10331040
TOOL_CALL: string;
10341041
TOOL_CALL_CANCELLATION: string;
10351042
GOING_AWAY_NOTICE: string;
1043+
SESSION_RESUMPTION_UPDATE: string;
10361044
};
10371045

10381046
// @beta
@@ -1072,12 +1080,16 @@ export interface LiveServerToolCallCancellation {
10721080

10731081
// @beta
10741082
export class LiveSession {
1083+
// Warning: (ae-forgotten-export) The symbol "_LiveClientSetup" needs to be exported by the entry point index.d.ts
1084+
//
10751085
// @internal
1076-
constructor(webSocketHandler: WebSocketHandler, serverMessages: AsyncGenerator<unknown>);
1086+
constructor(_setupMessage: _LiveClientSetup, _apiSettings: ApiSettings, _sessionResumption?: SessionResumptionConfig | undefined, webSocketHandler?: WebSocketHandler);
10771087
close(): Promise<void>;
1088+
connectionPromise: Promise<void>;
10781089
inConversation: boolean;
10791090
isClosed: boolean;
1080-
receive(): AsyncGenerator<LiveServerContent | LiveServerToolCall | LiveServerToolCallCancellation | LiveServerGoingAwayNotice>;
1091+
receive(): AsyncGenerator<LiveServerContent | LiveServerToolCall | LiveServerToolCallCancellation | LiveServerGoingAwayNotice | LiveSessionResumptionUpdate>;
1092+
resumeSession(sessionResumption?: SessionResumptionConfig): Promise<void>;
10811093
send(request: string | Array<string | Part>, turnComplete?: boolean): Promise<void>;
10821094
sendAudioRealtime(blob: GenerativeContentBlob): Promise<void>;
10831095
sendFunctionResponses(functionResponses: FunctionResponse[]): Promise<void>;
@@ -1089,6 +1101,15 @@ export class LiveSession {
10891101
sendVideoRealtime(blob: GenerativeContentBlob): Promise<void>;
10901102
}
10911103

1104+
// @beta
1105+
export interface LiveSessionResumptionUpdate {
1106+
lastConsumedClientMessageIndex?: number;
1107+
newHandle?: string;
1108+
resumable?: boolean;
1109+
// (undocumented)
1110+
type: 'sessionResumptionUpdate';
1111+
}
1112+
10921113
// @public
10931114
export const Modality: {
10941115
readonly MODALITY_UNSPECIFIED: "MODALITY_UNSPECIFIED";
@@ -1342,11 +1363,21 @@ export interface Segment {
13421363
text: string;
13431364
}
13441365

1366+
// @beta
1367+
export interface SessionResumptionConfig {
1368+
handle?: string;
1369+
}
1370+
13451371
// @public
13461372
export interface SingleRequestOptions extends RequestOptions {
13471373
signal?: AbortSignal;
13481374
}
13491375

1376+
// @beta
1377+
export interface SlidingWindow {
1378+
targetTokens?: number;
1379+
}
1380+
13501381
// @beta
13511382
export interface SpeechConfig {
13521383
voiceConfig?: VoiceConfig;

docs-devsite/_toc.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ toc:
4444
path: /docs/reference/js/ai.codeexecutiontool.md
4545
- title: Content
4646
path: /docs/reference/js/ai.content.md
47+
- title: ContextWindowCompressionConfig
48+
path: /docs/reference/js/ai.contextwindowcompressionconfig.md
4749
- title: CountTokensRequest
4850
path: /docs/reference/js/ai.counttokensrequest.md
4951
- title: CountTokensResponse
@@ -156,6 +158,8 @@ toc:
156158
path: /docs/reference/js/ai.liveservertoolcallcancellation.md
157159
- title: LiveSession
158160
path: /docs/reference/js/ai.livesession.md
161+
- title: LiveSessionResumptionUpdate
162+
path: /docs/reference/js/ai.livesessionresumptionupdate.md
159163
- title: ModalityTokenCount
160164
path: /docs/reference/js/ai.modalitytokencount.md
161165
- title: ModelParams
@@ -194,8 +198,12 @@ toc:
194198
path: /docs/reference/js/ai.searchentrypoint.md
195199
- title: Segment
196200
path: /docs/reference/js/ai.segment.md
201+
- title: SessionResumptionConfig
202+
path: /docs/reference/js/ai.sessionresumptionconfig.md
197203
- title: SingleRequestOptions
198204
path: /docs/reference/js/ai.singlerequestoptions.md
205+
- title: SlidingWindow
206+
path: /docs/reference/js/ai.slidingwindow.md
199207
- title: SpeechConfig
200208
path: /docs/reference/js/ai.speechconfig.md
201209
- title: StartAudioConversationOptions
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
Project: /docs/reference/js/_project.yaml
2+
Book: /docs/reference/_book.yaml
3+
page_type: reference
4+
5+
{% comment %}
6+
DO NOT EDIT THIS FILE!
7+
This is generated by the JS SDK team, and any local changes will be
8+
overwritten. Changes should be made in the source code at
9+
https://github.com/firebase/firebase-js-sdk
10+
{% endcomment %}
11+
12+
# ContextWindowCompressionConfig interface
13+
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
14+
>
15+
16+
Enables context window compression to manage the model's context window.
17+
18+
This mechanism prevents the context from exceeding a given length.
19+
20+
<b>Signature:</b>
21+
22+
```typescript
23+
export interface ContextWindowCompressionConfig
24+
```
25+
26+
## Properties
27+
28+
| Property | Type | Description |
29+
| --- | --- | --- |
30+
| [slidingWindow](./ai.contextwindowcompressionconfig.md#contextwindowcompressionconfigslidingwindow) | [SlidingWindow](./ai.slidingwindow.md#slidingwindow_interface) | <b><i>(Public Preview)</i></b> The sliding window compression mechanism. |
31+
| [triggerTokens](./ai.contextwindowcompressionconfig.md#contextwindowcompressionconfigtriggertokens) | number | <b><i>(Public Preview)</i></b> The number of tokens (before running a turn) that triggers the context window compression. |
32+
33+
## ContextWindowCompressionConfig.slidingWindow
34+
35+
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
36+
>
37+
38+
The sliding window compression mechanism.
39+
40+
<b>Signature:</b>
41+
42+
```typescript
43+
slidingWindow?: SlidingWindow;
44+
```
45+
46+
## ContextWindowCompressionConfig.triggerTokens
47+
48+
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
49+
>
50+
51+
The number of tokens (before running a turn) that triggers the context window compression.
52+
53+
<b>Signature:</b>
54+
55+
```typescript
56+
triggerTokens?: number;
57+
```

docs-devsite/ai.livegenerationconfig.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ export interface LiveGenerationConfig
2525

2626
| Property | Type | Description |
2727
| --- | --- | --- |
28+
| [contextWindowCompression](./ai.livegenerationconfig.md#livegenerationconfigcontextwindowcompression) | [ContextWindowCompressionConfig](./ai.contextwindowcompressionconfig.md#contextwindowcompressionconfig_interface) | <b><i>(Public Preview)</i></b> The context window compression configuration. |
2829
| [frequencyPenalty](./ai.livegenerationconfig.md#livegenerationconfigfrequencypenalty) | number | <b><i>(Public Preview)</i></b> Frequency penalties. |
2930
| [inputAudioTranscription](./ai.livegenerationconfig.md#livegenerationconfiginputaudiotranscription) | [AudioTranscriptionConfig](./ai.audiotranscriptionconfig.md#audiotranscriptionconfig_interface) | <b><i>(Public Preview)</i></b> Enables transcription of audio input.<!-- -->When enabled, the model will respond with transcriptions of your audio input in the <code>inputTranscriptions</code> property in [LiveServerContent](./ai.liveservercontent.md#liveservercontent_interface) messages. Note that the transcriptions are broken up across messages, so you may only receive small amounts of text per message. For example, if you ask the model "How are you today?", the model may transcribe that input across three messages, broken up as "How a", "re yo", "u today?". |
3031
| [maxOutputTokens](./ai.livegenerationconfig.md#livegenerationconfigmaxoutputtokens) | number | <b><i>(Public Preview)</i></b> Specifies the maximum number of tokens that can be generated in the response. The number of tokens per word varies depending on the language outputted. Is unbounded by default. |
@@ -36,6 +37,19 @@ export interface LiveGenerationConfig
3637
| [topK](./ai.livegenerationconfig.md#livegenerationconfigtopk) | number | <b><i>(Public Preview)</i></b> Changes how the model selects tokens for output. A <code>topK</code> value of 1 means the selected token is the most probable among all tokens in the model's vocabulary, while a <code>topK</code> value of 3 means that the next token is selected from among the 3 most probable, using sampled probabilities. Tokens are then further filtered with the highest selected <code>temperature</code> sampling. Defaults to 40 if unspecified. |
3738
| [topP](./ai.livegenerationconfig.md#livegenerationconfigtopp) | number | <b><i>(Public Preview)</i></b> Changes how the model selects tokens for output. Tokens are selected from the most to least probable until the sum of their probabilities equals the <code>topP</code> value. For example, if tokens A, B, and C have probabilities of 0.3, 0.2, and 0.1 respectively and the <code>topP</code> value is 0.5, then the model will select either A or B as the next token by using the <code>temperature</code> and exclude C as a candidate. Defaults to 0.95 if unset. |
3839

40+
## LiveGenerationConfig.contextWindowCompression
41+
42+
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
43+
>
44+
45+
The context window compression configuration.
46+
47+
<b>Signature:</b>
48+
49+
```typescript
50+
contextWindowCompression?: ContextWindowCompressionConfig;
51+
```
52+
3953
## LiveGenerationConfig.frequencyPenalty
4054

4155
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.

docs-devsite/ai.livegenerativemodel.md

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ export declare class LiveGenerativeModel extends AIModel
3939
4040
| Method | Modifiers | Description |
4141
| --- | --- | --- |
42-
| [connect()](./ai.livegenerativemodel.md#livegenerativemodelconnect) | | <b><i>(Public Preview)</i></b> Starts a [LiveSession](./ai.livesession.md#livesession_class)<!-- -->. |
42+
| [connect(sessionResumption)](./ai.livegenerativemodel.md#livegenerativemodelconnect) | | <b><i>(Public Preview)</i></b> Starts a [LiveSession](./ai.livesession.md#livesession_class)<!-- -->. |
4343
4444
## LiveGenerativeModel.generationConfig
4545
@@ -95,8 +95,15 @@ Starts a [LiveSession](./ai.livesession.md#livesession_class)<!-- -->.
9595
<b>Signature:</b>
9696
9797
```typescript
98-
connect(): Promise<LiveSession>;
98+
connect(sessionResumption?: SessionResumptionConfig): Promise<LiveSession>;
9999
```
100+
101+
#### Parameters
102+
103+
| Parameter | Type | Description |
104+
| --- | --- | --- |
105+
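| sessionResumption | [SessionResumptionConfig](./ai.sessionresumptionconfig.md#sessionresumptionconfig_interface) | Optional configuration for session resumption, such as the handle to a previous session state to restore. |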
106+
100107
<b>Returns:</b>
101108
102109
Promise&lt;[LiveSession](./ai.livesession.md#livesession_class)<!-- -->&gt;

docs-devsite/ai.livesession.md

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ export declare class LiveSession
2929

3030
| Property | Modifiers | Type | Description |
3131
| --- | --- | --- | --- |
32+
| [connectionPromise](./ai.livesession.md#livesessionconnectionpromise) | | Promise&lt;void&gt; | <b><i>(Public Preview)</i></b> Allows external code to await the opening of the WebSocket connection. |
3233
| [inConversation](./ai.livesession.md#livesessioninconversation) | | boolean | <b><i>(Public Preview)</i></b> Indicates whether this Live session is being controlled by an <code>AudioConversationController</code>. |
3334
| [isClosed](./ai.livesession.md#livesessionisclosed) | | boolean | <b><i>(Public Preview)</i></b> Indicates whether this Live session is closed. |
3435

@@ -38,6 +39,7 @@ export declare class LiveSession
3839
| --- | --- | --- |
3940
| [close()](./ai.livesession.md#livesessionclose) | | <b><i>(Public Preview)</i></b> Closes this session. All methods on this session will throw an error once this resolves. |
4041
| [receive()](./ai.livesession.md#livesessionreceive) | | <b><i>(Public Preview)</i></b> Yields messages received from the server. This can only be used by one consumer at a time. |
42+
| [resumeSession(sessionResumption)](./ai.livesession.md#livesessionresumesession) | | <b><i>(Public Preview)</i></b> Resumes an existing live session with the server.<!-- --> This closes the current WebSocket connection and establishes a new one using the same configuration (URI, headers, model, system instruction, tools, etc.) as the original session. |
4143
| [send(request, turnComplete)](./ai.livesession.md#livesessionsend) | | <b><i>(Public Preview)</i></b> Sends content to the server. |
4244
| [sendAudioRealtime(blob)](./ai.livesession.md#livesessionsendaudiorealtime) | | <b><i>(Public Preview)</i></b> Sends audio data to the server in realtime. |
4345
| [sendFunctionResponses(functionResponses)](./ai.livesession.md#livesessionsendfunctionresponses) | | <b><i>(Public Preview)</i></b> Sends function responses to the server. |
@@ -46,6 +48,19 @@ export declare class LiveSession
4648
| [sendTextRealtime(text)](./ai.livesession.md#livesessionsendtextrealtime) | | <b><i>(Public Preview)</i></b> Sends text to the server in realtime. |
4749
| [sendVideoRealtime(blob)](./ai.livesession.md#livesessionsendvideorealtime) | | <b><i>(Public Preview)</i></b> Sends video data to the server in realtime. |
4850

51+
## LiveSession.connectionPromise
52+
53+
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
54+
>
55+
56+
Allows external code to await the opening of the WebSocket connection.
57+
58+
<b>Signature:</b>
59+
60+
```typescript
61+
connectionPromise: Promise<void>;
62+
```
63+
4964
## LiveSession.inConversation
5065

5166
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
@@ -98,18 +113,47 @@ Yields messages received from the server. This can only be used by one consumer
98113
<b>Signature:</b>
99114

100115
```typescript
101-
receive(): AsyncGenerator<LiveServerContent | LiveServerToolCall | LiveServerToolCallCancellation | LiveServerGoingAwayNotice>;
116+
receive(): AsyncGenerator<LiveServerContent | LiveServerToolCall | LiveServerToolCallCancellation | LiveServerGoingAwayNotice | LiveSessionResumptionUpdate>;
102117
```
103118
<b>Returns:</b>
104119

105-
AsyncGenerator&lt;[LiveServerContent](./ai.liveservercontent.md#liveservercontent_interface) \| [LiveServerToolCall](./ai.liveservertoolcall.md#liveservertoolcall_interface) \| [LiveServerToolCallCancellation](./ai.liveservertoolcallcancellation.md#liveservertoolcallcancellation_interface) \| [LiveServerGoingAwayNotice](./ai.liveservergoingawaynotice.md#liveservergoingawaynotice_interface)<!-- -->&gt;
120+
AsyncGenerator&lt;[LiveServerContent](./ai.liveservercontent.md#liveservercontent_interface) \| [LiveServerToolCall](./ai.liveservertoolcall.md#liveservertoolcall_interface) \| [LiveServerToolCallCancellation](./ai.liveservertoolcallcancellation.md#liveservertoolcallcancellation_interface) \| [LiveServerGoingAwayNotice](./ai.liveservergoingawaynotice.md#liveservergoingawaynotice_interface) \| [LiveSessionResumptionUpdate](./ai.livesessionresumptionupdate.md#livesessionresumptionupdate_interface)<!-- -->&gt;
106121

107122
An `AsyncGenerator` that yields server messages as they arrive.
108123

109124
#### Exceptions
110125

111126
Thrown if the session is already closed, or if the server sends a response type that is not supported.
112127

128+
## LiveSession.resumeSession()
129+
130+
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.
131+
>
132+
133+
Resumes an existing live session with the server.
134+
135+
This closes the current WebSocket connection and establishes a new one using the same configuration (URI, headers, model, system instruction, tools, etc.) as the original session.
136+
137+
<b>Signature:</b>
138+
139+
```typescript
140+
resumeSession(sessionResumption?: SessionResumptionConfig): Promise<void>;
141+
```
142+
143+
#### Parameters
144+
145+
| Parameter | Type | Description |
146+
| --- | --- | --- |
147+
| sessionResumption | [SessionResumptionConfig](./ai.sessionresumptionconfig.md#sessionresumptionconfig_interface) | The configuration for session resumption, such as the handle to the previous session state to restore. |
148+
149+
<b>Returns:</b>
150+
151+
Promise&lt;void&gt;
152+
153+
#### Exceptions
154+
155+
If the session resumption configuration is unsupported.
156+
113157
## LiveSession.send()
114158

115159
> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment.

0 commit comments

Comments
 (0)