Skip to content

Commit 60aed95

Browse files
feat(memory): add gemini-embedding-2-preview support (#42501)
Merged via squash. Prepared head SHA: c57b1f8 Co-authored-by: BillChirico <[email protected]> Co-authored-by: gumadeiras <[email protected]> Reviewed-by: @gumadeiras
1 parent 58634c9 commit 60aed95

18 files changed

+838
-37
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ Docs: https://docs.openclaw.ai
1212
- Exec/child commands: mark child command environments with `OPENCLAW_CLI` so subprocesses can detect when they were launched from the OpenClaw CLI. (#41411) Thanks @vincentkoc.
1313
- iOS/Home canvas: add a bundled welcome screen with a live agent overview that refreshes on connect, reconnect, and foreground return, and move the compact connection pill off the top-left canvas overlay. (#42456) Thanks @ngutman.
1414
- iOS/Home canvas: replace floating controls with a docked toolbar, make the bundled home scaffold adapt to smaller phones, and open chat in the resolved main session instead of a synthetic `ios` session. (#42456) Thanks @ngutman.
15+
- Memory/Gemini: add `gemini-embedding-2-preview` memory-search support with configurable output dimensions and automatic reindexing when the configured dimensions change. (#42501) Thanks @BillChirico.
1516
- Discord/auto threads: add `autoArchiveDuration` channel config for auto-created threads so Discord thread archiving can stay at 1 hour, 1 day, 3 days, or 1 week instead of always using the 1-hour default. (#35065) Thanks @davidguttman.
1617
- OpenCode/onboarding: add new OpenCode Go provider, treat Zen and Go as one OpenCode setup in the wizard/docs while keeping the runtime providers split, store one shared OpenCode key for both profiles, and stop overriding the built-in `opencode-go` catalog routing. (#42313) Thanks @ImLukeF and @vincentkoc.
1718
- macOS/chat UI: add a chat model picker, persist explicit thinking-level selections across relaunch, and harden provider-aware session model sync for the shared chat composer. (#42314) Thanks @ImLukeF.

docs/concepts/memory.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,29 @@ Notes:
310310
- `remote.baseUrl` is optional (defaults to the Gemini API base URL).
311311
- `remote.headers` lets you add extra headers if needed.
312312
- Default model: `gemini-embedding-001`.
313+
- `gemini-embedding-2-preview` is also supported: 8192 token limit and configurable dimensions (768 / 1536 / 3072, default 3072).
314+
315+
#### Gemini Embedding 2 (preview)
316+
317+
```json5
318+
agents: {
319+
defaults: {
320+
memorySearch: {
321+
provider: "gemini",
322+
model: "gemini-embedding-2-preview",
323+
outputDimensionality: 3072, // optional: 768, 1536, or 3072 (default)
324+
remote: {
325+
apiKey: "YOUR_GEMINI_API_KEY"
326+
}
327+
}
328+
}
329+
}
330+
```
331+
332+
> **⚠️ Re-index required:** Switching from `gemini-embedding-001` (768 dimensions)
333+
> to `gemini-embedding-2-preview` (3072 dimensions) changes the vector size. The same is true if you
334+
> change `outputDimensionality` among 768, 1536, and 3072.
335+
> OpenClaw will automatically reindex when it detects a model or dimension change.
313336
314337
If you want to use a **custom OpenAI-compatible endpoint** (OpenRouter, vLLM, or a proxy),
315338
you can use the `remote` configuration with the OpenAI provider:

src/agents/memory-search.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ export type ResolvedMemorySearchConfig = {
2828
};
2929
fallback: "openai" | "gemini" | "local" | "voyage" | "mistral" | "ollama" | "none";
3030
model: string;
31+
outputDimensionality?: number;
3132
local: {
3233
modelPath?: string;
3334
modelCacheDir?: string;
@@ -193,6 +194,7 @@ function mergeConfig(
193194
? DEFAULT_OLLAMA_MODEL
194195
: undefined;
195196
const model = overrides?.model ?? defaults?.model ?? modelDefault ?? "";
197+
const outputDimensionality = overrides?.outputDimensionality ?? defaults?.outputDimensionality;
196198
const local = {
197199
modelPath: overrides?.local?.modelPath ?? defaults?.local?.modelPath,
198200
modelCacheDir: overrides?.local?.modelCacheDir ?? defaults?.local?.modelCacheDir,
@@ -312,6 +314,7 @@ function mergeConfig(
312314
},
313315
fallback,
314316
model,
317+
outputDimensionality,
315318
local,
316319
store,
317320
chunking: { tokens: Math.max(1, chunking.tokens), overlap },

src/config/schema.help.quality.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ const TARGET_KEYS = [
8383
"agents.defaults.memorySearch.remote.batch.timeoutMinutes",
8484
"agents.defaults.memorySearch.local.modelPath",
8585
"agents.defaults.memorySearch.store.path",
86+
"agents.defaults.memorySearch.outputDimensionality",
8687
"agents.defaults.memorySearch.store.vector.enabled",
8788
"agents.defaults.memorySearch.store.vector.extensionPath",
8889
"agents.defaults.memorySearch.query.hybrid.enabled",

src/config/schema.help.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -785,6 +785,8 @@ export const FIELD_HELP: Record<string, string> = {
785785
'Selects the embedding backend used to build/query memory vectors: "openai", "gemini", "voyage", "mistral", "ollama", or "local". Keep your most reliable provider here and configure fallback for resilience.',
786786
"agents.defaults.memorySearch.model":
787787
"Embedding model override used by the selected memory provider when a non-default model is required. Set this only when you need explicit recall quality/cost tuning beyond provider defaults.",
788+
"agents.defaults.memorySearch.outputDimensionality":
789+
"Gemini embedding-2 only: chooses the output vector size for memory embeddings. Use 768, 1536, or 3072 (default), and expect a full reindex when you change it because stored vector dimensions must stay consistent.",
788790
"agents.defaults.memorySearch.remote.baseUrl":
789791
"Overrides the embedding API endpoint, such as an OpenAI-compatible proxy or custom Gemini base URL. Use this only when routing through your own gateway or vendor endpoint; keep provider defaults otherwise.",
790792
"agents.defaults.memorySearch.remote.apiKey":

src/config/schema.labels.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,7 @@ export const FIELD_LABELS: Record<string, string> = {
331331
"agents.defaults.memorySearch.remote.batch.pollIntervalMs": "Remote Batch Poll Interval (ms)",
332332
"agents.defaults.memorySearch.remote.batch.timeoutMinutes": "Remote Batch Timeout (min)",
333333
"agents.defaults.memorySearch.model": "Memory Search Model",
334+
"agents.defaults.memorySearch.outputDimensionality": "Memory Search Output Dimensionality",
334335
"agents.defaults.memorySearch.fallback": "Memory Search Fallback",
335336
"agents.defaults.memorySearch.local.modelPath": "Local Embedding Model Path",
336337
"agents.defaults.memorySearch.store.path": "Memory Search Index Path",

src/config/types.tools.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,11 @@ export type MemorySearchConfig = {
347347
fallback?: "openai" | "gemini" | "local" | "voyage" | "mistral" | "ollama" | "none";
348348
/** Embedding model id (remote) or alias (local). */
349349
model?: string;
350+
/**
351+
* Gemini embedding-2 models only: output vector dimensions.
352+
* Supported values today are 768, 1536, and 3072.
353+
*/
354+
outputDimensionality?: number;
350355
/** Local embedding settings (node-llama-cpp). */
351356
local?: {
352357
/** GGUF model path or hf: URI. */

src/config/zod-schema.agent-runtime.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -599,6 +599,7 @@ export const MemorySearchSchema = z
599599
])
600600
.optional(),
601601
model: z.string().optional(),
602+
outputDimensionality: z.number().int().positive().optional(),
602603
local: z
603604
.object({
604605
modelPath: z.string().optional(),

src/memory/batch-gemini.test.ts

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
import { afterEach, beforeAll, describe, expect, it, vi } from "vitest";
2+
import type { GeminiEmbeddingClient } from "./embeddings-gemini.js";
3+
4+
describe("runGeminiEmbeddingBatches", () => {
5+
let runGeminiEmbeddingBatches: typeof import("./batch-gemini.js").runGeminiEmbeddingBatches;
6+
7+
beforeAll(async () => {
8+
({ runGeminiEmbeddingBatches } = await import("./batch-gemini.js"));
9+
});
10+
11+
afterEach(() => {
12+
vi.resetAllMocks();
13+
vi.unstubAllGlobals();
14+
});
15+
16+
const mockClient: GeminiEmbeddingClient = {
17+
baseUrl: "https://generativelanguage.googleapis.com/v1beta",
18+
headers: {},
19+
model: "gemini-embedding-2-preview",
20+
modelPath: "models/gemini-embedding-2-preview",
21+
apiKeys: ["test-key"],
22+
outputDimensionality: 1536,
23+
};
24+
25+
it("includes outputDimensionality in batch upload requests", async () => {
26+
const fetchMock = vi.fn(async (input: RequestInfo | URL, init?: RequestInit) => {
27+
const url =
28+
typeof input === "string" ? input : input instanceof URL ? input.toString() : input.url;
29+
if (url.includes("/upload/v1beta/files?uploadType=multipart")) {
30+
const body = init?.body;
31+
if (!(body instanceof Blob)) {
32+
throw new Error("expected multipart blob body");
33+
}
34+
const text = await body.text();
35+
expect(text).toContain('"taskType":"RETRIEVAL_DOCUMENT"');
36+
expect(text).toContain('"outputDimensionality":1536');
37+
return new Response(JSON.stringify({ name: "files/file-123" }), {
38+
status: 200,
39+
headers: { "Content-Type": "application/json" },
40+
});
41+
}
42+
if (url.endsWith(":asyncBatchEmbedContent")) {
43+
return new Response(
44+
JSON.stringify({
45+
name: "batches/batch-1",
46+
state: "COMPLETED",
47+
outputConfig: { file: "files/output-1" },
48+
}),
49+
{
50+
status: 200,
51+
headers: { "Content-Type": "application/json" },
52+
},
53+
);
54+
}
55+
if (url.endsWith("/files/output-1:download")) {
56+
return new Response(
57+
JSON.stringify({
58+
key: "req-1",
59+
response: { embedding: { values: [0.1, 0.2, 0.3] } },
60+
}),
61+
{
62+
status: 200,
63+
headers: { "Content-Type": "application/jsonl" },
64+
},
65+
);
66+
}
67+
throw new Error(`unexpected fetch ${url}`);
68+
});
69+
70+
vi.stubGlobal("fetch", fetchMock);
71+
72+
const results = await runGeminiEmbeddingBatches({
73+
gemini: mockClient,
74+
agentId: "main",
75+
requests: [
76+
{
77+
custom_id: "req-1",
78+
request: {
79+
content: { parts: [{ text: "hello world" }] },
80+
taskType: "RETRIEVAL_DOCUMENT",
81+
outputDimensionality: 1536,
82+
},
83+
},
84+
],
85+
wait: true,
86+
pollIntervalMs: 1,
87+
timeoutMs: 1000,
88+
concurrency: 1,
89+
});
90+
91+
expect(results.get("req-1")).toEqual([0.1, 0.2, 0.3]);
92+
expect(fetchMock).toHaveBeenCalledTimes(3);
93+
});
94+
});

src/memory/batch-gemini.ts

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,13 @@ import {
55
} from "./batch-runner.js";
66
import { buildBatchHeaders, normalizeBatchBaseUrl } from "./batch-utils.js";
77
import { debugEmbeddingsLog } from "./embeddings-debug.js";
8-
import type { GeminiEmbeddingClient } from "./embeddings-gemini.js";
8+
import type { GeminiEmbeddingClient, GeminiTextEmbeddingRequest } from "./embeddings-gemini.js";
99
import { hashText } from "./internal.js";
1010
import { withRemoteHttpResponse } from "./remote-http.js";
1111

1212
export type GeminiBatchRequest = {
1313
custom_id: string;
14-
content: { parts: Array<{ text: string }> };
15-
taskType: "RETRIEVAL_DOCUMENT" | "RETRIEVAL_QUERY";
14+
request: GeminiTextEmbeddingRequest;
1615
};
1716

1817
export type GeminiBatchStatus = {
@@ -82,10 +81,7 @@ async function submitGeminiBatch(params: {
8281
.map((request) =>
8382
JSON.stringify({
8483
key: request.custom_id,
85-
request: {
86-
content: request.content,
87-
task_type: request.taskType,
88-
},
84+
request: request.request,
8985
}),
9086
)
9187
.join("\n");

0 commit comments

Comments
 (0)