Skip to content
/ core Public

Commit 8c2afef

Browse files
committed
feat(ai): implement Vercel AI Gateway prompt caching in OpenAICompatibleRuntime
- Added baseURL detection for Vercel AI Gateway to enable automatic caching for chat completion requests. - Introduced helper methods to decorate request parameters with `providerOptions.gateway.caching = 'auto'` for compatible endpoints. - Updated `generateText`, `generateStructured`, and `generateTextStream` methods to utilize the new caching logic. - Created unit tests to verify caching behavior for both gateway and non-gateway endpoints. Signed-off-by: Innei <[email protected]>
1 parent 15e9b29 commit 8c2afef

File tree

4 files changed

+353
-17
lines changed

4 files changed

+353
-17
lines changed

apps/core/src/modules/ai/runtime/openai-compatible.runtime.ts

Lines changed: 55 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import type {
2121
export class OpenAICompatibleRuntime extends BaseRuntime {
2222
readonly providerInfo: RuntimeProviderInfo
2323
private readonly client: OpenAI
24+
private readonly baseURL: string
2425

2526
constructor(config: RuntimeConfig) {
2627
super()
@@ -31,6 +32,7 @@ export class OpenAICompatibleRuntime extends BaseRuntime {
3132
}
3233

3334
const baseURL = this.resolveBaseURL(config)
35+
this.baseURL = baseURL
3436
this.client = new OpenAI({
3537
apiKey: config.apiKey,
3638
baseURL,
@@ -62,6 +64,38 @@ export class OpenAICompatibleRuntime extends BaseRuntime {
6264
return normalized
6365
}
6466

67+
private isVercelAiGateway(): boolean {
68+
try {
69+
return new URL(this.baseURL).hostname === 'ai-gateway.vercel.sh'
70+
} catch {
71+
return false
72+
}
73+
}
74+
75+
private withGatewayPromptCache<T extends Record<string, unknown>>(
76+
params: T,
77+
): T {
78+
if (!this.isVercelAiGateway()) {
79+
return params
80+
}
81+
82+
const providerOptions = (
83+
params as { providerOptions?: Record<string, any> }
84+
).providerOptions
85+
const gatewayOptions = providerOptions?.gateway
86+
87+
return {
88+
...params,
89+
providerOptions: {
90+
...providerOptions,
91+
gateway: {
92+
...gatewayOptions,
93+
caching: 'auto',
94+
},
95+
},
96+
}
97+
}
98+
6599
async generateText(
66100
options: GenerateTextOptions,
67101
): Promise<GenerateTextResult> {
@@ -84,13 +118,15 @@ export class OpenAICompatibleRuntime extends BaseRuntime {
84118
: undefined
85119

86120
return this.withRetry(async () => {
87-
const response = await this.client.chat.completions.create({
88-
model: this.providerInfo.model,
89-
messages: chatMessages,
90-
temperature,
91-
max_tokens: maxTokens,
92-
reasoning_effort: openaiReasoningEffort,
93-
} as OpenAI.ChatCompletionCreateParamsNonStreaming)
121+
const response = await this.client.chat.completions.create(
122+
this.withGatewayPromptCache({
123+
model: this.providerInfo.model,
124+
messages: chatMessages,
125+
temperature,
126+
max_tokens: maxTokens,
127+
reasoning_effort: openaiReasoningEffort,
128+
}) as OpenAI.ChatCompletionCreateParamsNonStreaming,
129+
)
94130

95131
const choice = response.choices[0]
96132
return {
@@ -163,14 +199,16 @@ export class OpenAICompatibleRuntime extends BaseRuntime {
163199
}
164200

165201
for (let i = 0; i < maxIterations; i++) {
166-
const response = await this.client.chat.completions.create({
167-
model: this.providerInfo.model,
168-
messages: conversationMessages,
169-
temperature,
170-
max_tokens: maxTokens,
171-
reasoning_effort: openaiReasoningEffort,
172-
...toolConfig,
173-
} as OpenAI.ChatCompletionCreateParamsNonStreaming)
202+
const response = await this.client.chat.completions.create(
203+
this.withGatewayPromptCache({
204+
model: this.providerInfo.model,
205+
messages: conversationMessages,
206+
temperature,
207+
max_tokens: maxTokens,
208+
reasoning_effort: openaiReasoningEffort,
209+
...toolConfig,
210+
}) as OpenAI.ChatCompletionCreateParamsNonStreaming,
211+
)
174212

175213
if (response.usage) {
176214
totalUsage.promptTokens += response.usage.prompt_tokens
@@ -231,14 +269,14 @@ export class OpenAICompatibleRuntime extends BaseRuntime {
231269
: undefined
232270

233271
const response = await this.client.chat.completions.create(
234-
{
272+
this.withGatewayPromptCache({
235273
model: this.providerInfo.model,
236274
messages: chatMessages,
237275
temperature,
238276
max_tokens: maxTokens,
239277
stream: true,
240278
reasoning_effort: openaiReasoningEffort,
241-
} as OpenAI.ChatCompletionCreateParams & { stream: true },
279+
}) as OpenAI.ChatCompletionCreateParams & { stream: true },
242280
{ signal },
243281
)
244282

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
import { beforeEach, describe, expect, it, vi } from 'vitest'
2+
import { z } from 'zod'
3+
4+
import { AIProviderType } from '../../../../src/modules/ai/ai.types'
5+
import { OpenAICompatibleRuntime } from '../../../../src/modules/ai/runtime/openai-compatible.runtime'
6+
7+
const { createMock } = vi.hoisted(() => ({
8+
createMock: vi.fn(),
9+
}))
10+
11+
vi.mock('openai', () => ({
12+
default: class MockOpenAI {
13+
chat = {
14+
completions: {
15+
create: createMock,
16+
},
17+
}
18+
19+
constructor(_: unknown) {}
20+
},
21+
}))
22+
23+
describe('OpenAICompatibleRuntime prompt caching', () => {
24+
beforeEach(() => {
25+
createMock.mockReset()
26+
})
27+
28+
it('adds Vercel gateway automatic caching to text requests', async () => {
29+
createMock.mockResolvedValueOnce({
30+
choices: [{ message: { content: 'cached' } }],
31+
usage: {
32+
prompt_tokens: 10,
33+
completion_tokens: 2,
34+
total_tokens: 12,
35+
},
36+
})
37+
38+
const runtime = new OpenAICompatibleRuntime({
39+
apiKey: 'test-key',
40+
endpoint: 'https://ai-gateway.vercel.sh',
41+
model: 'anthropic/claude-sonnet-4.6',
42+
providerType: AIProviderType.OpenAICompatible,
43+
providerId: 'vercel-gateway',
44+
})
45+
46+
await runtime.generateText({ prompt: 'hello' })
47+
48+
expect(createMock).toHaveBeenCalledTimes(1)
49+
expect(createMock.mock.calls[0]?.[0]).toMatchObject({
50+
providerOptions: {
51+
gateway: {
52+
caching: 'auto',
53+
},
54+
},
55+
})
56+
})
57+
58+
it('does not add gateway caching to non-gateway compatible endpoints', async () => {
59+
createMock.mockResolvedValueOnce({
60+
choices: [{ message: { content: 'plain' } }],
61+
usage: {
62+
prompt_tokens: 8,
63+
completion_tokens: 1,
64+
total_tokens: 9,
65+
},
66+
})
67+
68+
const runtime = new OpenAICompatibleRuntime({
69+
apiKey: 'test-key',
70+
endpoint: 'https://api.deepseek.com',
71+
model: 'deepseek-chat',
72+
providerType: AIProviderType.OpenAICompatible,
73+
providerId: 'deepseek',
74+
})
75+
76+
await runtime.generateText({ prompt: 'hello' })
77+
78+
expect(createMock).toHaveBeenCalledTimes(1)
79+
expect(createMock.mock.calls[0]?.[0]).not.toHaveProperty('providerOptions')
80+
})
81+
82+
it('does not add gateway caching to default OpenRouter requests', async () => {
83+
createMock.mockResolvedValueOnce({
84+
choices: [{ message: { content: 'plain' } }],
85+
usage: {
86+
prompt_tokens: 8,
87+
completion_tokens: 1,
88+
total_tokens: 9,
89+
},
90+
})
91+
92+
const runtime = new OpenAICompatibleRuntime({
93+
apiKey: 'test-key',
94+
model: 'openai/gpt-4o-mini',
95+
providerType: AIProviderType.OpenRouter,
96+
providerId: 'openrouter',
97+
})
98+
99+
await runtime.generateText({ prompt: 'hello' })
100+
101+
expect(createMock).toHaveBeenCalledTimes(1)
102+
expect(createMock.mock.calls[0]?.[0]).not.toHaveProperty('providerOptions')
103+
})
104+
105+
it('adds Vercel gateway automatic caching to structured requests', async () => {
106+
createMock.mockResolvedValueOnce({
107+
choices: [
108+
{
109+
message: {
110+
tool_calls: [
111+
{
112+
type: 'function',
113+
function: {
114+
name: 'structured_output',
115+
arguments: JSON.stringify({ answer: 'cached' }),
116+
},
117+
},
118+
],
119+
},
120+
},
121+
],
122+
usage: {
123+
prompt_tokens: 10,
124+
completion_tokens: 2,
125+
total_tokens: 12,
126+
},
127+
})
128+
129+
const runtime = new OpenAICompatibleRuntime({
130+
apiKey: 'test-key',
131+
endpoint: 'https://ai-gateway.vercel.sh/v1',
132+
model: 'anthropic/claude-sonnet-4.6',
133+
providerType: AIProviderType.OpenAICompatible,
134+
providerId: 'vercel-gateway',
135+
})
136+
137+
await runtime.generateStructured({
138+
prompt: 'hello',
139+
schema: z.object({ answer: z.string() }),
140+
})
141+
142+
expect(createMock.mock.calls[0]?.[0]).toMatchObject({
143+
providerOptions: {
144+
gateway: {
145+
caching: 'auto',
146+
},
147+
},
148+
})
149+
})
150+
151+
it('adds Vercel gateway automatic caching to streaming requests', async () => {
152+
createMock.mockResolvedValueOnce(
153+
(async function* () {
154+
yield {
155+
choices: [{ delta: { content: 'stream' } }],
156+
}
157+
})(),
158+
)
159+
160+
const runtime = new OpenAICompatibleRuntime({
161+
apiKey: 'test-key',
162+
endpoint: 'https://ai-gateway.vercel.sh',
163+
model: 'anthropic/claude-sonnet-4.6',
164+
providerType: AIProviderType.OpenAICompatible,
165+
providerId: 'vercel-gateway',
166+
})
167+
168+
const chunks: string[] = []
169+
for await (const chunk of runtime.generateTextStream({ prompt: 'hello' })) {
170+
chunks.push(chunk.text)
171+
}
172+
173+
expect(chunks).toEqual(['stream'])
174+
expect(createMock.mock.calls[0]?.[0]).toMatchObject({
175+
stream: true,
176+
providerOptions: {
177+
gateway: {
178+
caching: 'auto',
179+
},
180+
},
181+
})
182+
})
183+
})
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# Vercel AI Gateway Prompt Cache Implementation Plan
2+
3+
> **For agentic workers:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking.
4+
5+
**Goal:** Enable Vercel AI Gateway automatic prompt caching in the OpenAI-compatible runtime without affecting non-gateway OpenAI-compatible providers.
6+
7+
**Architecture:** Detect whether the runtime resolves to a Vercel AI Gateway base URL, then decorate chat completion requests with `providerOptions.gateway.caching = 'auto'`. Reuse the same helper across text, structured, and streaming calls so behavior stays consistent.
8+
9+
**Tech Stack:** TypeScript, NestJS runtime layer, OpenAI SDK, Vitest, pnpm
10+
11+
---
12+
13+
## File Map
14+
15+
| Action | File | Responsibility |
16+
|--------|------|---------------|
17+
| Modify | `apps/core/src/modules/ai/runtime/openai-compatible.runtime.ts` | Detect Vercel AI Gateway and decorate request params |
18+
| Create | `apps/core/test/src/modules/ai/openai-compatible.runtime.spec.ts` | Verify gateway-only caching injection |
19+
20+
---
21+
22+
## Chunk 1: Runtime Contract
23+
24+
### Task 1: Lock gateway-only caching behavior with tests
25+
26+
**Files:**
27+
- Create: `apps/core/test/src/modules/ai/openai-compatible.runtime.spec.ts`
28+
29+
- [x] **Step 1: Write the failing test**
30+
31+
Cover:
32+
- Vercel AI Gateway endpoint adds `providerOptions.gateway.caching = 'auto'`
33+
- Non-gateway OpenAI-compatible endpoint does not add `providerOptions`
34+
35+
- [x] **Step 2: Run the targeted test to verify RED**
36+
37+
Run: `pnpm test -- apps/core/test/src/modules/ai/openai-compatible.runtime.spec.ts`
38+
39+
Expected: The gateway-specific assertion fails because the runtime does not yet decorate requests.
40+
41+
### Task 2: Implement gateway request decoration
42+
43+
**Files:**
44+
- Modify: `apps/core/src/modules/ai/runtime/openai-compatible.runtime.ts`
45+
46+
- [x] **Step 1: Add a helper that detects Vercel AI Gateway**
47+
48+
Use the resolved `baseURL` so detection covers normalized endpoints.
49+
50+
- [x] **Step 2: Add a helper that decorates chat completion params**
51+
52+
Inject:
53+
54+
```ts
55+
providerOptions: {
56+
gateway: {
57+
caching: 'auto',
58+
},
59+
}
60+
```
61+
62+
only when the base URL points to Vercel AI Gateway.
63+
64+
- [x] **Step 3: Reuse the helper in all request paths**
65+
66+
Update:
67+
- `generateText`
68+
- `generateStructured`
69+
- `generateTextStream`
70+
71+
- [x] **Step 4: Run the targeted test to verify GREEN**
72+
73+
Run: `pnpm test -- apps/core/test/src/modules/ai/openai-compatible.runtime.spec.ts`
74+
75+
Expected: Both tests pass.
76+
77+
- [x] **Step 5: Run a small related test slice**
78+
79+
Run: `pnpm test -- apps/core/test/src/modules/ai/ai-provider.factory.spec.ts apps/core/test/src/modules/ai/openai-compatible.runtime.spec.ts`
80+
81+
Expected: Existing runtime factory coverage still passes.

0 commit comments

Comments
 (0)