Skip to content

Commit 409fd92

Browse files
committed
feat(zai): add GLM-4.7 model with thinking mode support
- Add GLM-4.7 model definition for international and mainland China APIs
- Fix maxTokens from 98,304 to 16,384 for all Z.ai models
- Add zai-format.ts converter to preserve reasoning_content
- Override createStream in ZAiHandler for explicit thinking control
- Add comprehensive tests for GLM-4.7 thinking mode
1 parent f462eeb commit 409fd92

File tree

4 files changed

+529
-14
lines changed

4 files changed

+529
-14
lines changed

packages/types/src/providers/zai.ts

Lines changed: 47 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ export type InternationalZAiModelId = keyof typeof internationalZAiModels
1212
export const internationalZAiDefaultModelId: InternationalZAiModelId = "glm-4.6"
1313
export const internationalZAiModels = {
1414
"glm-4.5": {
15-
maxTokens: 98_304,
15+
maxTokens: 16_384,
1616
contextWindow: 131_072,
1717
supportsImages: false,
1818
supportsPromptCache: true,
@@ -26,7 +26,7 @@ export const internationalZAiModels = {
2626
"GLM-4.5 is Zhipu's latest featured model. Its comprehensive capabilities in reasoning, coding, and agent reach the state-of-the-art (SOTA) level among open-source models, with a context length of up to 128k.",
2727
},
2828
"glm-4.5-air": {
29-
maxTokens: 98_304,
29+
maxTokens: 16_384,
3030
contextWindow: 131_072,
3131
supportsImages: false,
3232
supportsPromptCache: true,
@@ -40,7 +40,7 @@ export const internationalZAiModels = {
4040
"GLM-4.5-Air is the lightweight version of GLM-4.5. It balances performance and cost-effectiveness, and can flexibly switch to hybrid thinking models.",
4141
},
4242
"glm-4.5-x": {
43-
maxTokens: 98_304,
43+
maxTokens: 16_384,
4444
contextWindow: 131_072,
4545
supportsImages: false,
4646
supportsPromptCache: true,
@@ -54,7 +54,7 @@ export const internationalZAiModels = {
5454
"GLM-4.5-X is a high-performance variant optimized for strong reasoning with ultra-fast responses.",
5555
},
5656
"glm-4.5-airx": {
57-
maxTokens: 98_304,
57+
maxTokens: 16_384,
5858
contextWindow: 131_072,
5959
supportsImages: false,
6060
supportsPromptCache: true,
@@ -67,7 +67,7 @@ export const internationalZAiModels = {
6767
description: "GLM-4.5-AirX is a lightweight, ultra-fast variant delivering strong performance with lower cost.",
6868
},
6969
"glm-4.5-flash": {
70-
maxTokens: 98_304,
70+
maxTokens: 16_384,
7171
contextWindow: 131_072,
7272
supportsImages: false,
7373
supportsPromptCache: true,
@@ -94,7 +94,7 @@ export const internationalZAiModels = {
9494
"GLM-4.5V is Z.AI's multimodal visual reasoning model (image/video/text/file input), optimized for GUI tasks, grounding, and document/video understanding.",
9595
},
9696
"glm-4.6": {
97-
maxTokens: 98_304,
97+
maxTokens: 16_384,
9898
contextWindow: 200_000,
9999
supportsImages: false,
100100
supportsPromptCache: true,
@@ -107,8 +107,25 @@ export const internationalZAiModels = {
107107
description:
108108
"GLM-4.6 is Zhipu's newest model with an extended context window of up to 200k tokens, providing enhanced capabilities for processing longer documents and conversations.",
109109
},
110+
"glm-4.7": {
111+
maxTokens: 16_384,
112+
contextWindow: 200_000,
113+
supportsImages: false,
114+
supportsPromptCache: true,
115+
supportsNativeTools: true,
116+
defaultToolProtocol: "native",
117+
supportsReasoningEffort: ["disable", "medium"],
118+
reasoningEffort: "medium",
119+
preserveReasoning: true,
120+
inputPrice: 0.6,
121+
outputPrice: 2.2,
122+
cacheWritesPrice: 0,
123+
cacheReadsPrice: 0.11,
124+
description:
125+
"GLM-4.7 is Zhipu's latest model with built-in thinking capabilities enabled by default. It provides enhanced reasoning for complex tasks while maintaining fast response times.",
126+
},
110127
"glm-4-32b-0414-128k": {
111-
maxTokens: 98_304,
128+
maxTokens: 16_384,
112129
contextWindow: 131_072,
113130
supportsImages: false,
114131
supportsPromptCache: false,
@@ -126,7 +143,7 @@ export type MainlandZAiModelId = keyof typeof mainlandZAiModels
126143
export const mainlandZAiDefaultModelId: MainlandZAiModelId = "glm-4.6"
127144
export const mainlandZAiModels = {
128145
"glm-4.5": {
129-
maxTokens: 98_304,
146+
maxTokens: 16_384,
130147
contextWindow: 131_072,
131148
supportsImages: false,
132149
supportsPromptCache: true,
@@ -140,7 +157,7 @@ export const mainlandZAiModels = {
140157
"GLM-4.5 is Zhipu's latest featured model. Its comprehensive capabilities in reasoning, coding, and agent reach the state-of-the-art (SOTA) level among open-source models, with a context length of up to 128k.",
141158
},
142159
"glm-4.5-air": {
143-
maxTokens: 98_304,
160+
maxTokens: 16_384,
144161
contextWindow: 131_072,
145162
supportsImages: false,
146163
supportsPromptCache: true,
@@ -154,7 +171,7 @@ export const mainlandZAiModels = {
154171
"GLM-4.5-Air is the lightweight version of GLM-4.5. It balances performance and cost-effectiveness, and can flexibly switch to hybrid thinking models.",
155172
},
156173
"glm-4.5-x": {
157-
maxTokens: 98_304,
174+
maxTokens: 16_384,
158175
contextWindow: 131_072,
159176
supportsImages: false,
160177
supportsPromptCache: true,
@@ -168,7 +185,7 @@ export const mainlandZAiModels = {
168185
"GLM-4.5-X is a high-performance variant optimized for strong reasoning with ultra-fast responses.",
169186
},
170187
"glm-4.5-airx": {
171-
maxTokens: 98_304,
188+
maxTokens: 16_384,
172189
contextWindow: 131_072,
173190
supportsImages: false,
174191
supportsPromptCache: true,
@@ -181,7 +198,7 @@ export const mainlandZAiModels = {
181198
description: "GLM-4.5-AirX is a lightweight, ultra-fast variant delivering strong performance with lower cost.",
182199
},
183200
"glm-4.5-flash": {
184-
maxTokens: 98_304,
201+
maxTokens: 16_384,
185202
contextWindow: 131_072,
186203
supportsImages: false,
187204
supportsPromptCache: true,
@@ -208,7 +225,7 @@ export const mainlandZAiModels = {
208225
"GLM-4.5V is Z.AI's multimodal visual reasoning model (image/video/text/file input), optimized for GUI tasks, grounding, and document/video understanding.",
209226
},
210227
"glm-4.6": {
211-
maxTokens: 98_304,
228+
maxTokens: 16_384,
212229
contextWindow: 204_800,
213230
supportsImages: false,
214231
supportsPromptCache: true,
@@ -221,6 +238,23 @@ export const mainlandZAiModels = {
221238
description:
222239
"GLM-4.6 is Zhipu's newest model with an extended context window of up to 200k tokens, providing enhanced capabilities for processing longer documents and conversations.",
223240
},
241+
"glm-4.7": {
242+
maxTokens: 16_384,
243+
contextWindow: 204_800,
244+
supportsImages: false,
245+
supportsPromptCache: true,
246+
supportsNativeTools: true,
247+
defaultToolProtocol: "native",
248+
supportsReasoningEffort: ["disable", "medium"],
249+
reasoningEffort: "medium",
250+
preserveReasoning: true,
251+
inputPrice: 0.29,
252+
outputPrice: 1.14,
253+
cacheWritesPrice: 0,
254+
cacheReadsPrice: 0.057,
255+
description:
256+
"GLM-4.7 is Zhipu's latest model with built-in thinking capabilities enabled by default. It provides enhanced reasoning for complex tasks while maintaining fast response times.",
257+
},
224258
} as const satisfies Record<string, ModelInfo>
225259

226260
export const ZAI_DEFAULT_TEMPERATURE = 0.6

src/api/providers/__tests__/zai.spec.ts

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,22 @@ describe("ZAiHandler", () => {
8282
expect(model.info.contextWindow).toBe(200_000)
8383
})
8484

85+
it("should return GLM-4.7 international model with thinking support", () => {
86+
const testModelId: InternationalZAiModelId = "glm-4.7"
87+
const handlerWithModel = new ZAiHandler({
88+
apiModelId: testModelId,
89+
zaiApiKey: "test-zai-api-key",
90+
zaiApiLine: "international_coding",
91+
})
92+
const model = handlerWithModel.getModel()
93+
expect(model.id).toBe(testModelId)
94+
expect(model.info).toEqual(internationalZAiModels[testModelId])
95+
expect(model.info.contextWindow).toBe(200_000)
96+
expect(model.info.supportsReasoningEffort).toEqual(["disable", "medium"])
97+
expect(model.info.reasoningEffort).toBe("medium")
98+
expect(model.info.preserveReasoning).toBe(true)
99+
})
100+
85101
it("should return GLM-4.5v international model with vision support", () => {
86102
const testModelId: InternationalZAiModelId = "glm-4.5v"
87103
const handlerWithModel = new ZAiHandler({
@@ -161,6 +177,22 @@ describe("ZAiHandler", () => {
161177
expect(model.info.maxTokens).toBe(16_384)
162178
expect(model.info.contextWindow).toBe(131_072)
163179
})
180+
181+
it("should return GLM-4.7 China model with thinking support", () => {
182+
const testModelId: MainlandZAiModelId = "glm-4.7"
183+
const handlerWithModel = new ZAiHandler({
184+
apiModelId: testModelId,
185+
zaiApiKey: "test-zai-api-key",
186+
zaiApiLine: "china_coding",
187+
})
188+
const model = handlerWithModel.getModel()
189+
expect(model.id).toBe(testModelId)
190+
expect(model.info).toEqual(mainlandZAiModels[testModelId])
191+
expect(model.info.contextWindow).toBe(204_800)
192+
expect(model.info.supportsReasoningEffort).toEqual(["disable", "medium"])
193+
expect(model.info.reasoningEffort).toBe("medium")
194+
expect(model.info.preserveReasoning).toBe(true)
195+
})
164196
})
165197

166198
describe("International API", () => {
@@ -371,4 +403,123 @@ describe("ZAiHandler", () => {
371403
)
372404
})
373405
})
406+
407+
describe("GLM-4.7 Thinking Mode", () => {
408+
it("should enable thinking by default for GLM-4.7 (default reasoningEffort is medium)", async () => {
409+
const handlerWithModel = new ZAiHandler({
410+
apiModelId: "glm-4.7",
411+
zaiApiKey: "test-zai-api-key",
412+
zaiApiLine: "international_coding",
413+
// No reasoningEffort setting - should use model default (medium)
414+
})
415+
416+
mockCreate.mockImplementationOnce(() => {
417+
return {
418+
[Symbol.asyncIterator]: () => ({
419+
async next() {
420+
return { done: true }
421+
},
422+
}),
423+
}
424+
})
425+
426+
const messageGenerator = handlerWithModel.createMessage("system prompt", [])
427+
await messageGenerator.next()
428+
429+
// For GLM-4.7 with default reasoning (medium), thinking should be enabled
430+
expect(mockCreate).toHaveBeenCalledWith(
431+
expect.objectContaining({
432+
model: "glm-4.7",
433+
thinking: { type: "enabled" },
434+
}),
435+
)
436+
})
437+
438+
it("should disable thinking for GLM-4.7 when reasoningEffort is set to disable", async () => {
439+
const handlerWithModel = new ZAiHandler({
440+
apiModelId: "glm-4.7",
441+
zaiApiKey: "test-zai-api-key",
442+
zaiApiLine: "international_coding",
443+
enableReasoningEffort: true,
444+
reasoningEffort: "disable",
445+
})
446+
447+
mockCreate.mockImplementationOnce(() => {
448+
return {
449+
[Symbol.asyncIterator]: () => ({
450+
async next() {
451+
return { done: true }
452+
},
453+
}),
454+
}
455+
})
456+
457+
const messageGenerator = handlerWithModel.createMessage("system prompt", [])
458+
await messageGenerator.next()
459+
460+
// For GLM-4.7 with reasoning disabled, thinking should be disabled
461+
expect(mockCreate).toHaveBeenCalledWith(
462+
expect.objectContaining({
463+
model: "glm-4.7",
464+
thinking: { type: "disabled" },
465+
}),
466+
)
467+
})
468+
469+
it("should enable thinking for GLM-4.7 when reasoningEffort is set to medium", async () => {
470+
const handlerWithModel = new ZAiHandler({
471+
apiModelId: "glm-4.7",
472+
zaiApiKey: "test-zai-api-key",
473+
zaiApiLine: "international_coding",
474+
enableReasoningEffort: true,
475+
reasoningEffort: "medium",
476+
})
477+
478+
mockCreate.mockImplementationOnce(() => {
479+
return {
480+
[Symbol.asyncIterator]: () => ({
481+
async next() {
482+
return { done: true }
483+
},
484+
}),
485+
}
486+
})
487+
488+
const messageGenerator = handlerWithModel.createMessage("system prompt", [])
489+
await messageGenerator.next()
490+
491+
// For GLM-4.7 with reasoning set to medium, thinking should be enabled
492+
expect(mockCreate).toHaveBeenCalledWith(
493+
expect.objectContaining({
494+
model: "glm-4.7",
495+
thinking: { type: "enabled" },
496+
}),
497+
)
498+
})
499+
500+
it("should NOT add thinking parameter for non-thinking models like GLM-4.6", async () => {
501+
const handlerWithModel = new ZAiHandler({
502+
apiModelId: "glm-4.6",
503+
zaiApiKey: "test-zai-api-key",
504+
zaiApiLine: "international_coding",
505+
})
506+
507+
mockCreate.mockImplementationOnce(() => {
508+
return {
509+
[Symbol.asyncIterator]: () => ({
510+
async next() {
511+
return { done: true }
512+
},
513+
}),
514+
}
515+
})
516+
517+
const messageGenerator = handlerWithModel.createMessage("system prompt", [])
518+
await messageGenerator.next()
519+
520+
// For GLM-4.6 (no thinking support), thinking parameter should not be present
521+
const callArgs = mockCreate.mock.calls[0][0]
522+
expect(callArgs.thinking).toBeUndefined()
523+
})
524+
})
374525
})

0 commit comments

Comments
 (0)