Skip to content

Commit e174d96

Browse files
authored
refactor(media): move sharp image ops into media runtime (#71519)
* refactor(media): move sharp image ops into plugin
* fix(media): pass image pixel budget to sharp plugin
* refactor(media): reuse media understanding sharp runtime
* test(build): allow staged runtime core graphs
1 parent b2b898c commit e174d96

9 files changed

Lines changed: 255 additions & 75 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ Docs: https://docs.openclaw.ai
9797
- Plugins/PDF: move local PDF extraction into a bundled `document-extract` plugin so core no longer owns `pdfjs-dist` or PDF image-rendering dependencies. Thanks @vincentkoc.
9898
- Dependencies/memory: stop installing `node-llama-cpp` by default; local embeddings now load it only when operators install the optional runtime package. Thanks @vincentkoc.
9999
- Dependencies/media: replace the tiny core media host's Express server with `node:http`, so Express is no longer a root runtime dependency. Thanks @vincentkoc.
100+
- Dependencies/media: move Sharp-backed image attachment processing into the bundled media understanding runtime so core no longer owns the native image parser dependency. Thanks @vincentkoc.
100101
- Matrix: require full cross-signing identity trust for self-device verification and add `openclaw matrix verify self` so operators can establish that trust from the CLI. (#70401) Thanks @gumadeiras.
101102
- WebChat/sessions: keep runtime-only prompt context out of visible transcript history and scrub legacy wrappers from session history surfaces. Thanks @91wan.
102103
- Gradium: add a bundled text-to-speech provider with voice-note and telephony output support. (#64958) Thanks @LaurentMazare.
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
import type { ImageMetadata } from "openclaw/plugin-sdk/media-runtime";
2+
import type sharpImport from "sharp";
3+
4+
/** Type of the `sharp` module's default export, taken from the type-only import. */
type SharpFactory = typeof sharpImport;
5+
6+
/** Input for `resizeToJpeg`. */
type ResizeToJpegParams = {
7+
/** Encoded source image bytes handed to sharp. */
buffer: Buffer;
8+
/** Used as both the width and the height of the `fit: "inside"` resize box. */
maxSide: number;
9+
/** Passed through unchanged as the JPEG encoder `quality` option. */
quality: number;
10+
/** Upscaling is disabled unless this is explicitly `false`. */
withoutEnlargement?: boolean;
11+
};
12+
13+
/** Input for `resizeToPng`. */
type ResizeToPngParams = {
14+
/** Encoded source image bytes handed to sharp. */
buffer: Buffer;
15+
/** Used as both the width and the height of the `fit: "inside"` resize box. */
maxSide: number;
16+
/** PNG encoder `compressionLevel`; falls back to 6 when omitted. */
compressionLevel?: number;
17+
/** Upscaling is disabled unless this is explicitly `false`. */
withoutEnlargement?: boolean;
18+
};
19+
20+
/** Options accepted by `createMediaAttachmentImageOps`. */
type MediaUnderstandingImageOpsOptions = {
21+
/** Must be a positive safe integer; forwarded to sharp as `limitInputPixels`. */
maxInputPixels: number;
22+
};
23+
24+
// Specifier handed to the dynamic `import()` in `loadSharp`.
// NOTE(review): presumably kept in a variable so bundlers do not resolve it statically — confirm.
const SHARP_MODULE = "sharp";
25+
26+
// Cached promise for the loaded sharp factory: set on the first `loadSharp` call and
// reset to null when the load fails.
let sharpFactoryPromise: Promise<SharpFactory> | null = null;
27+
28+
/**
 * Picks the callable sharp factory out of whatever shape the dynamic import produced.
 *
 * Candidates are tried in this order: `mod.default`, `mod.default.default`, then `mod`
 * itself. The first one that is a function wins.
 *
 * @param mod - The resolved module namespace (or any other value; it is probed defensively).
 * @returns The first candidate that is a function.
 * @throws Error when no candidate is callable. This includes `null`/`undefined` input,
 *   which previously escaped as a raw `TypeError` from the property access.
 */
function normalizeSharpFactory(mod: unknown): SharpFactory {
  // Optional chaining keeps a nullish module (or nullish `default`) from throwing here,
  // so every failure surfaces through the descriptive error below.
  const outerDefault = (mod as { default?: unknown } | null | undefined)?.default;
  const innerDefault = (outerDefault as { default?: unknown } | null | undefined)?.default;

  const sharp = [outerDefault, innerDefault, mod].find(
    (candidate): candidate is SharpFactory => typeof candidate === "function",
  );
  if (!sharp) {
    throw new Error("Optional dependency sharp did not expose an image processor");
  }
  return sharp;
}
42+
43+
/**
 * Wraps a sharp factory so every image it opens is decoded with `failOnError: false`
 * and with `limitInputPixels` pinned to the given budget. Caller-supplied options are
 * spread first, so these two settings always win.
 */
function bindSharpInputLimits(sharp: SharpFactory, maxInputPixels: number): SharpFactory {
  return ((buffer, options) =>
    sharp(buffer, {
      ...options,
      failOnError: false,
      limitInputPixels: maxInputPixels,
    })) as SharpFactory;
}

/**
 * Dynamically imports sharp and resolves its callable factory. On failure the module-level
 * cache is cleared so a later call can retry, and the cause is wrapped in a descriptive error.
 */
async function importSharpFactory(): Promise<SharpFactory> {
  try {
    return normalizeSharpFactory(await import(SHARP_MODULE));
  } catch (err) {
    sharpFactoryPromise = null;
    throw new Error("Optional dependency sharp is required for image attachment processing", {
      cause: err,
    });
  }
}

/**
 * Loads sharp once per process and returns a factory bound to `maxInputPixels`.
 *
 * Only the raw sharp factory is cached. The pixel budget is bound on every call, so
 * callers with different budgets each get their own limit. Previously the cached wrapper
 * captured the first caller's budget and silently applied it to every later caller.
 *
 * @param maxInputPixels - Pixel budget forwarded to sharp as `limitInputPixels`.
 * @returns A sharp factory that enforces the budget on every image it opens.
 * @throws Error when sharp cannot be imported or does not expose a callable factory.
 */
async function loadSharp(maxInputPixels: number): Promise<SharpFactory> {
  if (!sharpFactoryPromise) {
    sharpFactoryPromise = importSharpFactory();
  }
  const sharp = await sharpFactoryPromise;
  return bindSharpInputLimits(sharp, maxInputPixels);
}
64+
65+
/**
 * Validates the pixel budget supplied by the caller.
 *
 * @param value - Candidate budget.
 * @returns The same value when it is a safe integer greater than zero.
 * @throws Error for fractions, non-finite numbers, unsafe integers, zero and negatives.
 */
function normalizeMaxInputPixels(value: number): number {
  const isUsableBudget = Number.isSafeInteger(value) && value > 0;
  if (isUsableBudget) {
    return value;
  }
  throw new Error("Media attachment image ops require a positive maxInputPixels budget");
}
71+
72+
/**
 * Reduces decoder metadata to usable pixel dimensions.
 *
 * @param meta - Object that may carry `width` and `height`; a missing side counts as 0.
 * @returns `{ width, height }` when both sides are finite and greater than zero, else `null`.
 */
function normalizeMetadata(meta: { width?: number; height?: number }): ImageMetadata | null {
  const width = meta.width ?? 0;
  const height = meta.height ?? 0;
  const isUsableSide = (side: number): boolean => Number.isFinite(side) && side > 0;
  return isUsableSide(width) && isUsableSide(height) ? { width, height } : null;
}
83+
84+
/**
 * Builds the sharp-backed image operations used for media attachments.
 *
 * The pixel budget is validated once up front. Each operation then obtains sharp through
 * `loadSharp` with that budget before touching the buffer.
 *
 * @param options - Carries `maxInputPixels`, which must be a positive safe integer.
 * @returns An object of async image operations (metadata, orientation, resize, conversion).
 * @throws Error immediately when `maxInputPixels` is not a positive safe integer.
 */
export function createMediaAttachmentImageOps(options: MediaUnderstandingImageOpsOptions) {
  const pixelBudget = normalizeMaxInputPixels(options.maxInputPixels);

  const sharpForBudget = () => loadSharp(pixelBudget);

  // Both resize operations fit the image inside a maxSide x maxSide box and refuse to
  // upscale unless the caller passes `withoutEnlargement: false` explicitly.
  const insideBox = (maxSide: number, withoutEnlargement: boolean | undefined) => ({
    width: maxSide,
    height: maxSide,
    fit: "inside" as const,
    withoutEnlargement: withoutEnlargement !== false,
  });

  return {
    /** Reads the image dimensions; resolves to `null` when they are missing or not positive. */
    async getImageMetadata(buffer: Buffer): Promise<ImageMetadata | null> {
      const sharp = await sharpForBudget();
      const meta = await sharp(buffer).metadata();
      return normalizeMetadata(meta);
    },

    /** Re-encodes the image after an argument-less `rotate()` (EXIF-driven auto-orientation). */
    async normalizeExifOrientation(buffer: Buffer): Promise<Buffer> {
      const sharp = await sharpForBudget();
      const pipeline = sharp(buffer).rotate();
      return await pipeline.toBuffer();
    },

    /** Auto-orients, fits inside `maxSide`, and encodes as mozjpeg JPEG at `quality`. */
    async resizeToJpeg(params: ResizeToJpegParams): Promise<Buffer> {
      const sharp = await sharpForBudget();
      const pipeline = sharp(params.buffer)
        .rotate()
        .resize(insideBox(params.maxSide, params.withoutEnlargement))
        .jpeg({ quality: params.quality, mozjpeg: true });
      return await pipeline.toBuffer();
    },

    /** Re-encodes the input as mozjpeg JPEG at quality 90, with no rotate or resize step. */
    async convertHeicToJpeg(buffer: Buffer): Promise<Buffer> {
      const sharp = await sharpForBudget();
      const pipeline = sharp(buffer).jpeg({ quality: 90, mozjpeg: true });
      return await pipeline.toBuffer();
    },

    /** True when metadata reports `hasAlpha` or exactly four channels. */
    async hasAlphaChannel(buffer: Buffer): Promise<boolean> {
      const sharp = await sharpForBudget();
      const meta = await sharp(buffer).metadata();
      return meta.hasAlpha || meta.channels === 4;
    },

    /** Auto-orients, fits inside `maxSide`, and encodes as PNG (compression level defaults to 6). */
    async resizeToPng(params: ResizeToPngParams): Promise<Buffer> {
      const sharp = await sharpForBudget();
      const compressionLevel = params.compressionLevel ?? 6;
      const pipeline = sharp(params.buffer)
        .rotate()
        .resize(insideBox(params.maxSide, params.withoutEnlargement))
        .png({ compressionLevel });
      return await pipeline.toBuffer();
    },
  };
}

extensions/media-understanding-core/package.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,15 @@
44
"private": true,
55
"description": "OpenClaw media understanding runtime package",
66
"type": "module",
7+
"dependencies": {
8+
"sharp": "^0.34.5"
9+
},
710
"devDependencies": {
811
"@openclaw/plugin-sdk": "workspace:*"
12+
},
13+
"openclaw": {
14+
"bundle": {
15+
"stageRuntimeDependencies": true
16+
}
917
}
1018
}

package.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1636,7 +1636,6 @@
16361636
"osc-progress": "^0.3.0",
16371637
"proxy-agent": "^8.0.1",
16381638
"semver": "7.7.4",
1639-
"sharp": "^0.34.5",
16401639
"sqlite-vec": "0.1.9",
16411640
"tar": "7.5.13",
16421641
"tslog": "^4.10.2",

pnpm-lock.yaml

Lines changed: 4 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

scripts/lib/dependency-ownership.json

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,8 +164,9 @@
164164
"risk": ["version-parser"]
165165
},
166166
"sharp": {
167-
"owner": "capability:image-ops",
168-
"class": "default-runtime-initially",
167+
"owner": "plugin:media-understanding-core",
168+
"class": "plugin-runtime",
169+
"activation": ["media-understanding-core.image-ops"],
169170
"risk": ["native", "parser", "untrusted-files"]
170171
},
171172
"sqlite-vec": {
@@ -193,6 +194,11 @@
193194
"class": "core-runtime",
194195
"risk": ["network"]
195196
},
197+
"web-push": {
198+
"owner": "core:web-push",
199+
"class": "core-runtime",
200+
"risk": ["network", "push-notifications", "crypto"]
201+
},
196202
"ws": {
197203
"owner": "core:gateway-websocket",
198204
"class": "core-runtime",

src/infra/tsdown-config.test.ts

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,11 @@ function entryKeys(config: TsdownConfigEntry): string[] {
4141
return Object.keys(config.entry);
4242
}
4343

44+
/** True when the config's entry map has an `index` or a `runtime-api` key. */
function hasBundledPluginRuntimeEntry(config: TsdownConfigEntry): boolean {
  return entryKeys(config).some((key) => key === "index" || key === "runtime-api");
}
48+
4449
/** Returns the `index` entry key under the bundled root of the given plugin. */
function bundledEntry(pluginId: string): string {
  const pluginRoot = bundledPluginRoot(pluginId);
  return `${pluginRoot}/index`;
}
@@ -82,12 +87,19 @@ describe("tsdown config", () => {
8287
);
8388

8489
expect(stagedGraphs.length).toBeGreaterThan(0);
85-
expect(stagedGraphs.every((config) => entryKeys(config).includes("index"))).toBe(true);
90+
expect(stagedGraphs.every(hasBundledPluginRuntimeEntry)).toBe(true);
8691
expect(stagedGraphs.every((config) => !entryKeys(config).includes("plugin-sdk/index"))).toBe(
8792
true,
8893
);
8994
expect(stagedGraphs.some((config) => config.outDir === "dist/extensions/discord")).toBe(true);
9095
expect(stagedGraphs.some((config) => config.outDir === "dist/extensions/msteams")).toBe(true);
96+
expect(
97+
stagedGraphs.some(
98+
(config) =>
99+
config.outDir === "dist/extensions/media-understanding-core" &&
100+
entryKeys(config).includes("image-ops"),
101+
),
102+
).toBe(true);
91103
});
92104

93105
it("does not emit plugin-sdk or hooks from a separate dist graph", () => {

0 commit comments

Comments
 (0)