Skip to content

Commit 0a5ea57

Browse files
authored
Improve minChunkSize algorithm (#4723)
* Slightly simplify graph analysis. Also generates dynamicImportsByEntry that will help us to track already loaded modules more efficiently.
* Try to merge small side effect chunks first
* Use cached hasEffects to improve performance
* Add comment explaining chunk merge strategy
* Add test for cycle prevention. We probably need complete transitive dependency maps to continue here.
* Avoid cycles when merging chunks
* Avoid cycles when merging chunks
* Log cycles in generated chunks
* Improve cycle prevention mechanism
* Hopefully fix the algorithm for good
* Add logging
* Use a much more basic algorithm
* Remove logging
* Improve coverage
1 parent 52ba95c commit 0a5ea57

63 files changed

Lines changed: 621 additions & 95 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

src/Module.ts

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import { locate } from 'locate-character';
44
import MagicString from 'magic-string';
55
import ExternalModule from './ExternalModule';
66
import type Graph from './Graph';
7-
import { createHasEffectsContext, createInclusionContext } from './ast/ExecutionContext';
7+
import { createInclusionContext } from './ast/ExecutionContext';
88
import { nodeConstructors } from './ast/nodes';
99
import ExportAllDeclaration from './ast/nodes/ExportAllDeclaration';
1010
import ExportDefaultDeclaration from './ast/nodes/ExportDefaultDeclaration';
@@ -662,10 +662,7 @@ export default class Module {
662662
}
663663

664664
hasEffects(): boolean {
665-
return (
666-
this.info.moduleSideEffects === 'no-treeshake' ||
667-
(this.ast!.included && this.ast!.hasEffects(createHasEffectsContext()))
668-
);
665+
return this.info.moduleSideEffects === 'no-treeshake' || this.ast!.hasCachedEffects();
669666
}
670667

671668
include(): void {

src/ast/nodes/Program.ts

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import type MagicString from 'magic-string';
22
import { type RenderOptions, renderStatementList } from '../../utils/renderHelpers';
3+
import { createHasEffectsContext } from '../ExecutionContext';
34
import type { HasEffectsContext, InclusionContext } from '../ExecutionContext';
45
import type * as NodeType from './NodeType';
56
import { type IncludeChildren, NodeBase, type StatementNode } from './shared/Node';
@@ -9,11 +10,15 @@ export default class Program extends NodeBase {
910
declare sourceType: 'module';
1011
declare type: NodeType.tProgram;
1112

12-
private hasCachedEffect = false;
13+
private hasCachedEffect: boolean | null = null;
14+
15+
hasCachedEffects(): boolean {
16+
return this.hasCachedEffect === null
17+
? (this.hasCachedEffect = this.hasEffects(createHasEffectsContext()))
18+
: this.hasCachedEffect;
19+
}
1320

1421
hasEffects(context: HasEffectsContext): boolean {
15-
// We are caching here to later more efficiently identify side-effect-free modules
16-
if (this.hasCachedEffect) return true;
1722
for (const node of this.body) {
1823
if (node.hasEffects(context)) {
1924
return (this.hasCachedEffect = true);

src/utils/chunkAssignment.ts

Lines changed: 212 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import ExternalModule from '../ExternalModule';
22
import Module from '../Module';
3-
import { EMPTY_ARRAY } from './blank';
43
import { getNewSet, getOrCreate } from './getOrCreate';
54
import { concatLazy } from './iterators';
65
import { timeEnd, timeStart } from './timers';
@@ -202,19 +201,21 @@ function isModuleAlreadyLoaded(
202201
return true;
203202
}
204203

205-
EMPTY_ARRAY;
206-
207204
interface ChunkDescription {
208-
alias: null;
205+
dependencies: Set<ChunkDescription>;
206+
dependentChunks: Set<ChunkDescription>;
209207
modules: Module[];
208+
pure: boolean;
210209
signature: string;
211-
size: number | null;
212-
}
213-
214-
interface MergeableChunkDescription extends ChunkDescription {
215210
size: number;
216211
}
217212

213+
type ChunkPartition = {
214+
[key in 'small' | 'big']: {
215+
[subKey in 'pure' | 'sideEffect']: Set<ChunkDescription>;
216+
};
217+
};
218+
218219
function createChunks(
219220
allEntries: Iterable<Module>,
220221
assignedEntriesByModule: DependentModuleMap,
@@ -226,59 +227,12 @@ function createChunks(
226227
alias: null,
227228
modules
228229
}))
229-
: getOptimizedChunks(chunkModulesBySignature, minChunkSize);
230-
}
231-
232-
function getOptimizedChunks(
233-
chunkModulesBySignature: { [chunkSignature: string]: Module[] },
234-
minChunkSize: number
235-
) {
236-
timeStart('optimize chunks', 3);
237-
const { chunksToBeMerged, unmergeableChunks } = getMergeableChunks(
238-
chunkModulesBySignature,
239-
minChunkSize
240-
);
241-
for (const sourceChunk of chunksToBeMerged) {
242-
chunksToBeMerged.delete(sourceChunk);
243-
let closestChunk: ChunkDescription | null = null;
244-
let closestChunkDistance = Infinity;
245-
const { signature, size, modules } = sourceChunk;
246-
247-
for (const targetChunk of concatLazy(chunksToBeMerged, unmergeableChunks)) {
248-
const distance = getSignatureDistance(
249-
signature,
250-
targetChunk.signature,
251-
!chunksToBeMerged.has(targetChunk)
252-
);
253-
if (distance === 1) {
254-
closestChunk = targetChunk;
255-
break;
256-
} else if (distance < closestChunkDistance) {
257-
closestChunk = targetChunk;
258-
closestChunkDistance = distance;
259-
}
260-
}
261-
if (closestChunk) {
262-
closestChunk.modules.push(...modules);
263-
if (chunksToBeMerged.has(closestChunk)) {
264-
closestChunk.signature = mergeSignatures(signature, closestChunk.signature);
265-
if ((closestChunk.size += size) > minChunkSize) {
266-
chunksToBeMerged.delete(closestChunk);
267-
unmergeableChunks.push(closestChunk);
268-
}
269-
}
270-
} else {
271-
unmergeableChunks.push(sourceChunk);
272-
}
273-
}
274-
timeEnd('optimize chunks', 3);
275-
return unmergeableChunks;
230+
: getOptimizedChunks(chunkModulesBySignature, minChunkSize).map(({ modules }) => ({
231+
alias: null,
232+
modules
233+
}));
276234
}
277235

278-
const CHAR_DEPENDENT = 'X';
279-
const CHAR_INDEPENDENT = '_';
280-
const CHAR_CODE_DEPENDENT = CHAR_DEPENDENT.charCodeAt(0);
281-
282236
function getChunkModulesBySignature(
283237
assignedEntriesByModule: ReadonlyDependentModuleMap,
284238
allEntries: Iterable<Module>
@@ -299,33 +253,216 @@ function getChunkModulesBySignature(
299253
return chunkModules;
300254
}
301255

302-
function getMergeableChunks(
256+
/**
257+
* This function tries to get rid of small chunks by merging them with other
258+
* chunks. In order to merge chunks, one must obey the following rule:
259+
* - When merging several chunks, at most one of the chunks can have side
260+
* effects
261+
* - When one of the chunks has side effects, the entry points depending on that
262+
* chunk need to be a super set of the entry points depending on the other
263+
* chunks
264+
* - Pure chunks can always be merged
265+
* - We use the entry point dependence signature to calculate "chunk distance",
266+
* i.e. how likely it is that two chunks are loaded together
267+
*/
268+
function getOptimizedChunks(
303269
chunkModulesBySignature: { [chunkSignature: string]: Module[] },
304270
minChunkSize: number
305271
) {
306-
const chunksToBeMerged = new Set() as Set<MergeableChunkDescription> & {
307-
has(chunk: unknown): chunk is MergeableChunkDescription;
308-
};
309-
const unmergeableChunks: ChunkDescription[] = [];
310-
const alias = null;
272+
timeStart('optimize chunks', 3);
273+
const chunkPartition = getPartitionedChunks(chunkModulesBySignature, minChunkSize);
274+
if (chunkPartition.small.sideEffect.size > 0) {
275+
mergeChunks(
276+
chunkPartition.small.sideEffect,
277+
[chunkPartition.small.pure, chunkPartition.big.pure],
278+
minChunkSize,
279+
chunkPartition
280+
);
281+
}
282+
283+
if (chunkPartition.small.pure.size > 0) {
284+
mergeChunks(
285+
chunkPartition.small.pure,
286+
[chunkPartition.small.pure, chunkPartition.big.sideEffect, chunkPartition.big.pure],
287+
minChunkSize,
288+
chunkPartition
289+
);
290+
}
291+
timeEnd('optimize chunks', 3);
292+
return [
293+
...chunkPartition.small.sideEffect,
294+
...chunkPartition.small.pure,
295+
...chunkPartition.big.sideEffect,
296+
...chunkPartition.big.pure
297+
];
298+
}
299+
300+
const CHAR_DEPENDENT = 'X';
301+
const CHAR_INDEPENDENT = '_';
302+
const CHAR_CODE_DEPENDENT = CHAR_DEPENDENT.charCodeAt(0);
303+
304+
function getPartitionedChunks(
305+
chunkModulesBySignature: { [chunkSignature: string]: Module[] },
306+
minChunkSize: number
307+
): ChunkPartition {
308+
const smallPureChunks: ChunkDescription[] = [];
309+
const bigPureChunks: ChunkDescription[] = [];
310+
const smallSideEffectChunks: ChunkDescription[] = [];
311+
const bigSideEffectChunks: ChunkDescription[] = [];
312+
const chunkByModule = new Map<Module, ChunkDescription>();
311313
for (const [signature, modules] of Object.entries(chunkModulesBySignature)) {
314+
const chunkDescription: ChunkDescription = {
315+
dependencies: new Set<ChunkDescription>(),
316+
dependentChunks: new Set<ChunkDescription>(),
317+
modules,
318+
pure: true,
319+
signature,
320+
size: 0
321+
};
312322
let size = 0;
313-
checkModules: {
323+
let pure = true;
324+
for (const module of modules) {
325+
chunkByModule.set(module, chunkDescription);
326+
pure &&= !module.hasEffects();
327+
// Unfortunately, we cannot take tree-shaking into account here because
328+
// rendering did not happen yet
329+
size += module.originalCode.length;
330+
}
331+
chunkDescription.pure = pure;
332+
chunkDescription.size = size;
333+
(size < minChunkSize
334+
? pure
335+
? smallPureChunks
336+
: smallSideEffectChunks
337+
: pure
338+
? bigPureChunks
339+
: bigSideEffectChunks
340+
).push(chunkDescription);
341+
}
342+
sortChunksAndAddDependencies(
343+
[bigPureChunks, bigSideEffectChunks, smallPureChunks, smallSideEffectChunks],
344+
chunkByModule
345+
);
346+
return {
347+
big: { pure: new Set(bigPureChunks), sideEffect: new Set(bigSideEffectChunks) },
348+
small: { pure: new Set(smallPureChunks), sideEffect: new Set(smallSideEffectChunks) }
349+
};
350+
}
351+
352+
function sortChunksAndAddDependencies(
353+
chunkLists: ChunkDescription[][],
354+
chunkByModule: Map<Module, ChunkDescription>
355+
) {
356+
for (const chunks of chunkLists) {
357+
chunks.sort(compareChunks);
358+
for (const chunk of chunks) {
359+
const { dependencies, modules } = chunk;
314360
for (const module of modules) {
315-
if (module.hasEffects()) {
316-
break checkModules;
361+
for (const dependency of module.getDependenciesToBeIncluded()) {
362+
const dependencyChunk = chunkByModule.get(dependency as Module);
363+
if (dependencyChunk && dependencyChunk !== chunk) {
364+
dependencies.add(dependencyChunk);
365+
dependencyChunk.dependentChunks.add(chunk);
366+
}
317367
}
318-
size += module.magicString.toString().length;
319-
if (size > minChunkSize) {
320-
break checkModules;
368+
}
369+
}
370+
}
371+
}
372+
373+
function compareChunks(
374+
{ size: sizeA }: ChunkDescription,
375+
{ size: sizeB }: ChunkDescription
376+
): number {
377+
return sizeA - sizeB;
378+
}
379+
380+
function mergeChunks(
381+
chunksToBeMerged: Set<ChunkDescription>,
382+
targetChunks: Set<ChunkDescription>[],
383+
minChunkSize: number,
384+
chunkPartition: ChunkPartition
385+
) {
386+
for (const mergedChunk of chunksToBeMerged) {
387+
let closestChunk: ChunkDescription | null = null;
388+
let closestChunkDistance = Infinity;
389+
const { signature, modules, pure, size } = mergedChunk;
390+
391+
for (const targetChunk of concatLazy(targetChunks)) {
392+
if (mergedChunk === targetChunk) continue;
393+
// Possible improvement:
394+
// For dynamic entries depending on a pure chunk, it is safe to merge that
395+
// chunk into the chunk doing the dynamic import (i.e. into an "already
396+
// loaded chunk") even if it is not pure.
397+
// One way of handling this could be to add all "already loaded entries"
398+
// of the dynamic importers into the signature as well. That could also
399+
// change the way we do code-splitting for already loaded entries.
400+
const distance = pure
401+
? getSignatureDistance(signature, targetChunk.signature, !targetChunk.pure)
402+
: getSignatureDistance(targetChunk.signature, signature, true);
403+
if (distance < closestChunkDistance && isValidMerge(mergedChunk, targetChunk)) {
404+
if (distance === 1) {
405+
closestChunk = targetChunk;
406+
break;
321407
}
408+
closestChunk = targetChunk;
409+
closestChunkDistance = distance;
322410
}
323-
chunksToBeMerged.add({ alias, modules, signature, size });
324-
continue;
325411
}
326-
unmergeableChunks.push({ alias, modules, signature, size: null });
412+
if (closestChunk) {
413+
chunksToBeMerged.delete(mergedChunk);
414+
getChunksInPartition(closestChunk, minChunkSize, chunkPartition).delete(closestChunk);
415+
closestChunk.modules.push(...modules);
416+
closestChunk.size += size;
417+
closestChunk.pure &&= pure;
418+
closestChunk.signature = mergeSignatures(signature, closestChunk.signature);
419+
const { dependencies, dependentChunks } = closestChunk;
420+
for (const dependency of mergedChunk.dependencies) {
421+
dependencies.add(dependency);
422+
}
423+
for (const dependentChunk of mergedChunk.dependentChunks) {
424+
dependentChunks.add(dependentChunk);
425+
dependentChunk.dependencies.delete(mergedChunk);
426+
dependentChunk.dependencies.add(closestChunk);
427+
}
428+
dependencies.delete(closestChunk);
429+
getChunksInPartition(closestChunk, minChunkSize, chunkPartition).add(closestChunk);
430+
}
327431
}
328-
return { chunksToBeMerged, unmergeableChunks };
432+
}
433+
434+
// Merging will not produce cycles if none of the direct non-merged dependencies
435+
// of a chunk have the other chunk as a transitive dependency
436+
function isValidMerge(mergedChunk: ChunkDescription, targetChunk: ChunkDescription) {
437+
return !(
438+
hasTransitiveDependency(mergedChunk, targetChunk) ||
439+
hasTransitiveDependency(targetChunk, mergedChunk)
440+
);
441+
}
442+
443+
function hasTransitiveDependency(
444+
dependentChunk: ChunkDescription,
445+
dependencyChunk: ChunkDescription
446+
) {
447+
const chunksToCheck = new Set(dependentChunk.dependencies);
448+
for (const { dependencies } of chunksToCheck) {
449+
for (const dependency of dependencies) {
450+
if (dependency === dependencyChunk) {
451+
return true;
452+
}
453+
chunksToCheck.add(dependency);
454+
}
455+
}
456+
return false;
457+
}
458+
459+
function getChunksInPartition(
460+
chunk: ChunkDescription,
461+
minChunkSize: number,
462+
chunkPartition: ChunkPartition
463+
): Set<ChunkDescription> {
464+
const subPartition = chunk.size < minChunkSize ? chunkPartition.small : chunkPartition.big;
465+
return chunk.pure ? subPartition.pure : subPartition.sideEffect;
329466
}
330467

331468
function getSignatureDistance(

src/utils/iterators.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* their iterators. Useful when e.g. working with large sets or lists and when
44
* there is a chance that the iterators will not be fully exhausted.
55
*/
6-
export function* concatLazy<T>(...iterables: Iterable<T>[]) {
6+
export function* concatLazy<T>(iterables: Iterable<T>[]): Iterable<T> {
77
for (const iterable of iterables) {
88
yield* iterable;
99
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
module.exports = {
2+
description: 'avoids circular dependencies when merging chunks',
3+
options: {
4+
input: ['main1.js', 'main2.js', 'main3.js'],
5+
output: {
6+
experimentalMinChunkSize: 100
7+
}
8+
}
9+
};

0 commit comments

Comments (0)