Skip to content

Commit 2ab3a9d

Browse files
committed
feat(search): implement local search plugin powered by MiniSearch
- Add full indexing pipeline: render markdown to HTML, split into sections, build MiniSearch index
- Support locale-based index splitting via siteConfig.languages
- Add virtual module @localSearchIndex with lazy-loaded per-locale chunks
- Add dev server integration with HMR support for .md file changes
- Support frontmatter `search: false` to exclude pages from indexing
- Port splitPageIntoSections from VitePress with safe regex patterns
- Add minisearch dependency to package.json and pnpm catalog
1 parent 8c985a3 commit 2ab3a9d

File tree

4 files changed

+226
-97
lines changed

4 files changed

+226
-97
lines changed
Lines changed: 218 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,32 @@
11
/**
2-
* @TODO localSearch by minisearch
2+
* Local search powered by MiniSearch
3+
* @see https://github.com/vuejs/vitepress/blob/main/src/node/plugins/localSearchPlugin.ts
34
*/
45

5-
import type { Plugin } from 'vite'
6+
import type { Plugin, ViteDevServer } from 'vite'
67
import type { ResolvedValaxyOptions } from '../types'
7-
// import path from 'node:path'
8-
// import { slash } from '@antfu/utils'
9-
// import _debug from 'debug'
10-
// import pMap from 'p-map'
8+
import type { MarkdownEnv } from './markdown/env'
9+
import path from 'node:path'
10+
import process from 'node:process'
11+
import { slash } from '@antfu/utils'
12+
import _debug from 'debug'
13+
import fs from 'fs-extra'
14+
import MiniSearch from 'minisearch'
15+
import pMap from 'p-map'
16+
import { createMarkdownRenderer } from './markdown'
17+
import { processIncludes } from './markdown/utils/processInclude'
18+
19+
const debug = _debug('valaxy:local-search')
1120

1221
const LOCAL_SEARCH_INDEX_ID = '@localSearchIndex'
1322
const LOCAL_SEARCH_INDEX_REQUEST_PATH = `/${LOCAL_SEARCH_INDEX_ID}`
1423

15-
// const debug = _debug('vitepress:local-search')
24+
/**
 * Shape of one document stored in a per-locale MiniSearch index.
 * One document is added for each heading section of a page.
 */
interface IndexObject {
  /** Page URL, followed by `#anchor` when the section has an anchor. */
  id: string
  /** Section body with HTML tags stripped. */
  text: string
  /** The section's own heading text. */
  title: string
  /** Headings above this section, outermost first. */
  titles: string[]
}
1630

1731
export async function localSearchPlugin(
1832
options: ResolvedValaxyOptions,
@@ -35,100 +49,207 @@ export async function localSearchPlugin(
3549
}
3650
}
3751

38-
// let server: ViteDevServer | undefined
39-
40-
// function onIndexUpdated() {
41-
// if (server) {
42-
// server.moduleGraph.onFileChange(LOCAL_SEARCH_INDEX_REQUEST_PATH)
43-
// // HMR
44-
// const mod = server.moduleGraph.getModuleById(
45-
// LOCAL_SEARCH_INDEX_REQUEST_PATH,
46-
// )
47-
// if (!mod)
48-
// return
49-
// server.ws.send({
50-
// type: 'update',
51-
// updates: [
52-
// {
53-
// acceptedPath: mod.url,
54-
// path: mod.url,
55-
// timestamp: Date.now(),
56-
// type: 'js-update',
57-
// },
58-
// ],
59-
// })
60-
// }
61-
// }
62-
63-
// function getDocId(file: string) {
64-
// let relFile = slash(path.relative(siteConfig.srcDir, file))
65-
// relFile = siteConfig.rewrites.map[relFile] || relFile
66-
// let id = slash(path.join(siteConfig.site.base, relFile))
67-
// id = id.replace(/(^|\/)index\.md$/, '$1')
68-
// id = id.replace(/\.md$/, siteConfig.cleanUrls ? '' : '.html')
69-
// return id
70-
// }
71-
72-
// async function indexFile(page: string) {
73-
// const file = path.join(siteConfig.srcDir, page)
74-
// // get file metadata
75-
// const fileId = getDocId(file)
76-
// const locale = getLocaleForPath(siteConfig.site, page)
77-
// const index = getIndexByLocale(locale)
78-
// // retrieve file and split into "sections"
79-
// const html = await render(file)
80-
// const sections
81-
// // user provided generator
82-
// = (await options.miniSearch?._splitIntoSections?.(file, html))
83-
// // default implementation
84-
// ?? splitPageIntoSections(html)
85-
// // add sections to the locale index
86-
// for await (const section of sections) {
87-
// if (!section || !(section.text || section.titles))
88-
// break
89-
// const { anchor, text, titles } = section
90-
// const id = anchor ? [fileId, anchor].join('#') : fileId
91-
// index.add({
92-
// id,
93-
// text,
94-
// title: titles.at(-1)!,
95-
// titles: titles.slice(0, -1),
96-
// })
97-
// }
98-
// }
99-
100-
// async function scanForBuild() {
101-
// debug('🔍️ Indexing files for search...')
102-
// await pMap(siteConfig.pages, indexFile, {
103-
// concurrency: siteConfig.buildConcurrency,
104-
// })
105-
// debug('✅ Indexing finished...')
106-
// }
52+
const srcDir = path.resolve(options.userRoot, 'pages')
53+
const md = await createMarkdownRenderer(options)
54+
55+
/**
 * Renders one markdown file to HTML so it can be indexed.
 *
 * @param file Absolute path of the markdown file.
 * @returns The rendered HTML, or an empty string when the file does not
 * exist or its frontmatter sets `search: false`.
 */
async function render(file: string) {
  const missing = !fs.existsSync(file)
  if (missing)
    return ''

  const source = await fs.promises.readFile(file, 'utf-8')
  // Includes are expanded before the markdown is rendered.
  const expanded = processIncludes(srcDir, source, file)

  const env: MarkdownEnv = {
    path: file,
    relativePath: slash(path.relative(srcDir, file)),
  }
  const rendered = await md.renderAsync(expanded, env)

  // `frontmatter` is read from `env` only after rendering
  // (presumably populated by the renderer; verify against the markdown env).
  if (env.frontmatter?.search === false)
    return ''
  return rendered
}
65+
66+
const indexByLocales = new Map<string, MiniSearch<IndexObject>>()
67+
68+
/**
 * Returns the MiniSearch index for `locale`, creating and caching it in
 * `indexByLocales` the first time that locale is requested.
 */
function getIndexByLocale(locale: string) {
  const cached = indexByLocales.get(locale)
  if (cached)
    return cached

  const created = new MiniSearch<IndexObject>({
    // Fields that are searched.
    fields: ['title', 'titles', 'text'],
    // Fields returned with each result; `text` is searched but not stored.
    storeFields: ['title', 'titles'],
  })
  indexByLocales.set(locale, created)
  return created
}
79+
80+
let server: ViteDevServer | undefined
81+
82+
/**
 * Notifies the dev server that the search index module changed.
 *
 * Reports the change to the module graph and, when the index module is
 * present in the graph, sends a `js-update` payload for it over the
 * server's websocket. Does nothing when no dev server has been captured.
 */
function onIndexUpdated() {
  if (!server)
    return

  server.moduleGraph.onFileChange(LOCAL_SEARCH_INDEX_REQUEST_PATH)

  const indexModule = server.moduleGraph.getModuleById(LOCAL_SEARCH_INDEX_REQUEST_PATH)
  if (!indexModule)
    return

  const { url } = indexModule
  server.ws.send({
    type: 'update',
    updates: [
      {
        type: 'js-update',
        path: url,
        acceptedPath: url,
        timestamp: Date.now(),
      },
    ],
  })
}
103+
104+
/**
 * Converts a markdown file path into the URL-style id used in the index.
 *
 * The path is made relative to `srcDir` and rooted at `/`; a trailing
 * `index.md` is dropped and any other `.md` suffix becomes `.html`.
 */
function getDocId(file: string) {
  const relFile = slash(path.relative(srcDir, file))
  return slash(path.join('/', relFile))
    .replace(/(^|\/)index\.md$/, '$1')
    .replace(/\.md$/, '.html')
}
111+
112+
/**
 * Picks the locale bucket for a page from the first segment of its path.
 *
 * @returns The first path segment when it is one of the configured
 * languages (default `['en']`), otherwise `'root'`.
 */
function getLocaleForPath(page: string): string {
  const [head] = page.split('/')
  const knownLocales = siteConfig.languages || ['en']
  return knownLocales.includes(head) ? head : 'root'
}
120+
121+
/**
 * Indexes one page: renders it, splits the HTML into heading sections and
 * adds each section to the index of the page's locale.
 *
 * @param page Page path relative to `srcDir`.
 */
async function indexFile(page: string) {
  const file = path.join(srcDir, page)
  const fileId = getDocId(file)
  const index = getIndexByLocale(getLocaleForPath(page))

  const html = await render(file)
  for (const section of splitPageIntoSections(html)) {
    const { anchor, text, titles } = section

    // The last entry is the section's own heading. A section without one
    // is skipped on its own; the remaining sections are still indexed.
    const title = titles.at(-1)
    if (title === undefined)
      continue

    const id = anchor ? [fileId, anchor].join('#') : fileId

    // MiniSearch throws when an id is added twice (e.g. repeated anchors
    // on a page). Skip the repeat so one page cannot abort the whole scan.
    if (index.has(id)) {
      debug('Skipping duplicate search document id %s', id)
      continue
    }

    index.add({
      id,
      text,
      title,
      titles: titles.slice(0, -1),
    })
  }
}
142+
143+
/**
 * Rebuilds every locale index from scratch by indexing all of
 * `options.pages`, at most 10 pages at a time.
 */
async function scanForBuild() {
  debug('Indexing files for search...')

  // Drop the existing indexes so this scan starts empty.
  indexByLocales.clear()

  const limit = { concurrency: 10 }
  await pMap(options.pages, page => indexFile(page), limit)

  debug('Indexing finished..., %d locales', indexByLocales.size)
}
107151

108152
return {
109153
name: 'valaxy:local-search',
110-
config: () => {
111-
return {
112-
optimizeDeps: {
113-
include: [
114-
'valaxy > @vueuse/integrations/useFocusTrap',
115-
'valaxy > mark.js/src/vanilla.js',
116-
'valaxy > minisearch',
117-
],
118-
},
119-
120-
// async configureServer(_server) {
121-
// server = _server
122-
// await scanForBuild()
123-
// onIndexUpdated()
124-
// },
125-
126-
// resolveId(id) {
127-
// if (id.startsWith(LOCAL_SEARCH_INDEX_ID)) {
128-
// return `/${id}`
129-
// }
130-
// },
154+
155+
// Adds the client-side search dependencies (resolved through `valaxy`)
// to Vite's `optimizeDeps.include` list.
config() {
  const include = [
    'valaxy > @vueuse/integrations/useFocusTrap',
    'valaxy > mark.js/src/vanilla.js',
    'valaxy > minisearch',
  ]
  return { optimizeDeps: { include } }
},
164+
165+
// Dev server hook: keep a reference to the server, build the initial
// index, then announce that the index module is up to date.
configureServer: async (_server) => {
  server = _server
  await scanForBuild()
  onIndexUpdated()
},
170+
171+
// Maps the virtual `@localSearchIndex…` ids to a `/`-prefixed path that
// `load` recognises; every other id is left to other resolvers.
resolveId(id) {
  const isSearchIndex = id.startsWith(LOCAL_SEARCH_INDEX_ID)
  return isSearchIndex ? `/${id}` : undefined
},
176+
177+
// Serves the virtual search-index modules.
// - The entry module exports a map of locale -> lazy `import()` of that
//   locale's chunk. In production the index is (re)built first.
// - A per-locale module exports the serialized index as a JSON string
//   (`{}` when the locale has no index).
async load(id) {
  if (id === LOCAL_SEARCH_INDEX_REQUEST_PATH) {
    if (process.env.NODE_ENV === 'production')
      await scanForBuild()

    const entries = Array.from(
      indexByLocales.keys(),
      locale => `${JSON.stringify(locale)}: () => import('${LOCAL_SEARCH_INDEX_ID}${locale}')`,
    )
    return `export default {${entries.join(',')}}`
  }

  if (!id.startsWith(LOCAL_SEARCH_INDEX_REQUEST_PATH))
    return undefined

  // Whatever follows the request path is the locale key.
  const locale = id.replace(LOCAL_SEARCH_INDEX_REQUEST_PATH, '')
  const serialized = JSON.stringify(indexByLocales.get(locale) ?? {})
  // Stringified twice: the module's default export is a JSON string.
  return `export default ${JSON.stringify(serialized)}`
},
200+
201+
// Rebuilds the search index whenever a markdown file under `srcDir` changes.
async handleHotUpdate({ file }) {
  if (!file.endsWith('.md'))
    return

  const relPath = slash(path.relative(srcDir, file))
  // A relative path starting with `..` points outside `srcDir`.
  const outsideSrcDir = relPath.startsWith('..')
  if (outsideSrcDir)
    return

  // Rebuild the entire index for simplicity
  // (avoids accessing protected MiniSearch internals for discard)
  await scanForBuild()
  debug('Updated index for %s', relPath)
  onIndexUpdated()
},
133213
}
134214
}
215+
216+
// Matches a whole heading element whose content ends with an anchor link
// (`<a ... href="#...">`). Captures: 1 = heading level, 2 = inner HTML.
// eslint-disable-next-line regexp/no-super-linear-backtracking
const headingRegex = /<h(\d+)[^>]*>(.*?<a[^>]* href="#[^"]*"[^>]*>[^<]*<\/a>)<\/h\1>/gi
// Splits a heading's inner HTML. Captures: 1 = title markup, 2 = anchor id.
const headingContentRegex = /(.*)<a[^>]* href="#([^"]*)"[^>]*>[^<]*<\/a>/i

/**
 * Splits rendered HTML into searchable sections, one per heading.
 *
 * Anything before the first heading is discarded. Each yielded section
 * carries the heading's anchor, the chain of titles from the outermost
 * ancestor down to the heading itself, and the tag-free text that follows
 * the heading up to the next one.
 *
 * A heading with a title but no following content yields no section, yet
 * it is still recorded as a parent so the sections beneath it keep the
 * full title chain.
 *
 * @param html Rendered page HTML.
 */
function* splitPageIntoSections(html: string) {
  // Splitting on a regex with two capture groups produces repeating
  // triples: [level, heading inner HTML, content up to the next heading].
  const parts = html.split(headingRegex)
  parts.shift()

  let parentTitles: string[] = []
  for (let i = 0; i < parts.length; i += 3) {
    const level = Number.parseInt(parts[i] ?? '', 10) - 1
    const headingResult = headingContentRegex.exec(parts[i + 1] ?? '')
    const title = clearHtmlTags(headingResult?.[1] ?? '').trim()
    if (!title)
      continue

    const anchor = headingResult?.[2] ?? ''
    const content = parts[i + 2]
    if (content) {
      let titles = parentTitles.slice(0, level)
      titles[level] = title
      // Skipped heading levels leave holes; drop them.
      titles = titles.filter(Boolean)
      yield { anchor, titles, text: getSearchableText(content) }
    }

    // Track the heading even when it had no content of its own.
    if (level === 0) {
      parentTitles = [title]
    }
    else {
      parentTitles[level] = title
    }
  }
}

/**
 * Returns the indexable text of a section: its HTML with all tags removed.
 */
function getSearchableText(content: string) {
  return clearHtmlTags(content)
}

/**
 * Removes every `<...>` tag from `str`, leaving text and entities untouched.
 */
function clearHtmlTags(str: string) {
  return str.replace(/<[^>]*>/g, '')
}

packages/valaxy/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@
119119
"markdown-it-task-lists": "catalog:",
120120
"medium-zoom": "catalog:",
121121
"mermaid": "catalog:",
122+
"minisearch": "catalog:",
122123
"mlly": "catalog:",
123124
"nprogress": "catalog:",
124125
"open": "catalog:",

pnpm-lock.yaml

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pnpm-workspace.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ catalog:
127127
medium-zoom: ^1.1.0
128128
mermaid: ^11.12.3
129129
minimist: ^1.2.8
130+
minisearch: ^7.2.0
130131
mlly: ^1.8.0
131132
nodemon: ^3.1.14
132133
npm-run-all: 4.1.5

0 commit comments

Comments
 (0)