// Lexical translation parser: extract translatable segments from serialized JSON.
// Uses blacklist-based skipping + generalized nested editor detection.
33
4- import {
5- CodeBlockNode ,
6- CodeSnippetNode ,
7- EmbedNode ,
8- ExcalidrawNode ,
9- FootnoteNode ,
10- GalleryNode ,
11- ImageNode ,
12- KaTeXBlockNode ,
13- KaTeXInlineNode ,
14- LinkCardNode ,
15- MentionNode ,
16- MermaidNode ,
17- VideoNode ,
18- } from '@haklex/rich-headless'
19-
204import {
215 BLOCK_ID_STATE_KEY ,
226 NODE_STATE_KEY ,
237} from '~/constants/lexical.constant'
8+ import {
9+ isNestedLexicalEditorState ,
10+ KNOWN_LEXICAL_STRUCTURAL_PROPS ,
11+ LEXICAL_CONTEXT_EXCALIDRAW_TYPE ,
12+ LEXICAL_CONTEXT_SKIP_BLOCKS ,
13+ LEXICAL_CONTEXT_SKIP_INLINE ,
14+ } from '~/utils/content.util'
2415
2516const FORMAT_CODE = 16
2617
// Serialized `type` value of Excalidraw nodes. Nodes of this type are not
// walked like ordinary nodes; their text is pulled out of `snapshot` instead.
const EXCALIDRAW_TYPE = ExcalidrawNode.getType()

// Block-level node types that are ignored entirely: a node whose `type` is in
// this set contributes no segments and no context text.
const SKIP_BLOCKS = new Set([
  // code
  'code',
  CodeBlockNode.getType(),
  CodeSnippetNode.getType(),
  'code-highlight',
  // media / embeds
  ImageNode.getType(),
  VideoNode.getType(),
  GalleryNode.getType(),
  LinkCardNode.getType(),
  KaTeXBlockNode.getType(),
  MermaidNode.getType(),
  EmbedNode.getType(),
  // misc
  'horizontalrule',
  'component',
])

// Inline node types that are ignored entirely, same treatment as SKIP_BLOCKS.
const SKIP_INLINE = new Set([
  KaTeXInlineNode.getType(),
  MentionNode.getType(),
  FootnoteNode.getType(),
])

// Property names that are never inspected when scanning a node for nested
// editor states (`{ root: { children: [...] } }`).
const KNOWN_STRUCTURAL_PROPS = new Set([
  'children',
  'type',
  'version',
  'direction',
  'format',
  'indent',
  'style',
  'detail',
  'mode',
  'text',
  'tag',
  'listType',
  'start',
  'value',
  'url',
  'rel',
  'target',
  'colSpan',
  'headerState',
  'width',
  NODE_STATE_KEY,
])

74-
7518export interface TranslationSegment {
7619 id : string
7720 text : string
@@ -142,28 +85,6 @@ function extractExcalidrawTexts(
14285 }
14386}
14487
/**
 * Collects the text of every shape found in an Excalidraw node's serialized
 * snapshot, for use as plain-text document context.
 *
 * `node.snapshot` must be a non-empty JSON string whose parsed value has an
 * object-valued `store`. Each value in `store` whose `props.text` is a string
 * containing non-whitespace characters contributes that text, untrimmed.
 *
 * @param node - Serialized node; only its `snapshot` property is read.
 * @returns The collected texts joined with `'\n'`, or `''` when the snapshot
 *   is missing, not a string, not valid JSON, or has no usable `store`.
 */
function extractExcalidrawTextForContext(node: any): string {
  const snapshot: unknown = node?.snapshot
  if (typeof snapshot !== 'string' || snapshot === '') return ''

  let parsed: unknown
  try {
    parsed = JSON.parse(snapshot)
  } catch {
    // Best effort: an unparseable snapshot simply contributes no context.
    return ''
  }

  // Optional chaining covers a `null` parse result and primitives alike.
  const store = (parsed as { store?: unknown } | null)?.store
  if (typeof store !== 'object' || store === null) return ''

  const texts: string[] = []
  for (const shape of Object.values(store)) {
    const text = (shape as { props?: { text?: unknown } | null } | null)?.props
      ?.text
    // Whitespace-only labels are dropped, but kept text is pushed untrimmed.
    if (typeof text === 'string' && text.trim()) texts.push(text)
  }
  return texts.join('\n')
}

166-
16788function walkNode (
16889 node : any ,
16990 segments : TranslationSegment [ ] ,
@@ -174,13 +95,13 @@ function walkNode(
17495 if ( ! node ) return
17596
17697 // Handle excalidraw: extract text from shapes within snapshot
177- if ( node . type === EXCALIDRAW_TYPE ) {
98+ if ( node . type === LEXICAL_CONTEXT_EXCALIDRAW_TYPE ) {
17899 extractExcalidrawTexts ( node , propertySegments , counter , ctx )
179100 return
180101 }
181102
182- if ( SKIP_BLOCKS . has ( node . type ) ) return
183- if ( SKIP_INLINE . has ( node . type ) ) return
103+ if ( LEXICAL_CONTEXT_SKIP_BLOCKS . has ( node . type ) ) return
104+ if ( LEXICAL_CONTEXT_SKIP_INLINE . has ( node . type ) ) return
184105
185106 // Special translatable properties
186107 if (
@@ -263,17 +184,11 @@ function scanNestedEditorStates(
263184 ctx : BlockContext ,
264185) : void {
265186 for ( const [ propName , propValue ] of Object . entries ( node ) ) {
266- if ( KNOWN_STRUCTURAL_PROPS . has ( propName ) ) continue
187+ if ( KNOWN_LEXICAL_STRUCTURAL_PROPS . has ( propName ) ) continue
267188
268189 // Single nested editor state: { root: { children: [...] } }
269- if (
270- propValue &&
271- typeof propValue === 'object' &&
272- ! Array . isArray ( propValue ) &&
273- ( propValue as any ) . root &&
274- Array . isArray ( ( propValue as any ) . root . children )
275- ) {
276- for ( const child of ( propValue as any ) . root . children ) {
190+ if ( isNestedLexicalEditorState ( propValue ) ) {
191+ for ( const child of propValue . root . children ) {
277192 walkNode ( child , segments , propertySegments , counter , ctx )
278193 }
279194 continue
@@ -282,12 +197,7 @@ function scanNestedEditorStates(
282197 // Array of nested editor states
283198 if ( Array . isArray ( propValue ) ) {
284199 for ( const item of propValue ) {
285- if (
286- item &&
287- typeof item === 'object' &&
288- item . root &&
289- Array . isArray ( item . root . children )
290- ) {
200+ if ( isNestedLexicalEditorState ( item ) ) {
291201 for ( const child of item . root . children ) {
292202 walkNode ( child , segments , propertySegments , counter , ctx )
293203 }
@@ -297,69 +207,6 @@ function scanNestedEditorStates(
297207 }
298208}
299209
300- // ── Document context extraction ──
301-
// Container node types whose children's texts are joined with a newline when
// flattened to context text; children of any other type are concatenated
// without a separator.
const BLOCK_TYPES = new Set([
  'listitem',
  'tablecell',
  'tablerow',
  'details',
  'list',
  'table',
  'root',
])

311-
/**
 * Recursively flattens a serialized node into plain text for document context.
 *
 * - Falsy nodes and nodes whose type is in SKIP_BLOCKS / SKIP_INLINE yield `''`.
 * - Excalidraw nodes delegate to `extractExcalidrawTextForContext`.
 * - `text` nodes yield `node.text` (or `''` when it is null/undefined) and
 *   `linebreak` nodes yield `'\n'`.
 * - Otherwise the texts of `node.children` are joined (with `'\n'` for types
 *   in BLOCK_TYPES, with no separator for the rest), followed by the text of
 *   any nested editor state found in the node's other properties.
 *
 * @param node - Serialized node (or any value found in a children array).
 * @returns The flattened text; the collected parts are joined with `'\n'`.
 */
function extractBlockText(node: any): string {
  if (!node) return ''

  const kind = node.type
  if (kind === EXCALIDRAW_TYPE) return extractExcalidrawTextForContext(node)
  if (SKIP_BLOCKS.has(kind)) return ''
  if (SKIP_INLINE.has(kind)) return ''
  if (kind === 'text') return node.text ?? ''
  if (kind === 'linebreak') return '\n'

  // Flattens each entry of `nodes` and keeps only the non-empty results.
  const textsOf = (nodes: any[]): any[] => {
    const collected: any[] = []
    for (const entry of nodes) {
      const flattened = extractBlockText(entry)
      if (flattened) collected.push(flattened)
    }
    return collected
  }

  const pieces: string[] = []

  if (Array.isArray(node.children)) {
    const separator = BLOCK_TYPES.has(kind) ? '\n' : ''
    const ownText = textsOf(node.children).join(separator)
    if (ownText) pieces.push(ownText)
  }

  // Nested editor states: any non-structural property that is, or is an array
  // of, `{ root: { children: [...] } }` objects.
  for (const [propName, propValue] of Object.entries(node)) {
    if (KNOWN_STRUCTURAL_PROPS.has(propName)) continue

    let candidates: any[] = []
    if (Array.isArray(propValue)) {
      candidates = propValue
    } else if (propValue && typeof propValue === 'object') {
      candidates = [propValue]
    }

    for (const candidate of candidates) {
      const nestedChildren = candidate?.root?.children
      if (!Array.isArray(nestedChildren)) continue
      const nestedTexts = textsOf(nestedChildren)
      if (nestedTexts.length) pieces.push(nestedTexts.join('\n'))
    }
  }

  return pieces.join('\n')
}

358-
/**
 * Builds the plain-text context for a whole document.
 *
 * @param rootChildren - Top-level nodes of the serialized editor state.
 * @returns The text of each top-level node (via `extractBlockText`), with
 *   empty results dropped and the rest separated by a blank line (`'\n\n'`).
 */
export function extractDocumentContext(rootChildren: any[]): string {
  const blocks = rootChildren
    .map((child) => extractBlockText(child))
    .filter((blockText) => Boolean(blockText))
  return blocks.join('\n\n')
}

362-
363210// ── Parser ──
364211
365212function readBlockId ( node : any ) : string | null {
0 commit comments