@@ -512,6 +512,146 @@ function sliceLinkSpans(
512512 } ) ;
513513}
514514
/**
 * Extracts the `[start, end)` window of a MarkdownIR.
 *
 * The text is cut with `String.prototype.slice`; style and link spans are
 * delegated to `sliceStyleSpans` / `sliceLinkSpans` with the same bounds.
 */
function sliceMarkdownIR(ir: MarkdownIR, start: number, end: number): MarkdownIR {
  const text = ir.text.slice(start, end);
  const styles = sliceStyleSpans(ir.styles, start, end);
  const links = sliceLinkSpans(ir.links, start, end);
  return { text, styles, links };
}
522+
/**
 * Fuses style spans of the same style that overlap or touch.
 *
 * Each incoming span is compared with the most recent span of the *same style*
 * already emitted, not merely with the last emitted span. This matters when
 * several styles cross the same boundary: for `[boldL, italicL, boldR, italicR]`
 * both the bold pair and the italic pair are fused.
 *
 * A span is only absorbed when it really overlaps or touches the earlier one
 * (`start <= prev.end` and `end >= prev.start`); the fused span covers the
 * union of both ranges, so no coverage is lost if the input is not sorted.
 *
 * @param styles Spans to merge. The array and its elements are not mutated.
 * @returns A new array of copied spans in first-appearance order.
 */
function mergeAdjacentStyleSpans(styles: MarkdownIR["styles"]): MarkdownIR["styles"] {
  const merged: MarkdownIR["styles"] = [];
  // Latest emitted (copied) span for each style value.
  const latestByStyle = new Map<unknown, MarkdownIR["styles"][number]>();
  for (const span of styles) {
    const previous = latestByStyle.get(span.style);
    if (previous && span.start <= previous.end && span.end >= previous.start) {
      previous.start = Math.min(previous.start, span.start);
      previous.end = Math.max(previous.end, span.end);
      continue;
    }
    // Copy so that later in-place extension never touches the caller's objects.
    const copy = { ...span };
    merged.push(copy);
    latestByStyle.set(span.style, copy);
  }
  return merged;
}
535+
/**
 * Collapses consecutive link spans that share an `href` and overlap or touch.
 *
 * A span is folded into the previously emitted one when both point at the same
 * `href` and the new span starts at or before the previous span's end; the
 * previous span's end is then extended to the larger of the two ends.
 * Every emitted span is a shallow copy, so the input objects are left untouched.
 */
function mergeAdjacentLinkSpans(links: MarkdownIR["links"]): MarkdownIR["links"] {
  return links.reduce<MarkdownIR["links"]>((result, current) => {
    const tail = result[result.length - 1];
    if (tail && tail.href === current.href && current.start <= tail.end) {
      tail.end = Math.max(tail.end, current.end);
    } else {
      result.push({ ...current });
    }
    return result;
  }, []);
}
548+
/**
 * Concatenates two MarkdownIR chunks into one.
 *
 * The right chunk's spans are shifted by the length of the left chunk's text so
 * they keep pointing at the same characters in the combined text. The combined
 * span lists are then passed through `mergeAdjacentStyleSpans` /
 * `mergeAdjacentLinkSpans`.
 */
function mergeMarkdownIRChunks(left: MarkdownIR, right: MarkdownIR): MarkdownIR {
  const shift = left.text.length;

  const shiftedStyles = right.styles.map((span) => {
    return { ...span, start: span.start + shift, end: span.end + shift };
  });
  const shiftedLinks = right.links.map((link) => {
    return { ...link, start: link.start + shift, end: link.end + shift };
  });

  const text = left.text + right.text;
  const styles = mergeAdjacentStyleSpans([...left.styles, ...shiftedStyles]);
  const links = mergeAdjacentLinkSpans([...left.links, ...shiftedLinks]);
  return { text, styles, links };
}
571+
/**
 * Renders one chunk to its final HTML string: `renderTelegramHtml` first,
 * then `wrapFileReferencesInHtml` applied to that output.
 */
function renderTelegramChunkHtml(ir: MarkdownIR): string {
  const baseHtml = renderTelegramHtml(ir);
  return wrapFileReferencesInHtml(baseHtml);
}
575+
/**
 * Chooses where the chunk starting at `start` should end, looking at most
 * `limit` UTF-16 code units ahead.
 *
 * If the remaining text fits within `limit`, `text.length` is returned.
 * Otherwise the candidates are tried in this order:
 *   1. just after the last newline outside parentheses,
 *   2. the last non-newline whitespace outside parentheses,
 *   3. just after the last newline anywhere,
 *   4. the last non-newline whitespace anywhere,
 *   5. a hard split at `start + limit`.
 *
 * Whitespace before the first non-whitespace character of the window is never
 * used as a break. For whitespace breaks, when the whitespace run continues
 * past the break position the split is moved to the start of the run so the
 * run is not cut in two.
 *
 * The hard split (5) steps back one code unit if it would otherwise separate a
 * UTF-16 surrogate pair, provided that still advances past `start`.
 *
 * @param text  Full text being chunked.
 * @param start Index of the first character of the chunk.
 * @param limit Maximum chunk length in UTF-16 code units.
 * @returns Exclusive end index of the chunk.
 */
function findMarkdownIRPreservedSplitIndex(text: string, start: number, limit: number): number {
  const maxEnd = Math.min(text.length, start + limit);
  if (maxEnd >= text.length) {
    return text.length;
  }

  const isWhitespace = (value: string | undefined): boolean =>
    value !== undefined && /\s/.test(value);

  let lastOutsideParenNewlineBreak = -1;
  let lastOutsideParenWhitespaceBreak = -1;
  let lastOutsideParenWhitespaceRunStart = -1;
  let lastAnyNewlineBreak = -1;
  let lastAnyWhitespaceBreak = -1;
  let lastAnyWhitespaceRunStart = -1;
  let parenDepth = 0;
  let sawNonWhitespace = false;

  for (let index = start; index < maxEnd; index += 1) {
    const char = text[index];
    if (char === "(") {
      sawNonWhitespace = true;
      parenDepth += 1;
      continue;
    }
    // An unmatched ")" falls through and is treated as ordinary text.
    if (char === ")" && parenDepth > 0) {
      sawNonWhitespace = true;
      parenDepth -= 1;
      continue;
    }
    if (!isWhitespace(char)) {
      sawNonWhitespace = true;
      continue;
    }
    // Leading whitespace of the window is not a break candidate.
    if (!sawNonWhitespace) {
      continue;
    }
    if (char === "\n") {
      lastAnyNewlineBreak = index + 1;
      if (parenDepth === 0) {
        lastOutsideParenNewlineBreak = index + 1;
      }
      continue;
    }
    // Either this character opens a new whitespace run or it extends the current one.
    const whitespaceRunStart =
      index === start || !isWhitespace(text[index - 1]) ? index : lastAnyWhitespaceRunStart;
    lastAnyWhitespaceBreak = index + 1;
    lastAnyWhitespaceRunStart = whitespaceRunStart;
    if (parenDepth === 0) {
      lastOutsideParenWhitespaceBreak = index + 1;
      lastOutsideParenWhitespaceRunStart = whitespaceRunStart;
    }
  }

  // Splits before the run when the run continues past the break, unless that
  // would leave an empty chunk (run begins at or before `start`).
  const resolveWhitespaceBreak = (breakIndex: number, runStart: number): number => {
    if (breakIndex <= start || runStart <= start) {
      return breakIndex;
    }
    return isWhitespace(text[breakIndex]) ? runStart : breakIndex;
  };

  if (lastOutsideParenNewlineBreak > start) {
    return lastOutsideParenNewlineBreak;
  }
  if (lastOutsideParenWhitespaceBreak > start) {
    return resolveWhitespaceBreak(
      lastOutsideParenWhitespaceBreak,
      lastOutsideParenWhitespaceRunStart,
    );
  }
  if (lastAnyNewlineBreak > start) {
    return lastAnyNewlineBreak;
  }
  if (lastAnyWhitespaceBreak > start) {
    return resolveWhitespaceBreak(lastAnyWhitespaceBreak, lastAnyWhitespaceRunStart);
  }

  // Hard split: avoid cutting between a high and a low surrogate, which would
  // leave an unpaired surrogate at the end of this chunk and the start of the next.
  const before = text.charCodeAt(maxEnd - 1);
  const after = text.charCodeAt(maxEnd);
  const splitsSurrogatePair =
    before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
  if (splitsSurrogatePair && maxEnd - 1 > start) {
    return maxEnd - 1;
  }
  return maxEnd;
}
654+
515655function splitMarkdownIRPreserveWhitespace ( ir : MarkdownIR , limit : number ) : MarkdownIR [ ] {
516656 if ( ! ir . text ) {
517657 return [ ] ;
@@ -523,7 +663,7 @@ function splitMarkdownIRPreserveWhitespace(ir: MarkdownIR, limit: number): Markd
523663 const chunks : MarkdownIR [ ] = [ ] ;
524664 let cursor = 0 ;
525665 while ( cursor < ir . text . length ) {
526- const end = Math . min ( ir . text . length , cursor + normalizedLimit ) ;
666+ const end = findMarkdownIRPreservedSplitIndex ( ir . text , cursor , normalizedLimit ) ;
527667 chunks . push ( {
528668 text : ir . text . slice ( cursor , end ) ,
529669 styles : sliceStyleSpans ( ir . styles , cursor , end ) ,
@@ -534,32 +674,98 @@ function splitMarkdownIRPreserveWhitespace(ir: MarkdownIR, limit: number): Markd
534674 return chunks ;
535675}
536676
/**
 * Folds whitespace-only chunks into their neighbours so that no chunk in the
 * result consists solely of whitespace.
 *
 * For every chunk whose text is empty after `trim()`, the following is tried
 * in order, accepting a merge only if its rendered HTML length is `<= limit`:
 *   1. append the whole chunk to the previously emitted chunk;
 *   2. prepend the whole chunk to the following chunk;
 *   3. when both neighbours exist, split the whitespace and give the longest
 *      fitting prefix to the previous chunk and the remainder to the next.
 * A whitespace-only chunk for which none of these succeeds is dropped.
 *
 * @param chunks Chunks in document order. The array is not modified.
 * @param limit  Maximum rendered HTML length allowed for a merged chunk.
 * @returns The chunks that remain after coalescing.
 */
function coalesceWhitespaceOnlyMarkdownIRChunks(chunks: MarkdownIR[], limit: number): MarkdownIR[] {
  // Forward merges overwrite upcoming slots, so operate on a private copy
  // instead of the caller's array.
  const queue = [...chunks];
  const coalesced: MarkdownIR[] = [];
  const fitsLimit = (candidate: MarkdownIR): boolean =>
    renderTelegramChunkHtml(candidate).length <= limit;

  for (let index = 0; index < queue.length; index += 1) {
    const chunk = queue[index];
    if (!chunk) {
      continue;
    }
    if (chunk.text.trim().length > 0) {
      coalesced.push(chunk);
      continue;
    }

    const prev = coalesced.at(-1);
    const next = queue[index + 1];

    if (prev) {
      const mergedPrev = mergeMarkdownIRChunks(prev, chunk);
      if (fitsLimit(mergedPrev)) {
        coalesced[coalesced.length - 1] = mergedPrev;
        continue;
      }
    }

    if (next) {
      const mergedNext = mergeMarkdownIRChunks(chunk, next);
      if (fitsLimit(mergedNext)) {
        // The merged chunk is re-examined on the next iteration.
        queue[index + 1] = mergedNext;
        continue;
      }
    }

    if (prev && next) {
      const chunkLength = chunk.text.length;
      // Try the longest prefix first so as much whitespace as possible stays
      // with the preceding chunk.
      for (let prefixLength = chunkLength - 1; prefixLength >= 1; prefixLength -= 1) {
        const mergedPrev = mergeMarkdownIRChunks(prev, sliceMarkdownIR(chunk, 0, prefixLength));
        if (!fitsLimit(mergedPrev)) {
          continue;
        }
        // Only build and render the next-side merge once the prev side fits.
        const mergedNext = mergeMarkdownIRChunks(
          sliceMarkdownIR(chunk, prefixLength, chunkLength),
          next,
        );
        if (fitsLimit(mergedNext)) {
          coalesced[coalesced.length - 1] = mergedPrev;
          queue[index + 1] = mergedNext;
          break;
        }
      }
    }
    // Falling through here drops a whitespace-only chunk that fits nowhere.
  }

  return coalesced;
}
739+
/**
 * Splits `ir` into chunks whose rendered HTML stays within `limit` and returns
 * each chunk's HTML together with its source text.
 *
 * `limit` is floored and clamped to at least 1. Chunks produced by
 * `chunkMarkdownIR` are processed front to back; a chunk whose HTML is too
 * long (and whose text is longer than one character) is split again with
 * `splitTelegramChunkByHtmlLimit` and the pieces are put back at the front of
 * the queue. Accepted chunks are passed through
 * `coalesceWhitespaceOnlyMarkdownIRChunks` before the final render.
 */
function renderTelegramChunksWithinHtmlLimit(
  ir: MarkdownIR,
  limit: number,
): TelegramFormattedChunk[] {
  const normalizedLimit = Math.max(1, Math.floor(limit));
  const queue = chunkMarkdownIR(ir, normalizedLimit);
  const accepted: MarkdownIR[] = [];

  while (queue.length > 0) {
    const current = queue.shift();
    if (!current) {
      continue;
    }
    const html = renderTelegramChunkHtml(current);
    const needsSplit = html.length > normalizedLimit && current.text.length > 1;
    if (needsSplit) {
      const pieces = splitTelegramChunkByHtmlLimit(current, normalizedLimit, html.length);
      if (pieces.length > 1) {
        queue.unshift(...pieces);
        continue;
      }
      // Worst-case safety: avoid retry loops, deliver the chunk as-is.
    }
    accepted.push(current);
  }

  const coalesced = coalesceWhitespaceOnlyMarkdownIRChunks(accepted, normalizedLimit);
  return coalesced.map((chunk) => {
    return { html: renderTelegramChunkHtml(chunk), text: chunk.text };
  });
}
564770
565771export function markdownToTelegramChunks (
0 commit comments