1use std::mem::take;
8
9use log::warn;
10
11use crate::CONFIG;
12use crate::types::{
13 Delimiter, MdBlockElement, MdInlineElement, MdListItem, MdTableCell, TableAlignment, Token,
14 TokenCursor,
15};
16use crate::utils::push_buffer_to_collection;
17
18pub fn parse_blocks(markdown_lines: &[Vec<Token>]) -> Vec<MdBlockElement> {
26 let mut block_elements: Vec<MdBlockElement> = Vec::new();
27
28 for line in markdown_lines {
29 if let Some(element) = parse_block(line) {
30 block_elements.push(element)
31 }
32 }
33
34 block_elements
35}
36
37fn parse_block(line: &[Token]) -> Option<MdBlockElement> {
45 let first_token = line.first();
46
47 match first_token {
48 Some(Token::Punctuation(string)) if string == "#" => Some(parse_heading(line)),
49 Some(Token::Punctuation(string)) if string == "-" || string == "*" => {
50 if line.len() == 1 && string == "-" {
53 Some(MdBlockElement::ThematicBreak)
55 } else {
56 Some(parse_unordered_list(line))
57 }
58 }
59 Some(Token::OrderedListMarker(_)) => Some(parse_ordered_list(line)),
60 Some(Token::CodeFence) => Some(parse_codeblock(line)),
61 Some(Token::ThematicBreak) => Some(MdBlockElement::ThematicBreak),
62 Some(Token::TableCellSeparator) => Some(parse_table(line)),
63 Some(Token::BlockQuoteMarker) => Some(parse_blockquote(line)),
64 Some(Token::RawHtmlTag(_)) => Some(parse_raw_html(line)),
65 Some(Token::Tab) => Some(parse_indented_codeblock(line)),
66 Some(Token::Newline) => None,
67 _ => Some(MdBlockElement::Paragraph {
68 content: parse_inline(line),
69 }),
70 }
71}
72
73fn parse_indented_codeblock(line: &[Token]) -> MdBlockElement {
85 let mut code_content: Vec<String> = Vec::new();
86 let mut line_buffer: String = String::new();
87
88 let lines_split_by_newline = line.split(|token| token == &Token::Newline);
89
90 lines_split_by_newline.for_each(|token_line| {
91 if token_line.is_empty() {
92 return;
93 }
94
95 for token in &token_line[1..] {
96 match token {
97 Token::Tab => {
98 line_buffer.push_str(&" ".repeat(CONFIG.get().unwrap().lexer.tab_size));
99 }
100 Token::Text(string) | Token::Punctuation(string) => line_buffer.push_str(string),
101 Token::Whitespace => line_buffer.push(' '),
102 Token::Newline => {
103 push_buffer_to_collection(&mut code_content, &mut line_buffer);
104 }
105 Token::Escape(esc_char) => {
106 line_buffer.push_str(&format!("\\{esc_char}"));
107 }
108 Token::OrderedListMarker(string) => line_buffer.push_str(string),
109 Token::EmphasisRun { delimiter, length } => {
110 line_buffer.push_str(&delimiter.to_string().repeat(*length))
111 }
112 Token::OpenParenthesis => line_buffer.push('('),
113 Token::CloseParenthesis => line_buffer.push(')'),
114 Token::OpenBracket => line_buffer.push('['),
115 Token::CloseBracket => line_buffer.push(']'),
116 Token::TableCellSeparator => line_buffer.push('|'),
117 Token::CodeTick => line_buffer.push('`'),
118 Token::CodeFence => line_buffer.push_str("```"),
119 Token::BlockQuoteMarker => line_buffer.push('>'),
120 Token::ThematicBreak => line_buffer.push_str("---"),
121 Token::RawHtmlTag(tag_content) => {
122 let escaped_tag = tag_content.replace("<", "<").replace(">", ">");
124 line_buffer.push_str(&escaped_tag);
125 }
126 }
127 }
128
129 push_buffer_to_collection(&mut code_content, &mut line_buffer);
130 });
131
132 MdBlockElement::CodeBlock {
133 language: None,
134 lines: code_content,
135 }
136}
137
138fn parse_raw_html(line: &[Token]) -> MdBlockElement {
146 let mut html_content = String::new();
147 for token in line {
148 match token {
149 Token::RawHtmlTag(tag_content) => html_content.push_str(tag_content),
150 Token::Text(string) | Token::Punctuation(string) => html_content.push_str(string),
151 Token::Whitespace => html_content.push(' '),
152 Token::Escape(esc_char) => {
153 html_content.push_str(&format!("\\{esc_char}"));
154 }
155 Token::Newline => html_content.push('\n'),
156 Token::OrderedListMarker(string) => html_content.push_str(string),
157 Token::EmphasisRun { delimiter, length } => {
158 html_content.push_str(&delimiter.to_string().repeat(*length))
159 }
160 Token::OpenParenthesis => html_content.push('('),
161 Token::CloseParenthesis => html_content.push(')'),
162 Token::OpenBracket => html_content.push('['),
163 Token::CloseBracket => html_content.push(']'),
164 Token::TableCellSeparator => html_content.push('|'),
165 Token::CodeTick => html_content.push('`'),
166 Token::CodeFence => html_content.push_str("```"),
167 Token::BlockQuoteMarker => html_content.push('>'),
168 Token::Tab => {
169 html_content.push_str(&" ".repeat(CONFIG.get().unwrap().lexer.tab_size));
170 }
171 Token::ThematicBreak => html_content.push_str("---"),
172 }
173 }
174
175 MdBlockElement::RawHtml {
176 content: html_content,
177 }
178}
179
180fn parse_blockquote(line: &[Token]) -> MdBlockElement {
189 let lines_split_by_newline = line.split(|token| token == &Token::Newline);
190
191 let inner_blocks: Vec<Vec<Token>> = lines_split_by_newline
192 .map(|tokens| {
193 let mut result = Vec::new();
194 if tokens.first() == Some(&Token::BlockQuoteMarker)
195 && tokens.get(1) == Some(&Token::Whitespace)
196 {
197 result.extend_from_slice(&tokens[2..]);
198 } else if tokens.first() == Some(&Token::BlockQuoteMarker) {
199 result.extend_from_slice(&tokens[1..]);
200 } else {
201 result.extend_from_slice(tokens);
202 }
203 result
204 })
205 .collect();
206
207 let grouped_inner_blocks = group_lines_to_blocks(inner_blocks);
208
209 let content = parse_blocks(&grouped_inner_blocks);
210
211 if content.is_empty() {
212 MdBlockElement::Paragraph {
213 content: parse_inline(line),
214 }
215 } else {
216 MdBlockElement::BlockQuote { content }
217 }
218}
219
220fn parse_ordered_list(list: &[Token]) -> MdBlockElement {
230 let starting_num = if let Some(Token::OrderedListMarker(num)) = list.first() {
231 num.parse::<usize>().unwrap_or(1)
232 } else {
233 1
234 };
235 parse_list(
236 list,
237 |tokens| {
238 matches!(
239 tokens.first(),
240 Some(Token::OrderedListMarker(_)) if tokens.get(1) == Some(&Token::Whitespace)
241 )
242 },
243 |items| MdBlockElement::OrderedList {
244 items,
245 starting_num,
246 },
247 )
248}
249
250fn parse_unordered_list(list: &[Token]) -> MdBlockElement {
260 parse_list(
261 list,
262 |tokens| {
263 matches!(tokens.first(), Some(Token::Punctuation(string)) if (string == "-" || string == "*") && tokens.get(1) == Some(&Token::Whitespace)
264 )
265 },
266 |items| MdBlockElement::UnorderedList { items },
267 )
268}
269
270fn parse_list<F, G>(list: &[Token], is_list_item: F, make_block: G) -> MdBlockElement
283where
284 F: Fn(&[Token]) -> bool,
285 G: Fn(Vec<MdListItem>) -> MdBlockElement,
286{
287 let lists_split_by_newline = list
288 .split(|token| token == &Token::Newline)
289 .collect::<Vec<_>>();
290 let mut list_items: Vec<MdListItem> = Vec::new();
291
292 let mut i = 0;
293 while i < lists_split_by_newline.len() {
294 let line = lists_split_by_newline[i];
295 if is_list_item(line) {
296 let content_tokens = &line[2..];
297 if let Some(content) = parse_block(content_tokens) {
298 list_items.push(MdListItem { content })
299 }
300
301 let mut nested_lines: Vec<Vec<Token>> = Vec::new();
303 let mut j = i + 1;
304 while j < lists_split_by_newline.len() {
305 let nested_line = lists_split_by_newline[j];
306 if nested_line.first() == Some(&Token::Tab) {
307 let mut nested = nested_line.to_vec();
308 while !nested.is_empty() && nested[0] == Token::Tab {
309 nested.remove(0);
310 }
311 nested_lines.push(nested);
312 j += 1;
313 } else {
314 break;
315 }
316 }
317
318 if !nested_lines.is_empty() {
319 let mut nested_tokens: Vec<Token> = Vec::new();
321 for (k, l) in nested_lines.into_iter().enumerate() {
322 if k > 0 {
323 nested_tokens.push(Token::Newline);
324 }
325 nested_tokens.extend(l);
326 }
327
328 let nested_block = if let Some(Token::OrderedListMarker(_)) = nested_tokens.first()
330 {
331 parse_ordered_list(&nested_tokens)
332 } else {
333 parse_unordered_list(&nested_tokens)
334 };
335
336 list_items.push(MdListItem {
337 content: nested_block,
338 });
339
340 i = j - 1; }
342 }
343 i += 1;
344 }
345
346 make_block(list_items)
348}
349
350fn parse_codeblock(line: &[Token]) -> MdBlockElement {
360 let mut code_content: Vec<String> = Vec::new();
361 let mut language = None;
362 let mut line_buffer: String = String::new();
363 let mut lines_split_by_newline = line
364 .split(|token| token == &Token::Newline)
365 .collect::<Vec<_>>();
366
367 if let Some(Token::Text(string)) = line.get(1) {
368 language = Some(string.clone());
369 lines_split_by_newline.remove(0);
370 }
371
372 lines_split_by_newline.iter().for_each(|line| {
373 if line.is_empty() {
374 return;
375 }
376
377 for token in line.iter() {
378 match token {
379 Token::Text(string) | Token::Punctuation(string) => line_buffer.push_str(string),
380 Token::Whitespace => line_buffer.push(' '),
381 Token::Newline => {
382 push_buffer_to_collection(&mut code_content, &mut line_buffer);
383 }
384 Token::Tab => {
385 line_buffer.push_str(&" ".repeat(CONFIG.get().unwrap().lexer.tab_size));
386 }
387 Token::Escape(esc_char) => {
388 line_buffer.push_str(&format!("\\{esc_char}"));
389 }
390 Token::OrderedListMarker(string) => line_buffer.push_str(string),
391 Token::EmphasisRun { delimiter, length } => {
392 line_buffer.push_str(&delimiter.to_string().repeat(*length))
393 }
394 Token::OpenParenthesis => line_buffer.push('('),
395 Token::CloseParenthesis => line_buffer.push(')'),
396 Token::OpenBracket => line_buffer.push('['),
397 Token::CloseBracket => line_buffer.push(']'),
398 Token::TableCellSeparator => line_buffer.push('|'),
399 Token::CodeTick => line_buffer.push('`'),
400 Token::CodeFence => {}
401 Token::BlockQuoteMarker => line_buffer.push('>'),
402 Token::RawHtmlTag(tag_content) => {
403 let escaped_tag = tag_content.replace("<", "<").replace(">", ">");
404 line_buffer.push_str(&escaped_tag);
405 }
406 Token::ThematicBreak => line_buffer.push_str("---"),
407 }
408 }
409
410 push_buffer_to_collection(&mut code_content, &mut line_buffer);
411 });
412
413 push_buffer_to_collection(&mut code_content, &mut line_buffer);
414
415 MdBlockElement::CodeBlock {
416 language,
417 lines: code_content,
418 }
419}
420
421fn parse_heading(line: &[Token]) -> MdBlockElement {
431 let mut heading_level = 0;
432 let mut i = 0;
433 while let Some(token) = line.get(i) {
434 match token {
435 Token::Punctuation(string) => {
436 if string == "#" {
437 heading_level += 1;
438 } else {
439 break;
440 }
441 }
442 _ => break,
443 }
444 i += 1;
445 }
446
447 if i >= line.len() || line.get(i) != Some(&Token::Whitespace) {
449 return MdBlockElement::Paragraph {
450 content: parse_inline(line),
451 };
452 }
453
454 MdBlockElement::Header {
455 level: heading_level,
456 content: parse_inline(&line[i + 1..]),
457 }
458}
459
460pub fn parse_table(line: &[Token]) -> MdBlockElement {
462 let rows = line
463 .split(|token| token == &Token::Newline)
464 .collect::<Vec<_>>();
465
466 if rows.len() < 3 {
467 return MdBlockElement::Paragraph {
468 content: parse_inline(line),
469 };
470 }
471
472 let header_row = rows
473 .first()
474 .expect("Table should have at least a header row");
475
476 let alignment_row = rows.get(1).expect("Table should have an alignment row");
477
478 let alignments: Vec<TableAlignment> = split_row(alignment_row)
479 .into_iter()
480 .map(|cell_content| {
481 let content: String = cell_content
482 .iter()
483 .filter_map(|token| match token {
484 Token::Text(s) => {
485 warn!("Table alignment should not contain text as it could result in unexpected behavior: {s}");
486 Some(s.to_owned())
487 }
488 Token::Punctuation(s) => Some(s.to_owned()),
489 Token::ThematicBreak => Some("---".to_string()),
490 _ => None,
491 })
492 .collect();
493
494 match (content.starts_with(':'), content.ends_with(':')) {
495 (true, true) => TableAlignment::Center,
496 (true, false) => TableAlignment::Left,
497 (false, true) => TableAlignment::Right,
498 _ => TableAlignment::None,
499 }
500 })
501 .collect();
502
503 let headers: Vec<MdTableCell> = split_row(header_row)
504 .into_iter()
505 .enumerate()
506 .map(|(i, cell_content)| MdTableCell {
507 content: parse_inline(cell_content),
508 alignment: alignments.get(i).cloned().unwrap_or(TableAlignment::None),
509 is_header: true,
510 })
511 .collect();
512
513 let body: Vec<Vec<MdTableCell>> = rows
514 .iter()
515 .skip(2)
516 .map(|row| {
517 split_row(row)
518 .into_iter()
519 .enumerate()
520 .map(|(i, cell_tokens)| MdTableCell {
521 content: parse_inline(cell_tokens),
522 alignment: alignments.get(i).cloned().unwrap_or(TableAlignment::None),
523 is_header: false,
524 })
525 .collect()
526 })
527 .collect();
528
529 MdBlockElement::Table { headers, body }
530}
531
532fn split_row(row: &[Token]) -> Vec<&[Token]> {
537 let mut cells: Vec<&[Token]> = row
538 .split(|token| token == &Token::TableCellSeparator)
539 .collect();
540
541 if let Some(first) = cells.first() {
542 if first.is_empty() {
543 cells.remove(0);
544 }
545 }
546 if let Some(last) = cells.last() {
547 if last.is_empty() {
548 cells.pop();
549 }
550 }
551
552 cells
553}
554
/// Parses a flat slice of tokens into inline elements.
///
/// Plain tokens are accumulated in a text buffer that is flushed (via
/// `push_buffer_to_collection`) whenever a structural element begins. Emphasis
/// runs are recorded as `Placeholder` elements together with an entry on a
/// delimiter stack and are resolved by `resolve_emphasis` after all tokens have
/// been consumed. Links, images and code spans are delegated to
/// `parse_link_type` and `parse_code_span`.
///
/// # Panics
///
/// Panics if the cursor reports it is not at EOF but yields no current token.
pub fn parse_inline(markdown_tokens: &[Token]) -> Vec<MdInlineElement> {
    let mut parsed_inline_elements: Vec<MdInlineElement> = Vec::new();

    // The cursor works on its own copy of the tokens.
    let mut cursor: TokenCursor = TokenCursor {
        tokens: markdown_tokens.to_vec(),
        current_position: 0,
    };

    let mut delimiter_stack: Vec<Delimiter> = Vec::new();

    // Pending plain text that has not been turned into an element yet.
    let mut buffer: String = String::new();

    let mut current_token: &Token;
    while !cursor.is_at_eof() {
        current_token = cursor.current().expect("Token should be valid markdown");

        match current_token {
            Token::EmphasisRun { delimiter, length } => {
                push_buffer_to_collection(&mut parsed_inline_elements, &mut buffer);

                // `parsed_position` is the index the placeholder pushed below will get.
                // Flanking is classified later, so both flags start out permissive.
                delimiter_stack.push(Delimiter {
                    run_length: *length,
                    ch: *delimiter,
                    token_position: cursor.position(),
                    parsed_position: parsed_inline_elements.len(),
                    active: true,
                    can_open: true,
                    can_close: true,
                });

                parsed_inline_elements.push(MdInlineElement::Placeholder {
                    ch: *delimiter,
                    token_position: cursor.position(),
                });
            }
            Token::OpenBracket => {
                push_buffer_to_collection(&mut parsed_inline_elements, &mut buffer);

                let link_element =
                    parse_link_type(&mut cursor, |label, title, url| MdInlineElement::Link {
                        text: label,
                        title,
                        url,
                    });
                parsed_inline_elements.push(link_element);
            }
            Token::CodeTick => {
                // Step past the opening backtick before collecting the span.
                cursor.advance();
                push_buffer_to_collection(&mut parsed_inline_elements, &mut buffer);

                let code_content = parse_code_span(&mut cursor);

                // `parse_code_span` stops on a backtick or when tokens run out; anything
                // other than a backtick here means the span was never closed.
                // NOTE(review): the fallback text appends a closing backtick even though
                // none was found in the input - confirm this is intended.
                if cursor.current() != Some(&Token::CodeTick) {
                    parsed_inline_elements.push(MdInlineElement::Text {
                        content: format!("`{code_content}`"),
                    });
                } else {
                    parsed_inline_elements.push(MdInlineElement::Code {
                        content: code_content,
                    });
                }
            }
            Token::Punctuation(string) if string == "!" => {
                // A `!` only starts an image when it is directly followed by `[`.
                if cursor.peek_ahead(1) != Some(&Token::OpenBracket) {
                    buffer.push('!');
                    cursor.advance();
                    continue;
                }

                push_buffer_to_collection(&mut parsed_inline_elements, &mut buffer);
                // Move from the `!` onto the `[` that `parse_link_type` starts from.
                cursor.advance();
                let image =
                    parse_link_type(&mut cursor, |label, title, url| MdInlineElement::Image {
                        alt_text: flatten_inline(&label),
                        title,
                        url,
                    });

                parsed_inline_elements.push(image);
            }
            // Escapes keep their backslash in the buffered text.
            Token::Escape(esc_char) => buffer.push_str(&format!("\\{esc_char}")),
            Token::Text(string) | Token::Punctuation(string) => buffer.push_str(string),
            Token::OrderedListMarker(string) => buffer.push_str(string),
            Token::Whitespace => buffer.push(' '),
            Token::CloseBracket => buffer.push(']'),
            Token::OpenParenthesis => buffer.push('('),
            Token::CloseParenthesis => buffer.push(')'),
            Token::ThematicBreak => buffer.push_str("---"),
            Token::TableCellSeparator => buffer.push('|'),
            Token::BlockQuoteMarker => buffer.push('>'),
            Token::RawHtmlTag(tag_content) => buffer.push_str(tag_content),
            // Any remaining token kind contributes no text; it only flushes the buffer.
            _ => push_buffer_to_collection(&mut parsed_inline_elements, &mut buffer),
        }

        cursor.advance();
    }

    push_buffer_to_collection(&mut parsed_inline_elements, &mut buffer);

    // Decide for each delimiter run whether it can open and/or close emphasis,
    // based on the surrounding tokens.
    delimiter_stack
        .iter_mut()
        .for_each(|el| el.classify_flanking(&cursor.tokens));

    resolve_emphasis(&mut parsed_inline_elements, &mut delimiter_stack);

    parsed_inline_elements
}
673
674fn parse_code_span(cursor: &mut TokenCursor) -> String {
682 let mut code_content: String = String::new();
683 while let Some(next_token) = cursor.current() {
684 match next_token {
685 Token::CodeTick => break,
686 Token::Text(string) | Token::Punctuation(string) => code_content.push_str(string),
687 Token::OrderedListMarker(string) => code_content.push_str(string),
688 Token::Escape(ch) => code_content.push_str(&format!("\\{ch}")),
689 Token::OpenParenthesis => code_content.push('('),
690 Token::CloseParenthesis => code_content.push(')'),
691 Token::OpenBracket => code_content.push('['),
692 Token::CloseBracket => code_content.push(']'),
693 Token::TableCellSeparator => code_content.push('|'),
694 Token::EmphasisRun { delimiter, length } => {
695 code_content.push_str(&delimiter.to_string().repeat(*length))
696 }
697 Token::Whitespace => code_content.push(' '),
698 Token::Tab => code_content.push_str(&" ".repeat(CONFIG.get().unwrap().lexer.tab_size)),
699 Token::Newline => code_content.push('\n'),
700 Token::ThematicBreak => code_content.push_str("---"),
701 Token::BlockQuoteMarker => code_content.push('>'),
702 Token::RawHtmlTag(tag_content) => code_content.push_str(tag_content),
703 Token::CodeFence => {}
704 }
705
706 cursor.advance();
707 }
708
709 code_content
710}
711
712fn make_image(label: Vec<MdInlineElement>, title: Option<String>, uri: String) -> MdInlineElement {
714 MdInlineElement::Image {
715 alt_text: flatten_inline(&label),
716 title,
717 url: uri,
718 }
719}
720
721fn make_link(label: Vec<MdInlineElement>, title: Option<String>, uri: String) -> MdInlineElement {
723 MdInlineElement::Link {
724 text: label,
725 title,
726 url: uri,
727 }
728}
729
/// Parses a link-shaped construct, `[label](uri "title")`, starting at the cursor.
///
/// The label is parsed into inline elements (nested links, images and emphasis
/// are supported), then the destination and optional quoted title are read.
/// `make_element` turns the three parts into the final element, which lets the
/// same routine serve both links and images.
///
/// Falls back to a plain `Text` element when:
/// * the label is never closed with `]`,
/// * the `]` is not directly followed by `(`,
/// * the destination is never closed with `)`,
/// * a non-empty title has an opening quote but no closing one.
///
/// On success the cursor is left on the closing `)`.
fn parse_link_type<F>(cursor: &mut TokenCursor, make_element: F) -> MdInlineElement
where
    F: Fn(Vec<MdInlineElement>, Option<String>, String) -> MdInlineElement,
{
    let mut label_elements: Vec<MdInlineElement> = Vec::new();
    let mut label_buffer = String::new();
    let mut delimiter_stack: Vec<Delimiter> = Vec::new();
    // Step past the token the cursor starts on (callers in this file invoke this
    // while positioned on the opening `[`).
    cursor.advance();
    while let Some(token) = cursor.current() {
        match token {
            Token::CloseBracket => {
                push_buffer_to_collection(&mut label_elements, &mut label_buffer);
                break;
            }
            Token::OpenBracket => {
                push_buffer_to_collection(&mut label_elements, &mut label_buffer);

                // A bracket inside the label is parsed recursively as a link.
                let inner_link = parse_link_type(cursor, make_link);
                label_elements.push(inner_link);
            }
            Token::EmphasisRun { delimiter, length } => {
                push_buffer_to_collection(&mut label_elements, &mut label_buffer);
                // `parsed_position` is the index the placeholder pushed below will get.
                delimiter_stack.push(Delimiter {
                    run_length: *length,
                    ch: *delimiter,
                    token_position: cursor.position(),
                    parsed_position: label_elements.len(),
                    active: true,
                    can_open: true,
                    can_close: true,
                });
                label_elements.push(MdInlineElement::Placeholder {
                    ch: *delimiter,
                    token_position: cursor.position(),
                });
            }
            Token::Punctuation(s) if s == "!" => {
                // A `!` only starts a nested image when directly followed by `[`.
                if cursor.peek_ahead(1) != Some(&Token::OpenBracket) {
                    label_buffer.push('!');
                    cursor.advance();
                    continue;
                }

                push_buffer_to_collection(&mut label_elements, &mut label_buffer);
                // Move from the `!` onto the `[` before recursing.
                cursor.advance();
                let inner_image = parse_link_type(cursor, make_image);

                label_elements.push(inner_image);
            }
            Token::Text(s) | Token::Punctuation(s) => label_buffer.push_str(s),
            Token::OrderedListMarker(s) => label_buffer.push_str(s),
            // Escapes keep their backslash in the label text.
            Token::Escape(ch) => label_buffer.push_str(&format!("\\{ch}")),
            Token::Whitespace => label_buffer.push(' '),
            Token::ThematicBreak => label_buffer.push_str("---"),
            Token::OpenParenthesis => label_buffer.push('('),
            Token::CloseParenthesis => label_buffer.push(')'),
            Token::TableCellSeparator => label_buffer.push('|'),
            Token::BlockQuoteMarker => label_buffer.push('>'),
            // Any other token kind is dropped from the label.
            _ => {}
        }
        cursor.advance();
    }

    push_buffer_to_collection(&mut label_elements, &mut label_buffer);
    // Note: unlike `parse_inline`, flanking is not classified for label delimiters,
    // so every run keeps `can_open` and `can_close` set.
    resolve_emphasis(&mut label_elements, &mut delimiter_stack);

    // Tokens ran out before the label was closed.
    if cursor.current() != Some(&Token::CloseBracket) {
        return MdInlineElement::Text {
            content: format!("[{}", flatten_inline(&label_elements)),
        };
    }

    // `[label]` without a following `(` is just bracketed text.
    if cursor.peek_ahead(1) != Some(&Token::OpenParenthesis) {
        cursor.advance();
        return MdInlineElement::Text {
            content: format!("[{}]", flatten_inline(&label_elements)),
        };
    }

    // Move from `]` onto `(`; the loop below treats `(` as a no-op in destination mode.
    cursor.advance();
    let mut uri = String::new();
    let mut title = String::new();
    // Switches to `true` at the first whitespace after the destination and back to
    // `false` once the closing quote of the title has been seen.
    let mut is_building_title = false;
    let mut is_valid_title = true;
    let mut has_opening_quote = false;

    while let Some(token) = cursor.current() {
        if !is_building_title {
            // Destination mode.
            match token {
                Token::CloseParenthesis => break,
                Token::Text(s) | Token::Punctuation(s) => uri.push_str(s),
                Token::OrderedListMarker(s) => uri.push_str(s),
                Token::Escape(ch) => uri.push_str(&format!("\\{ch}")),
                Token::Whitespace => is_building_title = true,
                Token::ThematicBreak => uri.push_str("---"),
                Token::TableCellSeparator => uri.push('|'),
                Token::BlockQuoteMarker => uri.push('>'),
                Token::RawHtmlTag(tag_content) => uri.push_str(tag_content),
                // Any other token kind is dropped from the destination.
                _ => {}
            }
        } else {
            // Title mode.
            match token {
                Token::CloseParenthesis => break,
                Token::Punctuation(s) if s == "\"" => {
                    if has_opening_quote {
                        // Closing quote: the title is complete and valid.
                        is_valid_title = true;
                        is_building_title = false;
                    } else {
                        // Opening quote: the title stays invalid until it is closed.
                        has_opening_quote = true;
                        is_valid_title = false;
                    }
                }
                Token::Text(s) | Token::Punctuation(s) => title.push_str(s),
                Token::OrderedListMarker(s) => title.push_str(s),
                Token::Escape(ch) => title.push_str(&format!("\\{ch}")),
                Token::EmphasisRun { delimiter, length } => {
                    title.push_str(&delimiter.to_string().repeat(*length))
                }
                Token::OpenBracket => title.push('['),
                Token::CloseBracket => title.push(']'),
                Token::OpenParenthesis => title.push('('),
                Token::TableCellSeparator => title.push('|'),
                Token::Tab => title.push('\t'),
                // A newline is stored as the two characters `\` and `n`.
                Token::Newline => title.push_str("\\n"),
                Token::Whitespace => title.push(' '),
                Token::CodeTick => title.push('`'),
                Token::CodeFence => title.push_str("```"),
                Token::ThematicBreak => title.push_str("---"),
                Token::BlockQuoteMarker => title.push('>'),
                Token::RawHtmlTag(tag_content) => {
                    warn!(
                        "Raw HTML tags in titles can result in unexpected behavior: {tag_content}"
                    );
                    title.push_str(tag_content);
                }
            }
        }
        cursor.advance();
    }

    if cursor.current() != Some(&Token::CloseParenthesis) {
        // Tokens ran out before `)`.
        return MdInlineElement::Text {
            content: format!("[{}]({} ", flatten_inline(&label_elements), uri),
        };
    } else if !title.is_empty() && !is_valid_title {
        // The title was opened with a quote that was never closed.
        return MdInlineElement::Text {
            content: format!("[{}]({} {})", flatten_inline(&label_elements), uri, title),
        };
    }

    // An empty title is reported as `None`.
    make_element(label_elements, Some(title).filter(|t| !t.is_empty()), uri)
}
897
898fn flatten_inline(elements: &[MdInlineElement]) -> String {
906 let mut result = String::new();
907 for element in elements {
908 match element {
909 MdInlineElement::Text { content } => result.push_str(content),
910 MdInlineElement::Bold { content } => result.push_str(&flatten_inline(content)),
911 MdInlineElement::Italic { content } => result.push_str(&flatten_inline(content)),
912 MdInlineElement::Code { content } => result.push_str(content),
913 MdInlineElement::Link { text, .. } => result.push_str(&flatten_inline(text)),
914 MdInlineElement::Image { alt_text, .. } => result.push_str(alt_text),
915 _ => {}
916 }
917 }
918 result
919}
920
921fn resolve_emphasis(elements: &mut Vec<MdInlineElement>, delimiter_stack: &mut [Delimiter]) {
924 if delimiter_stack.len() == 1 {
925 if delimiter_stack[0].active {
927 elements[delimiter_stack[0].parsed_position] = MdInlineElement::Text {
928 content: delimiter_stack[0].ch.to_string(),
929 };
930 }
931 return;
932 }
933 resolve_emphasis_recursive(elements, delimiter_stack, 0);
934}
935
936fn resolve_emphasis_recursive(
944 elements: &mut Vec<MdInlineElement>,
945 delimiter_stack: &mut [Delimiter],
946 index: usize,
947) {
948 if index >= delimiter_stack.len() {
949 delimiter_stack.iter_mut().for_each(|el| {
950 if el.active && el.parsed_position < elements.len() {
951 let element_to_insert = MdInlineElement::Text {
952 content: el.ch.to_string().repeat(el.run_length),
953 };
954 if let Some(MdInlineElement::Placeholder { .. }) = elements.get(el.parsed_position)
955 {
956 elements.remove(el.parsed_position);
957 }
958 elements.insert(el.parsed_position, element_to_insert);
959 }
960 });
961 return;
962 }
963
964 if !delimiter_stack[index].active || !delimiter_stack[index].can_close {
965 resolve_emphasis_recursive(elements, delimiter_stack, index + 1);
966 return;
967 }
968
969 let closer = delimiter_stack[index].clone();
970
971 for j in (0..index).rev() {
972 if !delimiter_stack[j].active || !delimiter_stack[j].can_open {
973 continue;
974 }
975
976 let opener = delimiter_stack[j].clone();
977
978 if !closer.ch.eq(&opener.ch) {
979 continue;
980 }
981
982 let length_total = closer.run_length + opener.run_length;
985 if ((closer.can_open && closer.can_close) || (opener.can_open && opener.can_close))
986 && (length_total % 3 == 0 && closer.run_length % 3 != 0 && opener.run_length % 3 != 0)
987 {
988 continue;
989 }
990
991 let delimiters_used = if closer.run_length >= 2 && opener.run_length >= 2 {
992 2
993 } else {
994 1
995 };
996
997 let range_start = if opener.run_length > delimiters_used {
998 (opener.parsed_position + 1).saturating_sub(delimiters_used)
999 } else {
1000 opener.parsed_position
1001 };
1002
1003 let range_end = if closer.run_length >= delimiters_used {
1004 closer.parsed_position
1005 } else {
1006 (closer.parsed_position + 1).saturating_sub(delimiters_used)
1007 };
1008
1009 let mut content_slice = elements[range_start + 1..range_end].to_vec();
1010
1011 for i in 0..content_slice.len() {
1013 if let Some(MdInlineElement::Placeholder { ch, token_position }) = content_slice.get(i)
1014 {
1015 if delimiter_stack
1016 .iter()
1017 .any(|d| !d.active && d.token_position == *token_position && d.ch == *ch)
1018 {
1019 content_slice.remove(i);
1020 }
1021 }
1022 }
1023
1024 let element_to_insert = match delimiters_used {
1025 2 => MdInlineElement::Bold {
1026 content: content_slice,
1027 },
1028 1 => MdInlineElement::Italic {
1029 content: content_slice,
1030 },
1031 _ => unreachable!(),
1032 };
1033
1034 if closer.run_length > delimiters_used {
1035 elements[closer.parsed_position - 1] = element_to_insert;
1036 } else {
1037 elements.splice(range_start..=range_end, vec![element_to_insert]);
1038 let num_elements_removed = range_end - range_start;
1039 (0..delimiter_stack.len()).for_each(|k| {
1040 if delimiter_stack[k].parsed_position > closer.parsed_position {
1041 delimiter_stack[k].parsed_position -= num_elements_removed;
1042 }
1043 });
1044 }
1045
1046 delimiter_stack[index].run_length = delimiter_stack[index]
1047 .run_length
1048 .saturating_sub(delimiters_used);
1049 delimiter_stack[j].run_length = delimiter_stack[j]
1050 .run_length
1051 .saturating_sub(delimiters_used);
1052
1053 if delimiter_stack[index].run_length == 0 {
1054 delimiter_stack[index].active = false;
1055 }
1056 if delimiter_stack[j].run_length == 0 {
1057 delimiter_stack[j].active = false;
1058 }
1059
1060 resolve_emphasis_recursive(elements, delimiter_stack, 0);
1062 return;
1063 }
1064
1065 resolve_emphasis_recursive(elements, delimiter_stack, index + 1);
1067}
1068
/// Groups tokenized lines into blocks, each of which is later parsed as one
/// block-level element.
///
/// Lines are visited in order. Depending on the first token of a line and on the
/// first token of the most recently emitted block, a line either starts a new
/// block or is merged into the previous one. While inside a fenced code block
/// every line is attached to the block that holds the opening fence until a line
/// starting with `CodeFence` closes it.
///
/// Returns the grouped blocks in document order.
pub fn group_lines_to_blocks(mut tokenized_lines: Vec<Vec<Token>>) -> Vec<Vec<Token>> {
    let mut blocks: Vec<Vec<Token>> = Vec::new();
    let mut current_block: Vec<Token> = Vec::new();
    let mut previous_block: Vec<Token>;
    let lines = tokenized_lines.iter_mut();
    let mut is_inside_code_block = false;
    for line in lines {
        // Work on a copy of the last emitted block (empty if there is none yet).
        previous_block = blocks.last().unwrap_or(&Vec::new()).to_vec();

        if is_inside_code_block && line.first() != Some(&Token::CodeFence) {
            // Code line: attach it to the open code block, passing a `Newline` as the
            // joining token.
            attach_to_previous_block(&mut blocks, &mut previous_block, line, Some(Token::Newline));
            continue;
        } else if is_inside_code_block && line.first() == Some(&Token::CodeFence) {
            // Closing fence: attach it without a joining token and leave code mode.
            is_inside_code_block = false;
            attach_to_previous_block(&mut blocks, &mut previous_block, line, None);
            continue;
        }

        match line.first() {
            Some(Token::Punctuation(string)) if string == "#" => {
                // A line starting with `#` always forms its own block.
                blocks.push(line.to_owned());
            }
            Some(Token::Punctuation(string)) if string == "-" => {
                group_dashed_lines(&mut blocks, &mut current_block, &mut previous_block, line);
            }
            Some(Token::Punctuation(string)) if string == "*" => {
                group_asterisked_lines(&mut blocks, &mut current_block, &mut previous_block, line);
            }
            Some(Token::Tab) => {
                group_tabbed_lines(&mut blocks, &mut current_block, &mut previous_block, line);
            }
            Some(Token::OrderedListMarker(_)) => {
                group_ordered_list(&mut blocks, &mut current_block, &mut previous_block, line);
            }
            Some(Token::ThematicBreak) => {
                if let Some(previous_line_start) = previous_block.first() {
                    match previous_line_start {
                        // After a `#` line or a blank line the break stands on its own.
                        Token::Punctuation(string) if string == "#" => {
                            blocks.push(take(line));
                        }
                        Token::Newline => blocks.push(take(line)),
                        _ => {
                            // Otherwise the previous block is rewritten with a `## `
                            // prefix and the break line itself is dropped - presumably
                            // setext-style level-2 heading handling; confirm.
                            previous_block.insert(0, Token::Punctuation(String::from("#")));
                            previous_block.insert(1, Token::Punctuation(String::from("#")));
                            previous_block.insert(2, Token::Whitespace);
                            blocks.pop();
                            blocks.push(take(&mut previous_block));
                        }
                    }
                } else {
                    current_block.extend_from_slice(line);
                }
            }
            Some(Token::BlockQuoteMarker) => {
                // Consecutive quote lines are merged into one block.
                if let Some(previous_line_start) = previous_block.first() {
                    if matches!(previous_line_start, Token::BlockQuoteMarker) {
                        attach_to_previous_block(
                            &mut blocks,
                            &mut previous_block,
                            line,
                            Some(Token::Newline),
                        );
                    } else {
                        current_block.extend_from_slice(line);
                    }
                } else {
                    current_block.extend_from_slice(line);
                }
            }
            Some(Token::CodeTick) => {
                current_block.extend_from_slice(line);
            }
            Some(Token::CodeFence) => {
                // The early `continue` above handles fences seen while inside a code
                // block, so `is_inside_code_block` is always false when this arm runs.
                if !is_inside_code_block {
                    is_inside_code_block = true;
                    current_block.extend_from_slice(line);
                } else {
                    is_inside_code_block = false;
                    current_block.extend_from_slice(line);
                    blocks.push(take(&mut current_block));
                }
            }
            Some(Token::Text(string)) if string == "=" => {
                // True when the line holds anything besides `=` text tokens and
                // whitespace-like tokens after its first token.
                let has_trailing_content = line.iter().skip(1).any(|token| match token {
                    Token::Text(s) if s == "=" => false,
                    Token::Whitespace | Token::Tab | Token::Newline => false,
                    _ => true,
                });

                if let Some(previous_line_start) = previous_block.first() {
                    // An underline made only of `=` directly below a text block turns
                    // that block into a level-1 heading.
                    if !has_trailing_content && matches!(previous_line_start, Token::Text(_)) {
                        group_setext_heading_one(&mut blocks, &mut previous_block);
                    } else {
                        group_text_lines(
                            &mut blocks,
                            &mut current_block,
                            &mut previous_block,
                            line,
                        );
                    }
                } else {
                    current_block.extend_from_slice(line);
                }
            }
            Some(Token::Text(_)) => {
                group_text_lines(&mut blocks, &mut current_block, &mut previous_block, line);
            }
            Some(Token::TableCellSeparator) => {
                group_table_rows(&mut blocks, &mut current_block, &mut previous_block, line);
            }
            Some(Token::Whitespace) => {
                group_lines_with_leading_whitespace(
                    &mut blocks,
                    &mut current_block,
                    &mut previous_block,
                    line,
                );
            }
            _ => {
                // Everything else (including empty lines) starts a new block.
                current_block.extend_from_slice(line);
            }
        }

        // Emit whatever the arm above accumulated as a new block.
        if !current_block.is_empty() {
            blocks.push(take(&mut current_block));
        }

        current_block.clear();
    }
    blocks
}
1216
1217fn group_table_rows(
1226 blocks: &mut Vec<Vec<Token>>,
1227 current_block: &mut Vec<Token>,
1228 previous_block: &mut Vec<Token>,
1229 line: &[Token],
1230) {
1231 if let Some(previous_line_start) = previous_block.first() {
1232 if previous_line_start == &Token::TableCellSeparator {
1233 attach_to_previous_block(blocks, previous_block, line, Some(Token::Newline));
1234 } else {
1235 current_block.extend_from_slice(line);
1236 }
1237 } else {
1238 current_block.extend_from_slice(line);
1239 }
1240}
1241
1242fn group_text_lines(
1251 blocks: &mut Vec<Vec<Token>>,
1252 current_block: &mut Vec<Token>,
1253 previous_block: &mut Vec<Token>,
1254 line: &[Token],
1255) {
1256 if !previous_block.is_empty() {
1257 if matches!(previous_block.first(), Some(Token::Text(_))) {
1258 attach_to_previous_block(blocks, previous_block, line, Some(Token::Whitespace));
1259 } else if matches!(previous_block.first(), Some(Token::Punctuation(_))) {
1260 current_block.extend_from_slice(line);
1262 } else {
1263 current_block.extend_from_slice(line);
1265 }
1266 } else {
1267 current_block.extend_from_slice(line);
1269 }
1270}
1271
1272fn group_setext_heading_one(blocks: &mut Vec<Vec<Token>>, previous_block: &mut Vec<Token>) {
1279 previous_block.insert(0, Token::Punctuation(String::from("#")));
1280 previous_block.insert(1, Token::Whitespace);
1281
1282 blocks.pop();
1284 blocks.push(take(previous_block));
1285}
1286
1287fn group_ordered_list(
1297 blocks: &mut Vec<Vec<Token>>,
1298 current_block: &mut Vec<Token>,
1299 previous_block: &mut Vec<Token>,
1300 line: &[Token],
1301) {
1302 if let Some(previous_line_start) = previous_block.first() {
1303 match previous_line_start {
1304 Token::OrderedListMarker(_) if previous_block.get(1) == Some(&Token::Whitespace) => {
1305 attach_to_previous_block(blocks, previous_block, line, Some(Token::Newline));
1307 }
1308 _ => {
1309 current_block.extend_from_slice(line);
1310 }
1311 }
1312 } else {
1313 current_block.extend_from_slice(line);
1314 }
1315}
1316
1317fn attach_to_previous_block(
1319 blocks: &mut Vec<Vec<Token>>,
1320 previous_block: &mut Vec<Token>,
1321 line: &[Token],
1322 separator: Option<Token>,
1323) {
1324 if let Some(separator) = separator {
1325 previous_block.push(separator);
1326 }
1327
1328 previous_block.extend_from_slice(line);
1329 blocks.pop();
1330 blocks.push(take(previous_block));
1331}
1332
1333fn group_tabbed_lines(
1345 blocks: &mut Vec<Vec<Token>>,
1346 current_block: &mut Vec<Token>,
1347 previous_block: &mut Vec<Token>,
1348 line: &[Token],
1349) {
1350 if line.len() == 1 {
1351 current_block.extend_from_slice(line);
1352 return;
1353 }
1354
1355 let non_whitespace_index = line
1356 .iter()
1357 .position(|token| !matches!(token, Token::Whitespace | Token::Tab | Token::Newline));
1358
1359 if let Some(first_content_token) = line.get(non_whitespace_index.unwrap_or(0)) {
1360 if matches!(first_content_token, Token::RawHtmlTag(_))
1361 && matches!(previous_block.first(), Some(Token::RawHtmlTag(_)))
1362 {
1363 let line_to_attach = line
1365 .iter()
1366 .skip_while(|t| matches!(t, Token::Whitespace | Token::Tab | Token::Newline))
1367 .cloned()
1368 .collect::<Vec<Token>>();
1369
1370 attach_to_previous_block(
1371 blocks,
1372 previous_block,
1373 &line_to_attach,
1374 Some(Token::Newline),
1375 );
1376
1377 return;
1378 } else if matches!(first_content_token, Token::RawHtmlTag(_)) {
1379 current_block.extend(
1380 line.iter()
1381 .skip_while(|t| matches!(t, Token::Whitespace | Token::Tab | Token::Newline))
1382 .cloned(),
1383 );
1384 return;
1385 }
1386
1387 if !previous_block.is_empty() {
1388 let previous_line_start = previous_block.first();
1389 match previous_line_start {
1390 Some(Token::Punctuation(string))
1391 if (string == "-" || string == "*")
1392 && previous_block.get(1) == Some(&Token::Whitespace) =>
1393 {
1394 attach_to_previous_block(blocks, previous_block, line, Some(Token::Newline));
1396 }
1397 Some(Token::OrderedListMarker(_))
1398 if previous_block.get(1) == Some(&Token::Whitespace) =>
1399 {
1400 attach_to_previous_block(blocks, previous_block, line, Some(Token::Newline));
1403 }
1404 Some(Token::RawHtmlTag(_)) => {
1405 attach_to_previous_block(blocks, previous_block, line, Some(Token::Newline));
1406 }
1407 Some(Token::Tab) => {
1408 attach_to_previous_block(blocks, previous_block, line, Some(Token::Newline));
1409 }
1410 _ => {
1411 current_block.extend_from_slice(line);
1414 }
1415 }
1416 } else {
1417 current_block.extend_from_slice(line);
1420 }
1421 }
1422}
1423
1424fn group_lines_with_leading_whitespace(
1433 blocks: &mut Vec<Vec<Token>>,
1434 current_block: &mut Vec<Token>,
1435 previous_block: &mut Vec<Token>,
1436 line: &[Token],
1437) {
1438 if let Some(first_content_token) = line
1439 .iter()
1440 .find(|t| !matches!(t, Token::Whitespace | Token::Tab | Token::Newline))
1441 {
1442 if let Some(previous_line_start) = previous_block.first() {
1443 match previous_line_start {
1444 Token::Whitespace => {
1445 if line
1447 .iter()
1448 .any(|t| !matches!(t, Token::Whitespace | Token::Tab | Token::Newline))
1449 {
1450 attach_to_previous_block(
1451 blocks,
1452 previous_block,
1453 line,
1454 Some(Token::Newline),
1455 );
1456 } else {
1457 current_block.extend_from_slice(line);
1458 }
1459 }
1460 Token::RawHtmlTag(_) => {
1461 if matches!(first_content_token, Token::RawHtmlTag(_)) {
1462 attach_to_previous_block(
1464 blocks,
1465 previous_block,
1466 line,
1467 Some(Token::Newline),
1468 );
1469 } else {
1470 current_block.extend_from_slice(line);
1471 }
1472 }
1473 Token::Punctuation(string) if string == "-" => {
1474 if matches!(first_content_token, Token::Punctuation(_)) {
1475 attach_to_previous_block(
1476 blocks,
1477 previous_block,
1478 line,
1479 Some(Token::Newline),
1480 );
1481 } else {
1482 current_block.extend_from_slice(line);
1483 }
1484 }
1485 Token::Text(_) | Token::Punctuation(_) => {
1486 attach_to_previous_block(blocks, previous_block, line, Some(Token::Newline));
1487 }
1488 _ => {
1489 current_block.extend(
1491 line.iter()
1492 .skip_while(|t| {
1493 matches!(t, Token::Whitespace | Token::Tab | Token::Newline)
1494 })
1495 .cloned(),
1496 );
1497 }
1498 }
1499 } else {
1500 current_block.extend_from_slice(line);
1501 }
1502 }
1503}
1504
1505fn group_dashed_lines(
1514 blocks: &mut Vec<Vec<Token>>,
1515 current_block: &mut Vec<Token>,
1516 previous_block: &mut Vec<Token>,
1517 line: &[Token],
1518) {
1519 if let Some(previous_line_start) = previous_block.first() {
1520 match previous_line_start {
1521 Token::Punctuation(string)
1522 if string == "-" && previous_block.get(1) == Some(&Token::Whitespace) =>
1523 {
1524 attach_to_previous_block(blocks, previous_block, line, Some(Token::Newline));
1527 }
1528 Token::Punctuation(string) if string == "#" => {
1529 blocks.push(line.to_owned());
1530 }
1531 _ => {
1532 if line.len() > 1 {
1533 current_block.extend_from_slice(line);
1534 } else {
1535 previous_block.insert(0, Token::Punctuation(String::from("#")));
1537 previous_block.insert(1, Token::Punctuation(String::from("#")));
1538 previous_block.insert(2, Token::Whitespace);
1539 blocks.pop();
1540 blocks.push(take(previous_block));
1541 }
1542 }
1543 }
1544 } else {
1545 current_block.extend_from_slice(line);
1546 }
1547}
1548
1549fn group_asterisked_lines(
1558 blocks: &mut Vec<Vec<Token>>,
1559 current_block: &mut Vec<Token>,
1560 previous_block: &mut Vec<Token>,
1561 line: &[Token],
1562) {
1563 if let Some(previous_line_start) = previous_block.first() {
1564 if *previous_line_start == Token::Punctuation(String::from("*"))
1565 && previous_block.get(1) == Some(&Token::Whitespace)
1566 {
1567 attach_to_previous_block(blocks, previous_block, line, Some(Token::Newline));
1568 } else {
1569 current_block.extend_from_slice(line);
1570 }
1571 } else {
1572 current_block.extend_from_slice(line);
1573 }
1574}
1575
1576#[cfg(test)]
1577mod test;