markrs/
parser.rs

1//! This module contains the parser for converting tokenized Markdown lines into structured
2//! Markdown elements.
3//!
4//! It provides functions to parse block-level elements like headings, lists, and code blocks,
5//! as well as inline elements like links, images, and emphasis.
6
7use std::mem::take;
8
9use log::warn;
10
11use crate::CONFIG;
12use crate::types::{
13    Delimiter, MdBlockElement, MdInlineElement, MdListItem, MdTableCell, TableAlignment, Token,
14    TokenCursor,
15};
16use crate::utils::push_buffer_to_collection;
17
18/// Parses a vector of tokenized markdown lines into a vector of block-level Markdown elements.
19///
20/// # Arguments
21/// * `markdown_lines` - A vector of vectors, where each inner vector contains tokens representing a line of markdown.
22///
23/// # Returns
24/// A vector of parsed block-level Markdown elements.
25pub fn parse_blocks(markdown_lines: &[Vec<Token>]) -> Vec<MdBlockElement> {
26    let mut block_elements: Vec<MdBlockElement> = Vec::new();
27
28    for line in markdown_lines {
29        if let Some(element) = parse_block(line) {
30            block_elements.push(element)
31        }
32    }
33
34    block_elements
35}
36
37/// Parses a single line of tokens into a block-level Markdown element.
38///
39/// # Arguments
40/// * `line` - A vector of tokens representing a single line of markdown.
41///
42/// # Returns
43/// An `Option<MdBlockElement>`, returning `None` for empty lines
44fn parse_block(line: &[Token]) -> Option<MdBlockElement> {
45    let first_token = line.first();
46
47    match first_token {
48        Some(Token::Punctuation(string)) if string == "#" => Some(parse_heading(line)),
49        Some(Token::Punctuation(string)) if string == "-" || string == "*" => {
50            // Note that setext headings have already been handled in the group_lines_to_blocks
51            // function by this point
52            if line.len() == 1 && string == "-" {
53                // If the line only contains a dash, then it is a thematic break
54                Some(MdBlockElement::ThematicBreak)
55            } else {
56                Some(parse_unordered_list(line))
57            }
58        }
59        Some(Token::OrderedListMarker(_)) => Some(parse_ordered_list(line)),
60        Some(Token::CodeFence) => Some(parse_codeblock(line)),
61        Some(Token::ThematicBreak) => Some(MdBlockElement::ThematicBreak),
62        Some(Token::TableCellSeparator) => Some(parse_table(line)),
63        Some(Token::BlockQuoteMarker) => Some(parse_blockquote(line)),
64        Some(Token::RawHtmlTag(_)) => Some(parse_raw_html(line)),
65        Some(Token::Tab) => Some(parse_indented_codeblock(line)),
66        Some(Token::Newline) => None,
67        _ => Some(MdBlockElement::Paragraph {
68            content: parse_inline(line),
69        }),
70    }
71}
72
73/// Parses an indented code block from a vector of tokens.
74///
75/// Note that CommonMark defines indented code blocks as lines that start with at least 4 spaces or
76/// a tab. However, this implementation only focuses on tabs, the size of which is defined in
77/// `config.toml`.
78///
79/// # Arguments
80/// * `line` - A vector of tokens representing an indented code block.
81///
82/// # Returns
83/// An `MdBlockElement::CodeBlock` containing the parsed code content.
84fn parse_indented_codeblock(line: &[Token]) -> MdBlockElement {
85    let mut code_content: Vec<String> = Vec::new();
86    let mut line_buffer: String = String::new();
87
88    let lines_split_by_newline = line.split(|token| token == &Token::Newline);
89
90    lines_split_by_newline.for_each(|token_line| {
91        if token_line.is_empty() {
92            return;
93        }
94
95        for token in &token_line[1..] {
96            match token {
97                Token::Tab => {
98                    line_buffer.push_str(&" ".repeat(CONFIG.get().unwrap().lexer.tab_size));
99                }
100                Token::Text(string) | Token::Punctuation(string) => line_buffer.push_str(string),
101                Token::Whitespace => line_buffer.push(' '),
102                Token::Newline => {
103                    push_buffer_to_collection(&mut code_content, &mut line_buffer);
104                }
105                Token::Escape(esc_char) => {
106                    line_buffer.push_str(&format!("\\{esc_char}"));
107                }
108                Token::OrderedListMarker(string) => line_buffer.push_str(string),
109                Token::EmphasisRun { delimiter, length } => {
110                    line_buffer.push_str(&delimiter.to_string().repeat(*length))
111                }
112                Token::OpenParenthesis => line_buffer.push('('),
113                Token::CloseParenthesis => line_buffer.push(')'),
114                Token::OpenBracket => line_buffer.push('['),
115                Token::CloseBracket => line_buffer.push(']'),
116                Token::TableCellSeparator => line_buffer.push('|'),
117                Token::CodeTick => line_buffer.push('`'),
118                Token::CodeFence => line_buffer.push_str("```"),
119                Token::BlockQuoteMarker => line_buffer.push('>'),
120                Token::ThematicBreak => line_buffer.push_str("---"),
121                Token::RawHtmlTag(tag_content) => {
122                    // This should never be the first token, but inline html is allowed
123                    let escaped_tag = tag_content.replace("<", "&lt;").replace(">", "&gt;");
124                    line_buffer.push_str(&escaped_tag);
125                }
126            }
127        }
128
129        push_buffer_to_collection(&mut code_content, &mut line_buffer);
130    });
131
132    MdBlockElement::CodeBlock {
133        language: None,
134        lines: code_content,
135    }
136}
137
138/// Parses raw HTML tags from a vector of tokens into an `MdBlockElement::RawHtml`.
139///
140/// # Arguments
141/// * `line` - A vector of tokens representing a line of raw HTML.
142///
143/// # Returns
144/// An `MdBlockElement::RawHtml` containing the parsed HTML content.
145fn parse_raw_html(line: &[Token]) -> MdBlockElement {
146    let mut html_content = String::new();
147    for token in line {
148        match token {
149            Token::RawHtmlTag(tag_content) => html_content.push_str(tag_content),
150            Token::Text(string) | Token::Punctuation(string) => html_content.push_str(string),
151            Token::Whitespace => html_content.push(' '),
152            Token::Escape(esc_char) => {
153                html_content.push_str(&format!("\\{esc_char}"));
154            }
155            Token::Newline => html_content.push('\n'),
156            Token::OrderedListMarker(string) => html_content.push_str(string),
157            Token::EmphasisRun { delimiter, length } => {
158                html_content.push_str(&delimiter.to_string().repeat(*length))
159            }
160            Token::OpenParenthesis => html_content.push('('),
161            Token::CloseParenthesis => html_content.push(')'),
162            Token::OpenBracket => html_content.push('['),
163            Token::CloseBracket => html_content.push(']'),
164            Token::TableCellSeparator => html_content.push('|'),
165            Token::CodeTick => html_content.push('`'),
166            Token::CodeFence => html_content.push_str("```"),
167            Token::BlockQuoteMarker => html_content.push('>'),
168            Token::Tab => {
169                html_content.push_str(&" ".repeat(CONFIG.get().unwrap().lexer.tab_size));
170            }
171            Token::ThematicBreak => html_content.push_str("---"),
172        }
173    }
174
175    MdBlockElement::RawHtml {
176        content: html_content,
177    }
178}
179
180/// Parses a blockquote from a vector of tokens into an `MdBlockElement::BlockQuote`.
181///
182/// # Arguments
183/// * `line` - A vector of tokens representing a blockquote.
184///
185/// # Returns
186/// An `MdBlockElement::BlockQuote` containing the parsed content, or a `MdBlockElement::Paragraph`
187/// if the content is empty.
188fn parse_blockquote(line: &[Token]) -> MdBlockElement {
189    let lines_split_by_newline = line.split(|token| token == &Token::Newline);
190
191    let inner_blocks: Vec<Vec<Token>> = lines_split_by_newline
192        .map(|tokens| {
193            let mut result = Vec::new();
194            if tokens.first() == Some(&Token::BlockQuoteMarker)
195                && tokens.get(1) == Some(&Token::Whitespace)
196            {
197                result.extend_from_slice(&tokens[2..]);
198            } else if tokens.first() == Some(&Token::BlockQuoteMarker) {
199                result.extend_from_slice(&tokens[1..]);
200            } else {
201                result.extend_from_slice(tokens);
202            }
203            result
204        })
205        .collect();
206
207    let grouped_inner_blocks = group_lines_to_blocks(inner_blocks);
208
209    let content = parse_blocks(&grouped_inner_blocks);
210
211    if content.is_empty() {
212        MdBlockElement::Paragraph {
213            content: parse_inline(line),
214        }
215    } else {
216        MdBlockElement::BlockQuote { content }
217    }
218}
219
220/// Parses a vector of tokens representing an ordered list into an `MdBlockElement::OrderedList`.
221///
222/// Calls the more generic `parse_list` function, which parses nested list items
223///
224/// # Arguments
225/// * `list` - A vector of tokens representing an ordered list.
226///
227/// # Returns
228/// An `MdBlockElement` representing the ordered list.
229fn parse_ordered_list(list: &[Token]) -> MdBlockElement {
230    let starting_num = if let Some(Token::OrderedListMarker(num)) = list.first() {
231        num.parse::<usize>().unwrap_or(1)
232    } else {
233        1
234    };
235    parse_list(
236        list,
237        |tokens| {
238            matches!(
239                tokens.first(),
240                Some(Token::OrderedListMarker(_)) if tokens.get(1) == Some(&Token::Whitespace)
241            )
242        },
243        |items| MdBlockElement::OrderedList {
244            items,
245            starting_num,
246        },
247    )
248}
249
250/// Parses a vector of tokens representing an unordered list into an `MdBlockElement::UnorderedList`.
251///
252/// Calls the more generic `parse_list` function, which parses nested list items
253///
254/// # Arguments
255/// * `list` - A vector of tokens representing an unordered list.
256///
257/// # Returns
258/// An `MdBlockElement` representing the unordered list.
259fn parse_unordered_list(list: &[Token]) -> MdBlockElement {
260    parse_list(
261        list,
262        |tokens| {
263            matches!(tokens.first(), Some(Token::Punctuation(string)) if (string == "-" || string == "*") && tokens.get(1) == Some(&Token::Whitespace)
264            )
265        },
266        |items| MdBlockElement::UnorderedList { items },
267    )
268}
269
270/// Generic list parser used to reduce code duplication between ordered and unordered lists.
271///
272/// Handles splitting lines, identifying list items, and parsing nested lists. The behavior is
273/// determined by a predicate for identifying list items and a constructor for the resulting block.
274///
275/// # Arguments
276/// * `list` - The tokens to parse.
277/// * `is_list_item` - Predicate to identify a top-level list item.
278/// * `make_block` - Constructor for the resulting `MdBlockElement`.
279///
280/// # Returns
281/// An `MdBlockElement` representing either an ordered or unordered list, depending on the passed in constructor.
282fn parse_list<F, G>(list: &[Token], is_list_item: F, make_block: G) -> MdBlockElement
283where
284    F: Fn(&[Token]) -> bool,
285    G: Fn(Vec<MdListItem>) -> MdBlockElement,
286{
287    let lists_split_by_newline = list
288        .split(|token| token == &Token::Newline)
289        .collect::<Vec<_>>();
290    let mut list_items: Vec<MdListItem> = Vec::new();
291
292    let mut i = 0;
293    while i < lists_split_by_newline.len() {
294        let line = lists_split_by_newline[i];
295        if is_list_item(line) {
296            let content_tokens = &line[2..];
297            if let Some(content) = parse_block(content_tokens) {
298                list_items.push(MdListItem { content })
299            }
300
301            // Check for consecutive tab-indented lines (nested list)
302            let mut nested_lines: Vec<Vec<Token>> = Vec::new();
303            let mut j = i + 1;
304            while j < lists_split_by_newline.len() {
305                let nested_line = lists_split_by_newline[j];
306                if nested_line.first() == Some(&Token::Tab) {
307                    let mut nested = nested_line.to_vec();
308                    while !nested.is_empty() && nested[0] == Token::Tab {
309                        nested.remove(0);
310                    }
311                    nested_lines.push(nested);
312                    j += 1;
313                } else {
314                    break;
315                }
316            }
317
318            if !nested_lines.is_empty() {
319                // Flatten nested lines into a single Vec<Token> separated by Newline
320                let mut nested_tokens: Vec<Token> = Vec::new();
321                for (k, l) in nested_lines.into_iter().enumerate() {
322                    if k > 0 {
323                        nested_tokens.push(Token::Newline);
324                    }
325                    nested_tokens.extend(l);
326                }
327
328                // Recursively parse nested list, try ordered first, fallback to unordered
329                let nested_block = if let Some(Token::OrderedListMarker(_)) = nested_tokens.first()
330                {
331                    parse_ordered_list(&nested_tokens)
332                } else {
333                    parse_unordered_list(&nested_tokens)
334                };
335
336                list_items.push(MdListItem {
337                    content: nested_block,
338                });
339
340                i = j - 1; // Skip processed nested lines
341            }
342        }
343        i += 1;
344    }
345
346    // Use the passed in constructor to create the List element
347    make_block(list_items)
348}
349
350/// Parses a vector of tokens representing a code block into an `MdBlockElement::CodeBlock`.
351///
352/// Extracts the language (if specified) and the code content.
353///
354/// # Arguments
355/// * `line` - A vector of tokens representing a code block.
356///
357/// # Returns
358/// An `MdBlockElement` representing the code block.
359fn parse_codeblock(line: &[Token]) -> MdBlockElement {
360    let mut code_content: Vec<String> = Vec::new();
361    let mut language = None;
362    let mut line_buffer: String = String::new();
363    let mut lines_split_by_newline = line
364        .split(|token| token == &Token::Newline)
365        .collect::<Vec<_>>();
366
367    if let Some(Token::Text(string)) = line.get(1) {
368        language = Some(string.clone());
369        lines_split_by_newline.remove(0);
370    }
371
372    lines_split_by_newline.iter().for_each(|line| {
373        if line.is_empty() {
374            return;
375        }
376
377        for token in line.iter() {
378            match token {
379                Token::Text(string) | Token::Punctuation(string) => line_buffer.push_str(string),
380                Token::Whitespace => line_buffer.push(' '),
381                Token::Newline => {
382                    push_buffer_to_collection(&mut code_content, &mut line_buffer);
383                }
384                Token::Tab => {
385                    line_buffer.push_str(&" ".repeat(CONFIG.get().unwrap().lexer.tab_size));
386                }
387                Token::Escape(esc_char) => {
388                    line_buffer.push_str(&format!("\\{esc_char}"));
389                }
390                Token::OrderedListMarker(string) => line_buffer.push_str(string),
391                Token::EmphasisRun { delimiter, length } => {
392                    line_buffer.push_str(&delimiter.to_string().repeat(*length))
393                }
394                Token::OpenParenthesis => line_buffer.push('('),
395                Token::CloseParenthesis => line_buffer.push(')'),
396                Token::OpenBracket => line_buffer.push('['),
397                Token::CloseBracket => line_buffer.push(']'),
398                Token::TableCellSeparator => line_buffer.push('|'),
399                Token::CodeTick => line_buffer.push('`'),
400                Token::CodeFence => {}
401                Token::BlockQuoteMarker => line_buffer.push('>'),
402                Token::RawHtmlTag(tag_content) => {
403                    let escaped_tag = tag_content.replace("<", "&lt;").replace(">", "&gt;");
404                    line_buffer.push_str(&escaped_tag);
405                }
406                Token::ThematicBreak => line_buffer.push_str("---"),
407            }
408        }
409
410        push_buffer_to_collection(&mut code_content, &mut line_buffer);
411    });
412
413    push_buffer_to_collection(&mut code_content, &mut line_buffer);
414
415    MdBlockElement::CodeBlock {
416        language,
417        lines: code_content,
418    }
419}
420
421/// Parses a vector of tokens representing a heading into an `MdBlockElement::Header`.
422///
423/// Determines the heading level and parses the heading content.
424///
425/// # Arguments
426/// * `line` - A vector of tokens representing a heading line.
427///
428/// # Returns
429/// An `MdBlockElement` representing the heading, or a paragraph if the heading is invalid.
430fn parse_heading(line: &[Token]) -> MdBlockElement {
431    let mut heading_level = 0;
432    let mut i = 0;
433    while let Some(token) = line.get(i) {
434        match token {
435            Token::Punctuation(string) => {
436                if string == "#" {
437                    heading_level += 1;
438                } else {
439                    break;
440                }
441            }
442            _ => break,
443        }
444        i += 1;
445    }
446
447    // At this point, we should be at a non-# token or the end of the line
448    if i >= line.len() || line.get(i) != Some(&Token::Whitespace) {
449        return MdBlockElement::Paragraph {
450            content: parse_inline(line),
451        };
452    }
453
454    MdBlockElement::Header {
455        level: heading_level,
456        content: parse_inline(&line[i + 1..]),
457    }
458}
459
460/// Parses GitHub-style tables from the input vector of tokens.
461pub fn parse_table(line: &[Token]) -> MdBlockElement {
462    let rows = line
463        .split(|token| token == &Token::Newline)
464        .collect::<Vec<_>>();
465
466    if rows.len() < 3 {
467        return MdBlockElement::Paragraph {
468            content: parse_inline(line),
469        };
470    }
471
472    let header_row = rows
473        .first()
474        .expect("Table should have at least a header row");
475
476    let alignment_row = rows.get(1).expect("Table should have an alignment row");
477
478    let alignments: Vec<TableAlignment> = split_row(alignment_row)
479        .into_iter()
480        .map(|cell_content| {
481            let content: String = cell_content
482                .iter()
483                .filter_map(|token| match token {
484                    Token::Text(s) => {
485                        warn!("Table alignment should not contain text as it could result in unexpected behavior: {s}");
486                        Some(s.to_owned())
487                    }
488                    Token::Punctuation(s) => Some(s.to_owned()),
489                    Token::ThematicBreak => Some("---".to_string()),
490                    _ => None,
491                })
492                .collect();
493
494            match (content.starts_with(':'), content.ends_with(':')) {
495                (true, true) => TableAlignment::Center,
496                (true, false) => TableAlignment::Left,
497                (false, true) => TableAlignment::Right,
498                _ => TableAlignment::None,
499            }
500        })
501        .collect();
502
503    let headers: Vec<MdTableCell> = split_row(header_row)
504        .into_iter()
505        .enumerate()
506        .map(|(i, cell_content)| MdTableCell {
507            content: parse_inline(cell_content),
508            alignment: alignments.get(i).cloned().unwrap_or(TableAlignment::None),
509            is_header: true,
510        })
511        .collect();
512
513    let body: Vec<Vec<MdTableCell>> = rows
514        .iter()
515        .skip(2)
516        .map(|row| {
517            split_row(row)
518                .into_iter()
519                .enumerate()
520                .map(|(i, cell_tokens)| MdTableCell {
521                    content: parse_inline(cell_tokens),
522                    alignment: alignments.get(i).cloned().unwrap_or(TableAlignment::None),
523                    is_header: false,
524                })
525                .collect()
526        })
527        .collect();
528
529    MdBlockElement::Table { headers, body }
530}
531
532/// Helper function to split a row of tokens into individual cells.
533///
534/// By removing the starting and ending "|" characters, it ensures that the row is
535/// split into the proper number of cells.
536fn split_row(row: &[Token]) -> Vec<&[Token]> {
537    let mut cells: Vec<&[Token]> = row
538        .split(|token| token == &Token::TableCellSeparator)
539        .collect();
540
541    if let Some(first) = cells.first() {
542        if first.is_empty() {
543            cells.remove(0);
544        }
545    }
546    if let Some(last) = cells.last() {
547        if last.is_empty() {
548            cells.pop();
549        }
550    }
551
552    cells
553}
554
555/// Parses a vector of tokens into a vector of inline Markdown elements (i.e. links, images,
556/// bold/italics, etc.).
557///
558/// # Arguments
559/// * `markdown_tokens` - A vector of tokens representing inline markdown content.
560///
561/// # Returns
562/// A vector of parsed inline Markdown elements.
563pub fn parse_inline(markdown_tokens: &[Token]) -> Vec<MdInlineElement> {
564    let mut parsed_inline_elements: Vec<MdInlineElement> = Vec::new();
565
566    let mut cursor: TokenCursor = TokenCursor {
567        tokens: markdown_tokens.to_vec(),
568        current_position: 0,
569    };
570
571    let mut delimiter_stack: Vec<Delimiter> = Vec::new();
572
573    let mut buffer: String = String::new();
574
575    let mut current_token: &Token;
576    while !cursor.is_at_eof() {
577        current_token = cursor.current().expect("Token should be valid markdown");
578
579        match current_token {
580            Token::EmphasisRun { delimiter, length } => {
581                push_buffer_to_collection(&mut parsed_inline_elements, &mut buffer);
582
583                delimiter_stack.push(Delimiter {
584                    run_length: *length,
585                    ch: *delimiter,
586                    token_position: cursor.position(),
587                    parsed_position: parsed_inline_elements.len(),
588                    active: true,
589                    can_open: true,
590                    can_close: true,
591                });
592
593                parsed_inline_elements.push(MdInlineElement::Placeholder {
594                    ch: *delimiter,
595                    token_position: cursor.position(),
596                });
597            }
598            Token::OpenBracket => {
599                push_buffer_to_collection(&mut parsed_inline_elements, &mut buffer);
600
601                let link_element =
602                    parse_link_type(&mut cursor, |label, title, url| MdInlineElement::Link {
603                        text: label,
604                        title,
605                        url,
606                    });
607                parsed_inline_elements.push(link_element);
608            }
609            Token::CodeTick => {
610                // Search for a matching code tick, everything else is text
611                cursor.advance();
612                push_buffer_to_collection(&mut parsed_inline_elements, &mut buffer);
613
614                let code_content = parse_code_span(&mut cursor);
615
616                if cursor.current() != Some(&Token::CodeTick) {
617                    parsed_inline_elements.push(MdInlineElement::Text {
618                        content: format!("`{code_content}`"),
619                    });
620                } else {
621                    parsed_inline_elements.push(MdInlineElement::Code {
622                        content: code_content,
623                    });
624                }
625            }
626            Token::Punctuation(string) if string == "!" => {
627                if cursor.peek_ahead(1) != Some(&Token::OpenBracket) {
628                    // If the next token is not an open bracket, treat it as text
629                    buffer.push('!');
630                    cursor.advance();
631                    continue;
632                }
633
634                push_buffer_to_collection(&mut parsed_inline_elements, &mut buffer);
635                cursor.advance(); // Advance to the open bracket
636
637                let image =
638                    parse_link_type(&mut cursor, |label, title, url| MdInlineElement::Image {
639                        alt_text: flatten_inline(&label),
640                        title,
641                        url,
642                    });
643
644                parsed_inline_elements.push(image);
645            }
646            Token::Escape(esc_char) => buffer.push_str(&format!("\\{esc_char}")),
647            Token::Text(string) | Token::Punctuation(string) => buffer.push_str(string),
648            Token::OrderedListMarker(string) => buffer.push_str(string),
649            Token::Whitespace => buffer.push(' '),
650            Token::CloseBracket => buffer.push(']'),
651            Token::OpenParenthesis => buffer.push('('),
652            Token::CloseParenthesis => buffer.push(')'),
653            Token::ThematicBreak => buffer.push_str("---"),
654            Token::TableCellSeparator => buffer.push('|'),
655            Token::BlockQuoteMarker => buffer.push('>'),
656            Token::RawHtmlTag(tag_content) => buffer.push_str(tag_content),
657            _ => push_buffer_to_collection(&mut parsed_inline_elements, &mut buffer),
658        }
659
660        cursor.advance();
661    }
662
663    push_buffer_to_collection(&mut parsed_inline_elements, &mut buffer);
664
665    delimiter_stack
666        .iter_mut()
667        .for_each(|el| el.classify_flanking(&cursor.tokens));
668
669    resolve_emphasis(&mut parsed_inline_elements, &mut delimiter_stack);
670
671    parsed_inline_elements
672}
673
674/// Parses a code span starting from the current position of the cursor.
675///
676/// # Arguments
677/// * `cursor` - A mutable reference to a `TokenCursor` that tracks the current position in the
678///
679/// # Returns
680/// A string containing the content of the code span, excluding the opening and closing code ticks.
681fn parse_code_span(cursor: &mut TokenCursor) -> String {
682    let mut code_content: String = String::new();
683    while let Some(next_token) = cursor.current() {
684        match next_token {
685            Token::CodeTick => break,
686            Token::Text(string) | Token::Punctuation(string) => code_content.push_str(string),
687            Token::OrderedListMarker(string) => code_content.push_str(string),
688            Token::Escape(ch) => code_content.push_str(&format!("\\{ch}")),
689            Token::OpenParenthesis => code_content.push('('),
690            Token::CloseParenthesis => code_content.push(')'),
691            Token::OpenBracket => code_content.push('['),
692            Token::CloseBracket => code_content.push(']'),
693            Token::TableCellSeparator => code_content.push('|'),
694            Token::EmphasisRun { delimiter, length } => {
695                code_content.push_str(&delimiter.to_string().repeat(*length))
696            }
697            Token::Whitespace => code_content.push(' '),
698            Token::Tab => code_content.push_str(&" ".repeat(CONFIG.get().unwrap().lexer.tab_size)),
699            Token::Newline => code_content.push('\n'),
700            Token::ThematicBreak => code_content.push_str("---"),
701            Token::BlockQuoteMarker => code_content.push('>'),
702            Token::RawHtmlTag(tag_content) => code_content.push_str(tag_content),
703            Token::CodeFence => {}
704        }
705
706        cursor.advance();
707    }
708
709    code_content
710}
711
712/// Helper function used in `parse_link_type` to circumvent Rust's limitation on closure recursion
713fn make_image(label: Vec<MdInlineElement>, title: Option<String>, uri: String) -> MdInlineElement {
714    MdInlineElement::Image {
715        alt_text: flatten_inline(&label),
716        title,
717        url: uri,
718    }
719}
720
721/// Helper function used in `parse_link_type` to circumvent Rust's limitation on closure recursion
722fn make_link(label: Vec<MdInlineElement>, title: Option<String>, uri: String) -> MdInlineElement {
723    MdInlineElement::Link {
724        text: label,
725        title,
726        url: uri,
727    }
728}
729
730/// Parses a link type (either a link or an image) from the current position of the cursor.
731///
732/// # Arguments
733/// * `cursor` - A mutable reference to a `TokenCursor` that tracks the current position in the
734///   token stream.
735/// * `make_element` - A closure that takes the parsed label elements, optional title, and URI,
736///   and returns an `MdInlineElement` representing the link or image.
737///
738/// # Returns
739/// An `MdInlineElement` representing the parsed link or image.
740fn parse_link_type<F>(cursor: &mut TokenCursor, make_element: F) -> MdInlineElement
741where
742    F: Fn(Vec<MdInlineElement>, Option<String>, String) -> MdInlineElement,
743{
744    let mut label_elements: Vec<MdInlineElement> = Vec::new();
745    let mut label_buffer = String::new();
746    let mut delimiter_stack: Vec<Delimiter> = Vec::new();
747    cursor.advance(); // Move past the open bracket
748    while let Some(token) = cursor.current() {
749        match token {
750            Token::CloseBracket => {
751                push_buffer_to_collection(&mut label_elements, &mut label_buffer);
752                break;
753            }
754            Token::OpenBracket => {
755                push_buffer_to_collection(&mut label_elements, &mut label_buffer);
756
757                let inner_link = parse_link_type(cursor, make_link);
758                label_elements.push(inner_link);
759            }
760            Token::EmphasisRun { delimiter, length } => {
761                push_buffer_to_collection(&mut label_elements, &mut label_buffer);
762                delimiter_stack.push(Delimiter {
763                    run_length: *length,
764                    ch: *delimiter,
765                    token_position: cursor.position(),
766                    parsed_position: label_elements.len(),
767                    active: true,
768                    can_open: true,
769                    can_close: true,
770                });
771                label_elements.push(MdInlineElement::Placeholder {
772                    ch: *delimiter,
773                    token_position: cursor.position(),
774                });
775            }
776            Token::Punctuation(s) if s == "!" => {
777                if cursor.peek_ahead(1) != Some(&Token::OpenBracket) {
778                    label_buffer.push('!');
779                    cursor.advance();
780                    continue;
781                }
782
783                push_buffer_to_collection(&mut label_elements, &mut label_buffer);
784                cursor.advance(); // Advance to the open bracket
785                let inner_image = parse_link_type(cursor, make_image);
786
787                label_elements.push(inner_image);
788            }
789            Token::Text(s) | Token::Punctuation(s) => label_buffer.push_str(s),
790            Token::OrderedListMarker(s) => label_buffer.push_str(s),
791            Token::Escape(ch) => label_buffer.push_str(&format!("\\{ch}")),
792            Token::Whitespace => label_buffer.push(' '),
793            Token::ThematicBreak => label_buffer.push_str("---"),
794            Token::OpenParenthesis => label_buffer.push('('),
795            Token::CloseParenthesis => label_buffer.push(')'),
796            Token::TableCellSeparator => label_buffer.push('|'),
797            Token::BlockQuoteMarker => label_buffer.push('>'),
798            _ => {}
799        }
800        cursor.advance();
801    }
802
803    push_buffer_to_collection(&mut label_elements, &mut label_buffer);
804    resolve_emphasis(&mut label_elements, &mut delimiter_stack);
805
806    // If we didn't find a closing bracket, treat it as text
807    if cursor.current() != Some(&Token::CloseBracket) {
808        return MdInlineElement::Text {
809            content: format!("[{}", flatten_inline(&label_elements)),
810        };
811    }
812
813    // At this point we should have parentheses for the uri, otherwise treat it as a
814    // text element
815    if cursor.peek_ahead(1) != Some(&Token::OpenParenthesis) {
816        cursor.advance();
817        return MdInlineElement::Text {
818            content: format!("[{}]", flatten_inline(&label_elements)),
819        };
820    }
821
822    cursor.advance(); // Move to '('
823
824    let mut uri = String::new();
825    let mut title = String::new();
826    let mut is_building_title = false;
827    let mut is_valid_title = true;
828    let mut has_opening_quote = false;
829
830    while let Some(token) = cursor.current() {
831        if !is_building_title {
832            match token {
833                Token::CloseParenthesis => break,
834                Token::Text(s) | Token::Punctuation(s) => uri.push_str(s),
835                Token::OrderedListMarker(s) => uri.push_str(s),
836                Token::Escape(ch) => uri.push_str(&format!("\\{ch}")),
837                Token::Whitespace => is_building_title = true,
838                Token::ThematicBreak => uri.push_str("---"),
839                Token::TableCellSeparator => uri.push('|'),
840                Token::BlockQuoteMarker => uri.push('>'),
841                Token::RawHtmlTag(tag_content) => uri.push_str(tag_content),
842                _ => {}
843            }
844        } else {
845            match token {
846                Token::CloseParenthesis => break,
847                Token::Punctuation(s) if s == "\"" => {
848                    if has_opening_quote {
849                        is_valid_title = true;
850                        is_building_title = false;
851                    } else {
852                        has_opening_quote = true;
853                        is_valid_title = false;
854                    }
855                }
856                Token::Text(s) | Token::Punctuation(s) => title.push_str(s),
857                Token::OrderedListMarker(s) => title.push_str(s),
858                Token::Escape(ch) => title.push_str(&format!("\\{ch}")),
859                Token::EmphasisRun { delimiter, length } => {
860                    title.push_str(&delimiter.to_string().repeat(*length))
861                }
862                Token::OpenBracket => title.push('['),
863                Token::CloseBracket => title.push(']'),
864                Token::OpenParenthesis => title.push('('),
865                Token::TableCellSeparator => title.push('|'),
866                Token::Tab => title.push('\t'),
867                Token::Newline => title.push_str("\\n"),
868                Token::Whitespace => title.push(' '),
869                Token::CodeTick => title.push('`'),
870                Token::CodeFence => title.push_str("```"),
871                Token::ThematicBreak => title.push_str("---"),
872                Token::BlockQuoteMarker => title.push('>'),
873                Token::RawHtmlTag(tag_content) => {
874                    warn!(
875                        "Raw HTML tags in titles can result in unexpected behavior: {tag_content}"
876                    );
877                    title.push_str(tag_content);
878                }
879            }
880        }
881        cursor.advance();
882    }
883
884    // If we didn't find a closing parenthesis or if the title is invalid, treat it as text
885    if cursor.current() != Some(&Token::CloseParenthesis) {
886        return MdInlineElement::Text {
887            content: format!("[{}]({} ", flatten_inline(&label_elements), uri),
888        };
889    } else if !title.is_empty() && !is_valid_title {
890        return MdInlineElement::Text {
891            content: format!("[{}]({} {})", flatten_inline(&label_elements), uri, title),
892        };
893    }
894
895    make_element(label_elements, Some(title).filter(|t| !t.is_empty()), uri)
896}
897
898/// Flattens a vector of inline Markdown elements into a single string.
899///
900/// # Arguments
901/// * `elements` - A vector of inline Markdown elements to flatten.
902///
903/// # Returns
904/// A string containing the concatenated content of all inline elements
905fn flatten_inline(elements: &[MdInlineElement]) -> String {
906    let mut result = String::new();
907    for element in elements {
908        match element {
909            MdInlineElement::Text { content } => result.push_str(content),
910            MdInlineElement::Bold { content } => result.push_str(&flatten_inline(content)),
911            MdInlineElement::Italic { content } => result.push_str(&flatten_inline(content)),
912            MdInlineElement::Code { content } => result.push_str(content),
913            MdInlineElement::Link { text, .. } => result.push_str(&flatten_inline(text)),
914            MdInlineElement::Image { alt_text, .. } => result.push_str(alt_text),
915            _ => {}
916        }
917    }
918    result
919}
920
921/// Wrapper function to start the call chain for `resolve_emphasis_recursive` if there is more than
922/// one delimiter in the stack.
923fn resolve_emphasis(elements: &mut Vec<MdInlineElement>, delimiter_stack: &mut [Delimiter]) {
924    if delimiter_stack.len() == 1 {
925        // If there is only one delimiter, it cannot be resolved to emphasis
926        if delimiter_stack[0].active {
927            elements[delimiter_stack[0].parsed_position] = MdInlineElement::Text {
928                content: delimiter_stack[0].ch.to_string(),
929            };
930        }
931        return;
932    }
933    resolve_emphasis_recursive(elements, delimiter_stack, 0);
934}
935
936/// Recursively parses (resolves) emphasis in a vector of inline Markdown elements.
937///
938/// Modifies the elements in place to convert delimiter runs into bold or italic elements as appropriate.
939///
940/// # Arguments
941/// * `elements` - A mutable reference to a vector of inline Markdown elements.
942/// * `delimiter_stack` - A mutable reference to a slice of delimiters.
943fn resolve_emphasis_recursive(
944    elements: &mut Vec<MdInlineElement>,
945    delimiter_stack: &mut [Delimiter],
946    index: usize,
947) {
948    if index >= delimiter_stack.len() {
949        delimiter_stack.iter_mut().for_each(|el| {
950            if el.active && el.parsed_position < elements.len() {
951                let element_to_insert = MdInlineElement::Text {
952                    content: el.ch.to_string().repeat(el.run_length),
953                };
954                if let Some(MdInlineElement::Placeholder { .. }) = elements.get(el.parsed_position)
955                {
956                    elements.remove(el.parsed_position);
957                }
958                elements.insert(el.parsed_position, element_to_insert);
959            }
960        });
961        return;
962    }
963
964    if !delimiter_stack[index].active || !delimiter_stack[index].can_close {
965        resolve_emphasis_recursive(elements, delimiter_stack, index + 1);
966        return;
967    }
968
969    let closer = delimiter_stack[index].clone();
970
971    for j in (0..index).rev() {
972        if !delimiter_stack[j].active || !delimiter_stack[j].can_open {
973            continue;
974        }
975
976        let opener = delimiter_stack[j].clone();
977
978        if !closer.ch.eq(&opener.ch) {
979            continue;
980        }
981
982        // Rule of 3: If the total length of the run is a multiple of 3 and both run lengths
983        // are not divisible by 3, they are not valid for emphasis
984        let length_total = closer.run_length + opener.run_length;
985        if ((closer.can_open && closer.can_close) || (opener.can_open && opener.can_close))
986            && (length_total % 3 == 0 && closer.run_length % 3 != 0 && opener.run_length % 3 != 0)
987        {
988            continue;
989        }
990
991        let delimiters_used = if closer.run_length >= 2 && opener.run_length >= 2 {
992            2
993        } else {
994            1
995        };
996
997        let range_start = if opener.run_length > delimiters_used {
998            (opener.parsed_position + 1).saturating_sub(delimiters_used)
999        } else {
1000            opener.parsed_position
1001        };
1002
1003        let range_end = if closer.run_length >= delimiters_used {
1004            closer.parsed_position
1005        } else {
1006            (closer.parsed_position + 1).saturating_sub(delimiters_used)
1007        };
1008
1009        let mut content_slice = elements[range_start + 1..range_end].to_vec();
1010
1011        // Remove any hanging placeholders that map to inactive delimiters
1012        for i in 0..content_slice.len() {
1013            if let Some(MdInlineElement::Placeholder { ch, token_position }) = content_slice.get(i)
1014            {
1015                if delimiter_stack
1016                    .iter()
1017                    .any(|d| !d.active && d.token_position == *token_position && d.ch == *ch)
1018                {
1019                    content_slice.remove(i);
1020                }
1021            }
1022        }
1023
1024        let element_to_insert = match delimiters_used {
1025            2 => MdInlineElement::Bold {
1026                content: content_slice,
1027            },
1028            1 => MdInlineElement::Italic {
1029                content: content_slice,
1030            },
1031            _ => unreachable!(),
1032        };
1033
1034        if closer.run_length > delimiters_used {
1035            elements[closer.parsed_position - 1] = element_to_insert;
1036        } else {
1037            elements.splice(range_start..=range_end, vec![element_to_insert]);
1038            let num_elements_removed = range_end - range_start;
1039            (0..delimiter_stack.len()).for_each(|k| {
1040                if delimiter_stack[k].parsed_position > closer.parsed_position {
1041                    delimiter_stack[k].parsed_position -= num_elements_removed;
1042                }
1043            });
1044        }
1045
1046        delimiter_stack[index].run_length = delimiter_stack[index]
1047            .run_length
1048            .saturating_sub(delimiters_used);
1049        delimiter_stack[j].run_length = delimiter_stack[j]
1050            .run_length
1051            .saturating_sub(delimiters_used);
1052
1053        if delimiter_stack[index].run_length == 0 {
1054            delimiter_stack[index].active = false;
1055        }
1056        if delimiter_stack[j].run_length == 0 {
1057            delimiter_stack[j].active = false;
1058        }
1059
1060        // After resolving, recursively process the stack again
1061        resolve_emphasis_recursive(elements, delimiter_stack, 0);
1062        return;
1063    }
1064
1065    // No opener found, move to next closer
1066    resolve_emphasis_recursive(elements, delimiter_stack, index + 1);
1067}
1068
1069/// Groups adjacent tokenized lines into groups (blocks) for further parsing.
1070///
1071/// # Arguments
1072/// * `tokenized_lines` - A vector of vectors, where each inner vector contains tokens representing a line of markdown.
1073///
1074/// # Returns
1075/// A vector of vectors, where each inner vector represents a grouped block of tokens.
1076pub fn group_lines_to_blocks(mut tokenized_lines: Vec<Vec<Token>>) -> Vec<Vec<Token>> {
1077    let mut blocks: Vec<Vec<Token>> = Vec::new();
1078    let mut current_block: Vec<Token> = Vec::new();
1079    let mut previous_block: Vec<Token>;
1080    let lines = tokenized_lines.iter_mut();
1081    let mut is_inside_code_block = false;
1082    for line in lines {
1083        previous_block = blocks.last().unwrap_or(&Vec::new()).to_vec();
1084
1085        // Appending all tokens between two code fences to one block
1086        if is_inside_code_block && line.first() != Some(&Token::CodeFence) {
1087            // If we are inside a code block, then we just append the line to the current block
1088            attach_to_previous_block(&mut blocks, &mut previous_block, line, Some(Token::Newline));
1089            continue;
1090        } else if is_inside_code_block && line.first() == Some(&Token::CodeFence) {
1091            // If we are inside a code block and the line starts with a code fence, then we end the
1092            // code block
1093            is_inside_code_block = false;
1094            attach_to_previous_block(&mut blocks, &mut previous_block, line, None);
1095            continue;
1096        }
1097
1098        match line.first() {
1099            Some(Token::Punctuation(string)) if string == "#" => {
1100                // For ATX headings, it must all be on one line
1101                blocks.push(line.to_owned());
1102            }
1103            Some(Token::Punctuation(string)) if string == "-" => {
1104                group_dashed_lines(&mut blocks, &mut current_block, &mut previous_block, line);
1105            }
1106            Some(Token::Punctuation(string)) if string == "*" => {
1107                group_asterisked_lines(&mut blocks, &mut current_block, &mut previous_block, line);
1108            }
1109            Some(Token::Tab) => {
1110                group_tabbed_lines(&mut blocks, &mut current_block, &mut previous_block, line);
1111            }
1112            Some(Token::OrderedListMarker(_)) => {
1113                group_ordered_list(&mut blocks, &mut current_block, &mut previous_block, line);
1114            }
1115            Some(Token::ThematicBreak) => {
1116                // Check if the previous line starts with anything other than a heading
1117                // If so, then this is actually a setext heading 2
1118                if let Some(previous_line_start) = previous_block.first() {
1119                    match previous_line_start {
1120                        Token::Punctuation(string) if string == "#" => {
1121                            blocks.push(take(line));
1122                        }
1123                        Token::Newline => blocks.push(take(line)),
1124                        _ => {
1125                            previous_block.insert(0, Token::Punctuation(String::from("#")));
1126                            previous_block.insert(1, Token::Punctuation(String::from("#")));
1127                            previous_block.insert(2, Token::Whitespace);
1128                            blocks.pop();
1129                            blocks.push(take(&mut previous_block));
1130                        }
1131                    }
1132                } else {
1133                    current_block.extend_from_slice(line);
1134                }
1135            }
1136            Some(Token::BlockQuoteMarker) => {
1137                if let Some(previous_line_start) = previous_block.first() {
1138                    if matches!(previous_line_start, Token::BlockQuoteMarker) {
1139                        attach_to_previous_block(
1140                            &mut blocks,
1141                            &mut previous_block,
1142                            line,
1143                            Some(Token::Newline),
1144                        );
1145                    } else {
1146                        current_block.extend_from_slice(line);
1147                    }
1148                } else {
1149                    current_block.extend_from_slice(line);
1150                }
1151            }
1152            Some(Token::CodeTick) => {
1153                current_block.extend_from_slice(line);
1154            }
1155            Some(Token::CodeFence) => {
1156                if !is_inside_code_block {
1157                    is_inside_code_block = true;
1158                    current_block.extend_from_slice(line);
1159                } else {
1160                    is_inside_code_block = false;
1161                    current_block.extend_from_slice(line);
1162                    blocks.push(take(&mut current_block));
1163                }
1164            }
1165            Some(Token::Text(string)) if string == "=" => {
1166                let has_trailing_content = line.iter().skip(1).any(|token| match token {
1167                    Token::Text(s) if s == "=" => false,
1168                    Token::Whitespace | Token::Tab | Token::Newline => false,
1169                    _ => true,
1170                });
1171
1172                // Setext heading 1
1173                if let Some(previous_line_start) = previous_block.first() {
1174                    if !has_trailing_content && matches!(previous_line_start, Token::Text(_)) {
1175                        group_setext_heading_one(&mut blocks, &mut previous_block);
1176                    } else {
1177                        group_text_lines(
1178                            &mut blocks,
1179                            &mut current_block,
1180                            &mut previous_block,
1181                            line,
1182                        );
1183                    }
1184                } else {
1185                    current_block.extend_from_slice(line);
1186                }
1187            }
1188            Some(Token::Text(_)) => {
1189                group_text_lines(&mut blocks, &mut current_block, &mut previous_block, line);
1190            }
1191            Some(Token::TableCellSeparator) => {
1192                group_table_rows(&mut blocks, &mut current_block, &mut previous_block, line);
1193            }
1194            Some(Token::Whitespace) => {
1195                group_lines_with_leading_whitespace(
1196                    &mut blocks,
1197                    &mut current_block,
1198                    &mut previous_block,
1199                    line,
1200                );
1201            }
1202            _ => {
1203                // Catch-all for everything else
1204                current_block.extend_from_slice(line);
1205            }
1206        }
1207
1208        if !current_block.is_empty() {
1209            blocks.push(take(&mut current_block));
1210        }
1211
1212        current_block.clear();
1213    }
1214    blocks
1215}
1216
1217/// Groups lines beginning with "|" denoting Markdown tables.
1218///
1219/// # Arguments
1220/// * `blocks` - A mutable reference to a vector of blocks, where each block is a vector of tokens.
1221/// * `current_block` - A mutable reference to the current block being processed.
1222/// * `previous_block` - A mutable reference to the previous block, used for context.
1223/// * `line` - A mutable reference to the current line being processed, which is a vector of
1224///   tokens.
1225fn group_table_rows(
1226    blocks: &mut Vec<Vec<Token>>,
1227    current_block: &mut Vec<Token>,
1228    previous_block: &mut Vec<Token>,
1229    line: &[Token],
1230) {
1231    if let Some(previous_line_start) = previous_block.first() {
1232        if previous_line_start == &Token::TableCellSeparator {
1233            attach_to_previous_block(blocks, previous_block, line, Some(Token::Newline));
1234        } else {
1235            current_block.extend_from_slice(line);
1236        }
1237    } else {
1238        current_block.extend_from_slice(line);
1239    }
1240}
1241
1242/// Groups text lines into blocks based on the previous block's content.
1243///
1244/// # Arguments
1245/// * `blocks` - A mutable reference to a vector of blocks, where each block is a vector of tokens.
1246/// * `current_block` - A mutable reference to the current block being processed.
1247/// * `previous_block` - A mutable reference to the previous block, used for context.
1248/// * `line` - A mutable reference to the current line being processed, which is a vector of
1249///   tokens.
1250fn group_text_lines(
1251    blocks: &mut Vec<Vec<Token>>,
1252    current_block: &mut Vec<Token>,
1253    previous_block: &mut Vec<Token>,
1254    line: &[Token],
1255) {
1256    if !previous_block.is_empty() {
1257        if matches!(previous_block.first(), Some(Token::Text(_))) {
1258            attach_to_previous_block(blocks, previous_block, line, Some(Token::Whitespace));
1259        } else if matches!(previous_block.first(), Some(Token::Punctuation(_))) {
1260            // If the previous block was a heading, then this is a new paragraph
1261            current_block.extend_from_slice(line);
1262        } else {
1263            // If the previous block was empty, then this is a new paragraph
1264            current_block.extend_from_slice(line);
1265        }
1266    } else {
1267        // If the previous block was empty, then this is a new paragraph
1268        current_block.extend_from_slice(line);
1269    }
1270}
1271
1272/// Groups Setext heading 1 lines into a block by prepending the previous block with "# ".
1273///
1274/// # Arguments
1275/// * `blocks` - A mutable reference to a vector of blocks, where each block is a vector of tokens.
1276/// * `previous_block` - A mutable reference to the previous block, which is modified to become a
1277///   Setext heading 1.
1278fn group_setext_heading_one(blocks: &mut Vec<Vec<Token>>, previous_block: &mut Vec<Token>) {
1279    previous_block.insert(0, Token::Punctuation(String::from("#")));
1280    previous_block.insert(1, Token::Whitespace);
1281
1282    // Swap previous block in
1283    blocks.pop();
1284    blocks.push(take(previous_block));
1285}
1286
1287/// Groups ordered list lines into a block by appending the line to the previous block if it is
1288/// part of the same list.
1289///
1290/// # Arguments
1291/// * `blocks` - A mutable reference to a vector of blocks, where each block is a vector of tokens.
1292/// * `current_block` - A mutable reference to the current block being processed.
1293/// * `previous_block` - A mutable reference to the previous block, used for context.
1294/// * `line` - A mutable reference to the current line being processed, which is a vector of
1295///   tokens.
1296fn group_ordered_list(
1297    blocks: &mut Vec<Vec<Token>>,
1298    current_block: &mut Vec<Token>,
1299    previous_block: &mut Vec<Token>,
1300    line: &[Token],
1301) {
1302    if let Some(previous_line_start) = previous_block.first() {
1303        match previous_line_start {
1304            Token::OrderedListMarker(_) if previous_block.get(1) == Some(&Token::Whitespace) => {
1305                // If the previous block is a list, then we append the line to it
1306                attach_to_previous_block(blocks, previous_block, line, Some(Token::Newline));
1307            }
1308            _ => {
1309                current_block.extend_from_slice(line);
1310            }
1311        }
1312    } else {
1313        current_block.extend_from_slice(line);
1314    }
1315}
1316
1317/// Attaches the current line to the previous block, optionally adding a separator token.
1318fn attach_to_previous_block(
1319    blocks: &mut Vec<Vec<Token>>,
1320    previous_block: &mut Vec<Token>,
1321    line: &[Token],
1322    separator: Option<Token>,
1323) {
1324    if let Some(separator) = separator {
1325        previous_block.push(separator);
1326    }
1327
1328    previous_block.extend_from_slice(line);
1329    blocks.pop();
1330    blocks.push(take(previous_block));
1331}
1332
1333/// Groups tabbed lines into blocks based on the previous block's content.
1334///
1335/// Note that this function short-circuits when the first token of the line is a raw HTML tag,
1336/// to allow for indented HTML.
1337///
1338/// # Arguments
1339/// * `blocks` - A mutable reference to a vector of blocks, where each block is a vector of tokens.
1340/// * `current_block` - A mutable reference to the current block being processed.
1341/// * `previous_block` - A mutable reference to the previous block, used for context.
1342/// * `line` - A mutable reference to the current line being processed, which is a vector of
1343///   tokens.
1344fn group_tabbed_lines(
1345    blocks: &mut Vec<Vec<Token>>,
1346    current_block: &mut Vec<Token>,
1347    previous_block: &mut Vec<Token>,
1348    line: &[Token],
1349) {
1350    if line.len() == 1 {
1351        current_block.extend_from_slice(line);
1352        return;
1353    }
1354
1355    let non_whitespace_index = line
1356        .iter()
1357        .position(|token| !matches!(token, Token::Whitespace | Token::Tab | Token::Newline));
1358
1359    if let Some(first_content_token) = line.get(non_whitespace_index.unwrap_or(0)) {
1360        if matches!(first_content_token, Token::RawHtmlTag(_))
1361            && matches!(previous_block.first(), Some(Token::RawHtmlTag(_)))
1362        {
1363            // If the first token is a raw HTML tag, we attach the line to the previous block
1364            let line_to_attach = line
1365                .iter()
1366                .skip_while(|t| matches!(t, Token::Whitespace | Token::Tab | Token::Newline))
1367                .cloned()
1368                .collect::<Vec<Token>>();
1369
1370            attach_to_previous_block(
1371                blocks,
1372                previous_block,
1373                &line_to_attach,
1374                Some(Token::Newline),
1375            );
1376
1377            return;
1378        } else if matches!(first_content_token, Token::RawHtmlTag(_)) {
1379            current_block.extend(
1380                line.iter()
1381                    .skip_while(|t| matches!(t, Token::Whitespace | Token::Tab | Token::Newline))
1382                    .cloned(),
1383            );
1384            return;
1385        }
1386
1387        if !previous_block.is_empty() {
1388            let previous_line_start = previous_block.first();
1389            match previous_line_start {
1390                Some(Token::Punctuation(string))
1391                    if (string == "-" || string == "*")
1392                        && previous_block.get(1) == Some(&Token::Whitespace) =>
1393                {
1394                    // If the previous block is a list, then we append the line to it
1395                    attach_to_previous_block(blocks, previous_block, line, Some(Token::Newline));
1396                }
1397                Some(Token::OrderedListMarker(_))
1398                    if previous_block.get(1) == Some(&Token::Whitespace) =>
1399                {
1400                    // If the previous block is an ordered list, then we append the
1401                    // line to it
1402                    attach_to_previous_block(blocks, previous_block, line, Some(Token::Newline));
1403                }
1404                Some(Token::RawHtmlTag(_)) => {
1405                    attach_to_previous_block(blocks, previous_block, line, Some(Token::Newline));
1406                }
1407                Some(Token::Tab) => {
1408                    attach_to_previous_block(blocks, previous_block, line, Some(Token::Newline));
1409                }
1410                _ => {
1411                    // If the previous block is not a list, then we just add the
1412                    // line to the current block
1413                    current_block.extend_from_slice(line);
1414                }
1415            }
1416        } else {
1417            // If the previous block is empty, then we just add the line to the
1418            // current block
1419            current_block.extend_from_slice(line);
1420        }
1421    }
1422}
1423
1424/// Groups lines with leading whitespace into blocks based on the previous block's content.
1425///
1426/// # Arguments
1427/// * `blocks` - A mutable reference to a vector of blocks, where each block is a vector of tokens.
1428/// * `current_block` - A mutable reference to the current block being processed.
1429/// * `previous_block` - A mutable reference to the previous block, used for context.
1430/// * `line` - A mutable reference to the current line being processed, which is a vector of
1431///   tokens.
1432fn group_lines_with_leading_whitespace(
1433    blocks: &mut Vec<Vec<Token>>,
1434    current_block: &mut Vec<Token>,
1435    previous_block: &mut Vec<Token>,
1436    line: &[Token],
1437) {
1438    if let Some(first_content_token) = line
1439        .iter()
1440        .find(|t| !matches!(t, Token::Whitespace | Token::Tab | Token::Newline))
1441    {
1442        if let Some(previous_line_start) = previous_block.first() {
1443            match previous_line_start {
1444                Token::Whitespace => {
1445                    // Check if the previous line has non-whitespace content
1446                    if line
1447                        .iter()
1448                        .any(|t| !matches!(t, Token::Whitespace | Token::Tab | Token::Newline))
1449                    {
1450                        attach_to_previous_block(
1451                            blocks,
1452                            previous_block,
1453                            line,
1454                            Some(Token::Newline),
1455                        );
1456                    } else {
1457                        current_block.extend_from_slice(line);
1458                    }
1459                }
1460                Token::RawHtmlTag(_) => {
1461                    if matches!(first_content_token, Token::RawHtmlTag(_)) {
1462                        // If the first token is a raw HTML tag, we attach the line to the previous block
1463                        attach_to_previous_block(
1464                            blocks,
1465                            previous_block,
1466                            line,
1467                            Some(Token::Newline),
1468                        );
1469                    } else {
1470                        current_block.extend_from_slice(line);
1471                    }
1472                }
1473                Token::Punctuation(string) if string == "-" => {
1474                    if matches!(first_content_token, Token::Punctuation(_)) {
1475                        attach_to_previous_block(
1476                            blocks,
1477                            previous_block,
1478                            line,
1479                            Some(Token::Newline),
1480                        );
1481                    } else {
1482                        current_block.extend_from_slice(line);
1483                    }
1484                }
1485                Token::Text(_) | Token::Punctuation(_) => {
1486                    attach_to_previous_block(blocks, previous_block, line, Some(Token::Newline));
1487                }
1488                _ => {
1489                    // Append the line to current block, excluding leading whitespace
1490                    current_block.extend(
1491                        line.iter()
1492                            .skip_while(|t| {
1493                                matches!(t, Token::Whitespace | Token::Tab | Token::Newline)
1494                            })
1495                            .cloned(),
1496                    );
1497                }
1498            }
1499        } else {
1500            current_block.extend_from_slice(line);
1501        }
1502    }
1503}
1504
1505/// Groups dashed lines into blocks based on the previous block's content.
1506///
1507/// # Arguments
1508/// * `blocks` - A mutable reference to a vector of blocks, where each block is a vector of tokens.
1509/// * `current_block` - A mutable reference to the current block being processed.
1510/// * `previous_block` - A mutable reference to the previous block, used for context.
1511/// * `line` - A mutable reference to the current line being processed, which is a vector of
1512///   tokens.
1513fn group_dashed_lines(
1514    blocks: &mut Vec<Vec<Token>>,
1515    current_block: &mut Vec<Token>,
1516    previous_block: &mut Vec<Token>,
1517    line: &[Token],
1518) {
1519    if let Some(previous_line_start) = previous_block.first() {
1520        match previous_line_start {
1521            Token::Punctuation(string)
1522                if string == "-" && previous_block.get(1) == Some(&Token::Whitespace) =>
1523            {
1524                // Then it is either the start of a list or part of a list
1525
1526                attach_to_previous_block(blocks, previous_block, line, Some(Token::Newline));
1527            }
1528            Token::Punctuation(string) if string == "#" => {
1529                blocks.push(line.to_owned());
1530            }
1531            _ => {
1532                if line.len() > 1 {
1533                    current_block.extend_from_slice(line);
1534                } else {
1535                    // Then this is a Setext heading 2
1536                    previous_block.insert(0, Token::Punctuation(String::from("#")));
1537                    previous_block.insert(1, Token::Punctuation(String::from("#")));
1538                    previous_block.insert(2, Token::Whitespace);
1539                    blocks.pop();
1540                    blocks.push(take(previous_block));
1541                }
1542            }
1543        }
1544    } else {
1545        current_block.extend_from_slice(line);
1546    }
1547}
1548
1549/// Groups lines that start with asterisks into blocks.
1550///
1551/// # Arguments
1552/// * `blocks` - A mutable reference to a vector of blocks, where each block is a vector of tokens.
1553/// * `current_block` - A mutable reference to the current block being processed.
1554/// * `previous_block` - A mutable reference to the previous block, used for context.
1555/// * `line` - A mutable reference to the current line being processed, which is a vector of
1556///   tokens.
1557fn group_asterisked_lines(
1558    blocks: &mut Vec<Vec<Token>>,
1559    current_block: &mut Vec<Token>,
1560    previous_block: &mut Vec<Token>,
1561    line: &[Token],
1562) {
1563    if let Some(previous_line_start) = previous_block.first() {
1564        if *previous_line_start == Token::Punctuation(String::from("*"))
1565            && previous_block.get(1) == Some(&Token::Whitespace)
1566        {
1567            attach_to_previous_block(blocks, previous_block, line, Some(Token::Newline));
1568        } else {
1569            current_block.extend_from_slice(line);
1570        }
1571    } else {
1572        current_block.extend_from_slice(line);
1573    }
1574}
1575
1576#[cfg(test)]
1577mod test;
markrs/parser.rs

markrs/
parser.rs