1use log::warn;
5
6use crate::html_generator::indent_html;
7use crate::{CONFIG, io::copy_image_to_output_dir, utils::build_rel_prefix};
8
/// Conversion of a parsed Markdown node into an HTML fragment.
pub trait ToHtml {
    /// Renders `self` as an HTML string.
    ///
    /// The implementations in this file pass all three path arguments unchanged to their
    /// children. Only the image renderer consumes them: `output_dir` and `input_dir` when
    /// copying a local image, and `html_rel_path` when building the relative prefix of the
    /// image `src`.
    fn to_html(&self, output_dir: &str, input_dir: &str, html_rel_path: &str) -> String;
}
13
/// A single token of Markdown input, as stored in a `TokenCursor`.
///
/// How tokens are produced is outside this file; the notes below only record how this file
/// itself treats them.
#[derive(Debug, PartialEq, Clone)]
pub enum Token {
    /// Plain text; also what `From<String>` produces.
    Text(String),
    /// A run of `length` identical emphasis delimiter characters.
    /// Counted as punctuation when classifying flanking.
    EmphasisRun { delimiter: char, length: usize },
    /// Counted as punctuation when classifying flanking.
    Punctuation(String),
    /// Counted as punctuation when classifying flanking.
    OpenBracket,
    /// Counted as punctuation when classifying flanking.
    CloseBracket,
    /// Counted as punctuation when classifying flanking.
    OpenParenthesis,
    /// Counted as punctuation when classifying flanking.
    CloseParenthesis,
    TableCellSeparator,
    OrderedListMarker(String),
    /// Counted as whitespace when classifying flanking.
    Whitespace,
    CodeTick,
    CodeFence,
    ThematicBreak,
    Escape(String),
    Tab,
    /// Counted as whitespace when classifying flanking.
    Newline,
    BlockQuoteMarker,
    RawHtmlTag(String),
}
36
37impl From<String> for Token {
38 fn from(s: String) -> Self {
39 Token::Text(s.to_string())
40 }
41}
42
/// Block-level node of a parsed Markdown document.
///
/// Each variant is turned into HTML by the `ToHtml` implementation for this type.
#[derive(Debug, PartialEq)]
pub enum MdBlockElement {
    /// Heading rendered as `<h{level}>`; its `id` attribute is derived from the plain text
    /// of `content`.
    Header {
        level: u8,
        content: Vec<MdInlineElement>,
    },
    /// Inline content rendered inside `<p>`.
    Paragraph {
        content: Vec<MdInlineElement>,
    },
    /// Code block; a missing `language` is rendered with the class `language-none`.
    CodeBlock {
        language: Option<String>,
        lines: Vec<String>,
    },
    /// Rendered as `<hr>`.
    ThematicBreak,
    /// Rendered as `<ul>`.
    UnorderedList {
        items: Vec<MdListItem>,
    },
    /// Rendered as `<ol start="{starting_num}">`.
    OrderedList {
        starting_num: usize,
        items: Vec<MdListItem>,
    },
    /// Table with one header row (`headers`) and any number of `body` rows.
    Table {
        headers: Vec<MdTableCell>,
        body: Vec<Vec<MdTableCell>>,
    },
    /// Nested blocks rendered inside `<blockquote>`.
    BlockQuote {
        content: Vec<MdBlockElement>,
    },
    /// HTML emitted verbatim, followed by a newline.
    RawHtml {
        content: String,
    },
}
76
77impl ToHtml for MdBlockElement {
78 fn to_html(&self, output_dir: &str, input_dir: &str, html_rel_path: &str) -> String {
79 match self {
80 MdBlockElement::Header { level, content } => {
81 let inner_html = content
82 .iter()
83 .map(|el| el.to_html(output_dir, input_dir, html_rel_path))
84 .collect::<String>();
85
86 let id = content
87 .iter()
88 .map(MdInlineElement::to_plain_text)
89 .collect::<String>();
90
91 let id = clean_id(id);
92
93 format!("\n<h{level} id=\"{id}\">{inner_html}</h{level}>\n")
94 }
95 MdBlockElement::Paragraph { content } => {
96 let inner_html = content
97 .iter()
98 .map(|el| el.to_html(output_dir, input_dir, html_rel_path))
99 .collect::<String>();
100 format!("<p>{inner_html}</p>")
101 }
102 MdBlockElement::CodeBlock { language, lines } => {
103 let language_class = match language {
104 Some(language) => format!("language-{language}"),
105 None => "language-none".to_string(),
106 };
107
108 if CONFIG.get().unwrap().html.use_prism {
109 let code = lines.join("\n");
110
111 format!(
112 "<pre class=\"{language_class} line-numbers\" style=\"white-space: pre-wrap;\" data-prismjs-copy=\"📋\">\n<code class=\"{language_class} line-numbers\">{code}</code></pre>"
113 )
114 } else {
115 let code = lines
116 .iter()
117 .map(|line| format!("<code class=\"non_prism\">{line}</code>"))
118 .collect::<String>();
119
120 format!("<pre class=\"non_prism\">{code}</pre>")
121 }
122 }
123 MdBlockElement::ThematicBreak => "<hr>".to_string(),
124 MdBlockElement::UnorderedList { items } => {
125 let inner_items = items
126 .iter()
127 .map(|item| item.to_html(output_dir, input_dir, html_rel_path))
128 .collect::<String>();
129
130 let inner_items = indent_html(&inner_items, 1);
131 format!("<ul>\n{inner_items}\n</ul>")
132 }
133 MdBlockElement::OrderedList {
134 items,
135 starting_num,
136 } => {
137 let inner_items = items
138 .iter()
139 .map(|item| item.to_html(output_dir, input_dir, html_rel_path))
140 .collect::<String>();
141
142 let inner_items = indent_html(&inner_items, 1);
143 format!("<ol start=\"{starting_num}\">\n{inner_items}\n</ol>")
144 }
145 MdBlockElement::Table { headers, body } => {
146 let header_html = headers
147 .iter()
148 .map(|cell| cell.to_html(output_dir, input_dir, html_rel_path))
149 .collect::<Vec<_>>()
150 .join("\n");
151
152 let header_html = indent_html(&header_html, 3);
153
154 let body_html = body
155 .iter()
156 .map(|row| {
157 let cell_html = row
158 .iter()
159 .map(|cell| cell.to_html(output_dir, input_dir, html_rel_path))
160 .collect::<Vec<_>>()
161 .join("\n");
162
163 let cell_html = indent_html(&cell_html, 1);
164
165 format!("<tr>\n{cell_html}\n</tr>")
166 })
167 .collect::<Vec<_>>()
168 .join("\n");
169
170 let body_html = indent_html(&body_html, 2);
171
172 format!(
173 "<table>\n\t<thead>\n\t\t<tr>\n{header_html}\n\t\t</tr>\n\t</thead>\n\t<tbody>\n{body_html}\n\t</tbody>\n</table>"
174 )
175 }
176 MdBlockElement::BlockQuote { content } => {
177 let inner_html = content
178 .iter()
179 .map(|el| el.to_html(output_dir, input_dir, html_rel_path))
180 .collect::<String>();
181
182 format!("<blockquote>\n{inner_html}\n</blockquote>")
183 }
184 MdBlockElement::RawHtml { content } => {
185 format!("{}\n", content)
186 }
187 }
188 }
189}
190
/// Turns heading text into a lowercase, dash-separated value for an HTML `id` attribute.
///
/// * Everything from a `<` up to and including the next `>` is dropped; if no `>` follows,
///   the rest of the input is dropped.
/// * Outside such spans only alphanumeric characters, spaces and underscores survive, so
///   existing hyphens and other punctuation are removed.
/// * Each surviving space or underscore becomes one `-` (runs are not collapsed), the
///   result is lowercased, and leading/trailing dashes are trimmed.
fn clean_id(old_id: String) -> String {
    let mut inside_tag = false;
    let mut slug = String::new();

    for ch in old_id.chars() {
        match ch {
            '<' => inside_tag = true,
            '>' => inside_tag = false,
            ' ' | '_' if !inside_tag => slug.push('-'),
            c if !inside_tag && c.is_alphanumeric() => slug.push(c),
            _ => {}
        }
    }

    slug.to_lowercase().trim_matches('-').to_string()
}
215
/// One entry of an ordered or unordered list.
#[derive(Debug, PartialEq)]
pub struct MdListItem {
    /// The block this entry holds. A list stored here is rendered as a nested list without
    /// an enclosing `<li>`; any other block is wrapped in `<li>`.
    pub content: MdBlockElement,
}
224
225impl ToHtml for MdListItem {
226 fn to_html(&self, output_dir: &str, input_dir: &str, html_rel_path: &str) -> String {
227 match &self.content {
228 MdBlockElement::UnorderedList { items } => {
229 let inner_items = items
230 .iter()
231 .map(|item| item.to_html(output_dir, input_dir, html_rel_path))
232 .collect::<String>();
233 let inner_items = indent_html(&inner_items, 1);
234 format!("<ul>\n{inner_items}\n</ul>")
235 }
236 MdBlockElement::OrderedList {
237 items,
238 starting_num,
239 } => {
240 let inner_items = items
241 .iter()
242 .map(|item| item.to_html(output_dir, input_dir, html_rel_path))
243 .collect::<String>();
244 format!("<ol start=\"{starting_num}\">\n{inner_items}\n</ol>")
245 }
246 _ => {
247 let inner_html = indent_html(
248 &self.content.to_html(output_dir, input_dir, html_rel_path),
249 1,
250 );
251 format!("<li>\n{inner_html}\n</li>\n")
252 }
253 }
254 }
255}
256
/// A single table cell.
#[derive(Debug, PartialEq, Clone)]
pub struct MdTableCell {
    /// Inline elements rendered inside the cell.
    pub content: Vec<MdInlineElement>,
    /// Horizontal alignment, emitted as an inline `text-align` style.
    pub alignment: TableAlignment,
    /// `true` renders the cell as `<th>`, `false` as `<td>`.
    pub is_header: bool,
}
264
265impl ToHtml for MdTableCell {
266 fn to_html(&self, output_dir: &str, input_dir: &str, html_rel_path: &str) -> String {
267 let inner_html = self
268 .content
269 .iter()
270 .map(|el| el.to_html(output_dir, input_dir, html_rel_path))
271 .collect::<String>();
272
273 let text_alignment = match self.alignment {
274 TableAlignment::Left | TableAlignment::None => "left",
275 TableAlignment::Center => "center",
276 TableAlignment::Right => "right",
277 };
278
279 match self.is_header {
280 true => format!("<th style=\"text-align:{text_alignment};\">{inner_html}</th>"),
281 false => format!("<td style=\"text-align:{text_alignment};\">{inner_html}</td>"),
282 }
283 }
284}
285
/// Horizontal alignment of a table cell.
#[derive(Debug, PartialEq, Clone)]
pub enum TableAlignment {
    /// Rendered as `text-align:left`.
    Left,
    /// Rendered as `text-align:center`.
    Center,
    /// Rendered as `text-align:right`.
    Right,
    /// Rendered the same as `Left` by the table-cell renderer.
    None,
}
294
/// Inline-level node of a parsed Markdown document.
#[derive(Debug, PartialEq, Clone)]
pub enum MdInlineElement {
    /// Text emitted as-is.
    Text {
        content: String,
    },
    /// Children rendered inside `<b>`.
    Bold {
        content: Vec<MdInlineElement>,
    },
    /// Children rendered inside `<i>`.
    Italic {
        content: Vec<MdInlineElement>,
    },
    /// Hyperlink: `text` is the rendered label, `title` the optional `title` attribute and
    /// `url` the `href`.
    Link {
        text: Vec<MdInlineElement>,
        title: Option<String>,
        url: String,
    },
    /// Image: `alt_text` and the optional `title` become attributes, `url` is the source.
    Image {
        alt_text: String,
        title: Option<String>,
        url: String,
    },
    /// Inline code rendered inside `<code>`.
    Code {
        content: String,
    },
    /// Rendered (as HTML and as plain text) as the single character `ch`;
    /// `token_position` is ignored by both renderers in this file.
    Placeholder {
        ch: char,
        token_position: usize,
    },
}
325
326impl From<String> for MdInlineElement {
327 fn from(s: String) -> Self {
328 MdInlineElement::Text {
329 content: s.to_string(),
330 }
331 }
332}
333
334impl ToHtml for MdInlineElement {
335 fn to_html(&self, output_dir: &str, input_dir: &str, html_rel_path: &str) -> String {
336 match self {
337 MdInlineElement::Text { content } => content.clone(),
338 MdInlineElement::Bold { content } => {
339 let inner_html = content
340 .iter()
341 .map(|el| el.to_html(output_dir, input_dir, html_rel_path))
342 .collect::<String>();
343 format!("<b>{}</b>", inner_html)
344 }
345 MdInlineElement::Italic { content } => {
346 let inner_html = content
347 .iter()
348 .map(|el| el.to_html(output_dir, input_dir, html_rel_path))
349 .collect::<String>();
350 format!("<i>{}</i>", inner_html)
351 }
352 MdInlineElement::Link { text, title, url } => {
353 let label_html = text
354 .iter()
355 .map(|el| el.to_html(output_dir, input_dir, html_rel_path))
356 .collect::<String>();
357
358 if url.contains("youtube.com") && url.contains("v=") {
359 let video_id = url
360 .split("v=")
361 .nth(1)
362 .and_then(|s| s.split('&').next())
363 .unwrap_or("");
364
365 return format!(
366 r#"<div class="video-container">
367 <iframe width="560" height="315" src="https://www.youtube.com/embed/{}"
368 title="YouTube video player" frameborder="0" allowfullscreen></iframe>
369 </div>"#,
370 video_id
371 );
372 }
373
374 if url.starts_with("http") {
376 match title {
377 Some(text) => {
378 format!(
379 "<a href=\"{url}\" title=\"{text}\" target=\"_blank\">{label_html}⮺</a>"
380 )
381 }
382 None => format!("<a href=\"{url}\" target=\"_blank\">{label_html}⮺</a>"),
383 }
384 } else {
385 match title {
386 Some(text) => {
387 format!("<a href=\"{url}\" title=\"{text}\">{label_html}</a>")
388 }
389 None => format!("<a href=\"{url}\">{label_html}</a>"),
390 }
391 }
392 }
393 MdInlineElement::Image {
394 alt_text,
395 title,
396 url,
397 } => {
398 let media_url = if !url.starts_with("http") {
400 if let Err(e) = copy_image_to_output_dir(url, output_dir, input_dir) {
401 warn!("Unable to copy image {url}: {e}");
402 }
403
404 let url = url.rsplit('/').next().unwrap_or(url);
406
407 let rel_prefix = build_rel_prefix(html_rel_path);
408
409 &format!("./{}/media/{}", rel_prefix.to_string_lossy(), url)
410 } else {
411 url
412 };
413
414 match title {
415 Some(text) => {
416 format!("<img src=\"{media_url}\" alt=\"{alt_text}\" title=\"{text}\"/>")
417 }
418 None => format!("<img src=\"{media_url}\" alt=\"{alt_text}\"/>"),
419 }
420 }
421 MdInlineElement::Code { content } => format!("<code>{content}</code>"),
422 MdInlineElement::Placeholder {
423 ch,
424 token_position: _,
425 } => ch.to_string(),
426 }
427 }
428}
429
430impl MdInlineElement {
431 pub fn to_plain_text(&self) -> String {
433 match self {
434 MdInlineElement::Text { content } => content.clone(),
435 MdInlineElement::Bold { content } => content
436 .iter()
437 .map(MdInlineElement::to_plain_text)
438 .collect::<Vec<_>>()
439 .join(""),
440 MdInlineElement::Italic { content } => content
441 .iter()
442 .map(MdInlineElement::to_plain_text)
443 .collect::<Vec<_>>()
444 .join(""),
445 MdInlineElement::Link { text, .. } => text
446 .iter()
447 .map(MdInlineElement::to_plain_text)
448 .collect::<Vec<_>>()
449 .join(""),
450 MdInlineElement::Image { alt_text, .. } => alt_text.clone(),
451 MdInlineElement::Code { content } => content.clone(),
452 MdInlineElement::Placeholder {
453 ch,
454 token_position: _,
455 } => ch.to_string(),
456 }
457 }
458}
459
/// A position within a list of tokens.
#[derive(Debug)]
pub struct TokenCursor {
    /// The tokens being traversed.
    pub tokens: Vec<Token>,
    /// Index of the current token; equal to `tokens.len()` once the end has been reached.
    pub current_position: usize,
}
473
474impl TokenCursor {
475 pub fn current(&self) -> Option<&Token> {
477 self.tokens.get(self.current_position)
478 }
479
480 pub fn peek_ahead(&self, n: usize) -> Option<&Token> {
489 self.tokens.get(self.current_position + n)
490 }
491
492 pub fn _peek_behind(&self, n: usize) -> Option<&Token> {
500 self.tokens.get(self.current_position - n)
501 }
502
503 pub fn advance(&mut self) {
505 if self.current_position < self.tokens.len() {
506 self.current_position += 1;
507 }
508 }
509
510 pub fn _set_position(&mut self, pos: usize) {
518 if pos < self.tokens.len() {
519 self.current_position = pos;
520 } else {
521 panic!("Position {pos} is out of bounds for the TokenCursor");
522 }
523 }
524
525 pub fn position(&self) -> usize {
527 self.current_position
528 }
529
530 pub fn is_at_eof(&self) -> bool {
532 self.current_position >= self.tokens.len()
533 }
534}
535
/// An emphasis delimiter run together with its flanking classification.
///
/// NOTE(review): `run_length`, `parsed_position` and `active` are not read or written
/// anywhere in this file; their exact meaning should be confirmed against the parser.
#[derive(Debug, Clone)]
pub struct Delimiter {
    /// The delimiter character; `'_'` gets stricter open/close rules in
    /// `classify_flanking`.
    pub ch: char,
    pub run_length: usize,
    /// Index of this delimiter in the token slice passed to `classify_flanking`.
    pub token_position: usize,
    pub parsed_position: usize,
    pub active: bool,
    /// Set by `classify_flanking`.
    pub can_open: bool,
    /// Set by `classify_flanking`.
    pub can_close: bool,
}
560
561impl Delimiter {
562 pub fn classify_flanking(&mut self, tokens: &[Token]) {
574 let before = if self.token_position > 0 {
575 Some(&tokens[self.token_position - 1])
576 } else {
577 None
578 };
579
580 let after = tokens.get(self.token_position + 1);
581 let followed_by_whitespace = after.is_none_or(is_whitespace);
582 let followed_by_punctuation = after.is_some_and(is_punctuation);
583
584 let preceded_by_whitespace = before.is_none_or(is_whitespace);
585 let preceded_by_punctuation = before.is_some_and(is_punctuation);
586
587 let is_left_flanking = if followed_by_whitespace {
588 false
589 } else if !followed_by_punctuation {
590 true
591 } else {
592 preceded_by_whitespace || preceded_by_punctuation
593 };
594
595 let is_right_flanking = if preceded_by_whitespace {
596 false
597 } else if !preceded_by_punctuation {
598 true
599 } else {
600 followed_by_whitespace || followed_by_punctuation
601 };
602
603 let delimiter_char = self.ch;
604
605 let is_underscore = delimiter_char == '_';
607
608 if is_underscore {
609 self.can_open = is_left_flanking && (!is_right_flanking || followed_by_punctuation);
610
611 self.can_close = is_right_flanking && (!is_left_flanking || followed_by_punctuation);
612 } else {
613 self.can_open = is_left_flanking;
614 self.can_close = is_right_flanking;
615 }
616 }
617}
618
619fn is_whitespace(token: &Token) -> bool {
624 matches!(token, Token::Newline | Token::Whitespace)
625}
626
627fn is_punctuation(token: &Token) -> bool {
632 matches!(
633 token,
634 Token::Punctuation(_)
635 | Token::EmphasisRun {
636 delimiter: _,
637 length: _
638 }
639 | Token::OpenBracket
640 | Token::CloseBracket
641 | Token::OpenParenthesis
642 | Token::CloseParenthesis
643 )
644}