headless_lms_utils/
email_processor.rs

1use std::collections::HashMap;
2
3use once_cell::sync::Lazy;
4use regex::{Captures, Regex};
5use serde::{Deserialize, Serialize};
6
7static LI_START_TAG_REGEX: Lazy<Regex> =
8    Lazy::new(|| Regex::new(r"<li>").expect("invalid li_start regex"));
9static LI_END_TAG_REGEX: Lazy<Regex> =
10    Lazy::new(|| Regex::new(r"</li>").expect("invalid li_end regex"));
11static ALL_TAG_REGEX: Lazy<Regex> =
12    Lazy::new(|| Regex::new(r"<.+?>").expect("invalid all_tags regex"));
13static DOUBLE_QUOTE_REGEX: Lazy<Regex> =
14    Lazy::new(|| Regex::new(r#"""#).expect("invalid double_quote regex"));
15
16#[derive(Debug, Deserialize, Serialize, PartialEq, Eq, Clone)]
17#[serde(tag = "type", content = "attributes")]
18pub enum BlockAttributes {
19    #[serde(rename = "core/paragraph")]
20    Paragraph {
21        content: String,
22        drop_cap: bool,
23        #[serde(flatten)]
24        rest: HashMap<String, serde_json::Value>,
25    },
26    #[serde(rename = "core/image")]
27    Image {
28        alt: String,
29        url: String,
30        #[serde(flatten)]
31        rest: HashMap<String, serde_json::Value>,
32    },
33    #[serde(rename = "core/heading")]
34    Heading {
35        content: String,
36        anchor: String,
37        level: i64,
38        #[serde(flatten)]
39        rest: HashMap<String, serde_json::Value>,
40    },
41    #[serde(rename = "core/list")]
42    List {
43        ordered: bool,
44        values: String,
45        #[serde(flatten)]
46        rest: HashMap<String, serde_json::Value>,
47    },
48}
49
50#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
51pub struct EmailGutenbergBlock {
52    #[serde(rename = "clientId")]
53    pub client_id: String,
54    #[serde(rename = "isValid")]
55    pub is_valid: bool,
56    #[serde(flatten)]
57    pub attributes: BlockAttributes,
58    #[serde(rename = "innerBlocks")]
59    pub inner_blocks: Vec<EmailGutenbergBlock>,
60}
61
62pub fn process_content_to_plaintext(blocks: &[EmailGutenbergBlock]) -> String {
63    let contents: Vec<String> = blocks
64        .iter()
65        .map(|block| match &block.attributes {
66            BlockAttributes::Paragraph { content, .. } => {
67                let res = ALL_TAG_REGEX.replace_all(content, "").to_string();
68                format!("{}\n\n", res)
69            }
70            BlockAttributes::Image { alt, url, .. } => {
71                let result = DOUBLE_QUOTE_REGEX.replace_all(alt, "").to_string();
72                format!("\"{}\", <{}>", result, url)
73            }
74            BlockAttributes::Heading { content, .. } => format!("{}\n\n\n", content),
75            BlockAttributes::List {
76                values, ordered, ..
77            } => {
78                if *ordered {
79                    let mut counter = 0;
80                    let first_tags = LI_START_TAG_REGEX
81                        .replace_all(values, |_caps: &Captures| {
82                            counter += 1;
83                            format!("{}. ", counter)
84                        })
85                        .to_string();
86                    let snd_tags = LI_END_TAG_REGEX.replace_all(&first_tags, "\n").to_string();
87                    ALL_TAG_REGEX.replace_all(&snd_tags, "").to_string()
88                } else {
89                    let first_tags = LI_START_TAG_REGEX.replace_all(values, "* ").to_string();
90                    let snd_tags = LI_END_TAG_REGEX.replace_all(&first_tags, "\n").to_string();
91                    ALL_TAG_REGEX.replace_all(&snd_tags, "").to_string()
92                }
93            }
94        })
95        .collect();
96    contents.join("\n")
97}
98
99pub fn process_content_to_html(blocks: &[EmailGutenbergBlock]) -> String {
100    let contents: Vec<String> = blocks
101        .iter()
102        .map(|block| match &block.attributes {
103            BlockAttributes::Paragraph {
104                content,
105                drop_cap: _,
106                ..
107            } => {
108                format!("<p>{}</p>", content)
109            }
110            BlockAttributes::Image { alt, url, .. } => {
111                format!(r#"<img src="{}" alt="{}"></img>"#, url, alt)
112            }
113            BlockAttributes::Heading { content, level, .. } => {
114                format!("<h{}>{}</h{}>", level, content, level)
115            }
116            BlockAttributes::List {
117                values, ordered, ..
118            } => {
119                if *ordered {
120                    format!("<ol>{}</ol>", values)
121                } else {
122                    format!("<ul>{}</ul>", values)
123                }
124            }
125        })
126        .collect();
127    contents.join("")
128}
129
#[cfg(test)]
mod email_processor_tests {
    use pretty_assertions::assert_eq;
    use uuid::Uuid;

    use super::*;

    // List fixtures shared by the plain-text and HTML list tests.
    const NUMERIC_ITEMS: &str = "<li>1</li><li>2</li><li>3</li><li>4</li>";
    const NUMERIC_ITEMS_WITH_TAGS: &str =
        "<li><code>1</code></li><li><kbd>2</kbd></li><li>3</li><li>4</li>";
    const WORD_ITEMS: &str = "<li>first</li><li>second</li><li>third</li><li>fourth</li>";
    const WORD_ITEMS_WITH_TAGS: &str =
        "<li><code>first</code></li><li><kbd>second</kbd></li><li>third</li><li>fourth</li>";

    /// Wraps `attributes` in a single valid block with a random client id and no inner blocks.
    fn single_block(attributes: BlockAttributes) -> Vec<EmailGutenbergBlock> {
        vec![EmailGutenbergBlock {
            client_id: Uuid::new_v4().to_string(),
            is_valid: true,
            attributes,
            inner_blocks: vec![],
        }]
    }

    /// One paragraph block without drop cap.
    fn paragraph(content: &str) -> Vec<EmailGutenbergBlock> {
        single_block(BlockAttributes::Paragraph {
            content: String::from(content),
            drop_cap: false,
            rest: HashMap::new(),
        })
    }

    /// The level-2 heading used by both heading tests.
    fn heading() -> Vec<EmailGutenbergBlock> {
        single_block(BlockAttributes::Heading {
            content: String::from("Email heading"),
            anchor: String::from("email-heading"),
            level: 2,
            rest: HashMap::new(),
        })
    }

    /// One image block with the given alt text and the URL shared by all image tests.
    fn image(alt: &str) -> Vec<EmailGutenbergBlock> {
        single_block(BlockAttributes::Image {
            alt: String::from(alt),
            url: String::from("URL -of an image"),
            rest: HashMap::new(),
        })
    }

    /// One list block.
    fn list(values: &str, ordered: bool) -> Vec<EmailGutenbergBlock> {
        single_block(BlockAttributes::List {
            values: String::from(values),
            ordered,
            rest: HashMap::new(),
        })
    }

    #[test]
    fn it_converts_paragraph_correctly_to_plain_text() {
        let rendered = process_content_to_plaintext(&paragraph("testi paragraph."));

        assert_eq!(String::from("testi paragraph.\n\n"), rendered);
    }

    #[test]
    fn it_converts_paragraph_wrapped_in_tags_correctly_to_plain_text() {
        let blocks = paragraph("<strong><em>testi paragraph.</em></strong>");

        let rendered = process_content_to_plaintext(&blocks);

        assert_eq!(String::from("testi paragraph.\n\n"), rendered);
    }

    #[test]
    fn it_converts_heading_correctly_to_plain_text() {
        let rendered = process_content_to_plaintext(&heading());

        assert_eq!(String::from("Email heading\n\n\n"), rendered);
    }

    #[test]
    fn it_converts_image_correctly_to_plain_text() {
        let rendered = process_content_to_plaintext(&image("Alternative title"));

        assert_eq!(
            String::from("\"Alternative title\", <URL -of an image>"),
            rendered
        );
    }

    #[test]
    fn it_converts_image_containing_double_quotes_correctly_to_plain_text() {
        let rendered = process_content_to_plaintext(&image(r#""Alternative title""#));

        assert_eq!(
            String::from("\"Alternative title\", <URL -of an image>"),
            rendered
        );
    }

    #[test]
    fn it_converts_unordered_list_correctly_to_plain_text() {
        let rendered = process_content_to_plaintext(&list(NUMERIC_ITEMS, false));

        assert_eq!(String::from("* 1\n* 2\n* 3\n* 4\n"), rendered);
    }

    #[test]
    fn it_converts_unordered_list_containing_other_tags_correctly_to_plain_text() {
        let rendered = process_content_to_plaintext(&list(NUMERIC_ITEMS_WITH_TAGS, false));

        assert_eq!(String::from("* 1\n* 2\n* 3\n* 4\n"), rendered);
    }

    #[test]
    fn it_converts_ordered_list_correctly_to_plain_text() {
        let rendered = process_content_to_plaintext(&list(WORD_ITEMS, true));

        assert_eq!(
            String::from("1. first\n2. second\n3. third\n4. fourth\n"),
            rendered
        );
    }

    #[test]
    fn it_converts_ordered_list_containing_other_tags_correctly_to_plain_text() {
        let rendered = process_content_to_plaintext(&list(WORD_ITEMS_WITH_TAGS, true));

        assert_eq!(
            String::from("1. first\n2. second\n3. third\n4. fourth\n"),
            rendered
        );
    }

    #[test]
    fn it_converts_paragraph_correctly_to_html() {
        let rendered = process_content_to_html(&paragraph("testi paragraph."));

        assert_eq!(String::from("<p>testi paragraph.</p>"), rendered);
    }

    #[test]
    fn it_converts_heading_correctly_to_html() {
        let rendered = process_content_to_html(&heading());

        assert_eq!(String::from("<h2>Email heading</h2>"), rendered);
    }

    #[test]
    fn it_converts_image_correctly_to_html() {
        let rendered = process_content_to_html(&image("Alternative title"));

        assert_eq!(
            String::from(r#"<img src="URL -of an image" alt="Alternative title"></img>"#),
            rendered
        );
    }

    #[test]
    fn it_converts_unordered_list_correctly_to_html() {
        let rendered = process_content_to_html(&list(NUMERIC_ITEMS, false));

        assert_eq!(
            String::from("<ul><li>1</li><li>2</li><li>3</li><li>4</li></ul>"),
            rendered
        );
    }

    #[test]
    fn it_converts_unordered_list_containing_other_tags_correctly_to_html() {
        let rendered = process_content_to_html(&list(NUMERIC_ITEMS_WITH_TAGS, false));

        assert_eq!(
            String::from(
                "<ul><li><code>1</code></li><li><kbd>2</kbd></li><li>3</li><li>4</li></ul>"
            ),
            rendered
        );
    }

    #[test]
    fn it_converts_ordered_list_correctly_to_html() {
        let rendered = process_content_to_html(&list(WORD_ITEMS, true));

        assert_eq!(
            String::from("<ol><li>first</li><li>second</li><li>third</li><li>fourth</li></ol>"),
            rendered
        );
    }

    #[test]
    fn it_converts_ordered_list_containing_other_tags_correctly_to_html() {
        let rendered = process_content_to_html(&list(WORD_ITEMS_WITH_TAGS, true));

        assert_eq!(
            String::from(
                "<ol><li><code>first</code></li><li><kbd>second</kbd></li><li>third</li><li>fourth</li></ol>"
            ),
            rendered
        );
    }
}