headless_lms_utils/
strings.rs

1use once_cell::sync::Lazy;
2use rand::{RngExt, distr::Alphanumeric, rng};
3use regex::Regex;
4
5static IETF_LANGUAGE_CODE_REGEX: Lazy<Regex> = Lazy::new(|| {
6    Regex::new(r"^[a-z]{2,3}(-[A-Z][a-z]{3})?(-[A-Z]{2})?$")
7        .expect("Invalid IETF language code regex.")
8});
9
10pub fn generate_random_string(length: usize) -> String {
11    rng()
12        .sample_iter(Alphanumeric)
13        .take(length)
14        .map(char::from)
15        .collect()
16}
17
18pub fn generate_easily_writable_random_string(length: usize) -> String {
19    rng()
20        .sample_iter(Alphanumeric)
21        .filter(|c: &u8| c.is_ascii_lowercase() || c.is_ascii_digit())
22        // Filter out characters that might be confused with each other
23        .filter(|c| c != &b'l' && c != &b'1' && c != &b'o' && c != &b'0')
24        .take(length)
25        .map(char::from)
26        .collect()
27}
28
29/// Checks whether the string is IETF language code where subtags are separated with underscore.
30pub fn is_ietf_language_code_like(string: &str) -> bool {
31    IETF_LANGUAGE_CODE_REGEX.is_match(string)
32}
33
/// Returns the longest prefix of `s` that is at most `max_bytes` bytes long
/// and ends on a UTF-8 character boundary.
///
/// If `s` already fits within `max_bytes`, it is returned unchanged. When the
/// byte limit falls inside a multi-byte character, that whole character is
/// dropped, so the result may be shorter than `max_bytes`.
pub fn truncate_utf8_at_boundary(s: &str, max_bytes: usize) -> &str {
    if max_bytes >= s.len() {
        return s;
    }
    // Walk backwards from the byte limit to the nearest character boundary.
    // Index 0 is always a boundary, so the search cannot come up empty.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&candidate| s.is_char_boundary(candidate))
        .unwrap_or(0);
    &s[..cut]
}
45
#[cfg(test)]
mod test {
    use super::*;

    /// Byte limit used by the truncation tests.
    const LIMIT: usize = 255;

    #[test]
    fn ietf_language_code_validation_works() {
        // Invalid scenarios: empty input, underscore separators, wrong casing.
        let rejected = ["", "en_us", "en_US", "in-cans", "in-cans-ca"];
        for code in rejected.iter() {
            assert!(
                !is_ietf_language_code_like(code),
                "expected {:?} to be rejected",
                code
            );
        }

        // Valid scenarios: bare language, language-region, language-script-region.
        let accepted = ["en", "eng", "en-US", "in-Cans-CA"];
        for code in accepted.iter() {
            assert!(
                is_ietf_language_code_like(code),
                "expected {:?} to be accepted",
                code
            );
        }
    }

    #[test]
    fn truncate_utf8_at_boundary_returns_original_when_short() {
        let short = "heillä";
        assert_eq!(truncate_utf8_at_boundary(short, LIMIT), short);
    }

    #[test]
    fn truncate_utf8_at_boundary_handles_finnish_characters() {
        // 254 ASCII bytes followed by a two-byte character straddling the limit.
        let ascii_prefix = "a".repeat(254);
        let input = format!("{}äz", ascii_prefix);
        let truncated = truncate_utf8_at_boundary(&input, LIMIT);
        assert_eq!(truncated.len(), 254);
        assert!(truncated.is_char_boundary(truncated.len()));
        assert_eq!(truncated, ascii_prefix);
    }

    #[test]
    fn truncate_utf8_at_boundary_handles_emoji() {
        // 254 ASCII bytes followed by a four-byte character straddling the limit.
        let ascii_prefix = "a".repeat(254);
        let input = format!("{}😀z", ascii_prefix);
        let truncated = truncate_utf8_at_boundary(&input, LIMIT);
        assert_eq!(truncated.len(), 254);
        assert!(truncated.is_char_boundary(truncated.len()));
        assert_eq!(truncated, ascii_prefix);
    }
}