Skip to main content

headless_lms_utils/
error_identifier.rs

1use std::sync::LazyLock;
2
3use regex::Regex;
4
5static UUID_RE: LazyLock<Regex> = LazyLock::new(|| {
6    Regex::new(r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}")
7        .expect("valid regex")
8});
9static HEX_ADDR_RE: LazyLock<Regex> =
10    LazyLock::new(|| Regex::new(r"0x[0-9a-fA-F]{6,}").expect("valid regex"));
11static TIMESTAMP_RE: LazyLock<Regex> =
12    LazyLock::new(|| Regex::new(r"\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}").expect("valid regex"));
13static LONG_NUMBER_RE: LazyLock<Regex> =
14    LazyLock::new(|| Regex::new(r"\b\d{5,}\b").expect("valid regex"));
15static BUNDLER_HASH_RE: LazyLock<Regex> =
16    LazyLock::new(|| Regex::new(r"\.[0-9a-f]{8,}\.(js|css|wasm|map)").expect("valid regex"));
17
18/// Normalizes dynamic values out of an error message so that errors with
19/// different UUIDs, addresses, or IDs still hash to the same identifier.
20pub fn normalize_message(message: &str) -> String {
21    // Order matters: UUIDs before long numbers (UUID contains long numeric runs).
22    let s = UUID_RE.replace_all(message, "{uuid}");
23    let s = HEX_ADDR_RE.replace_all(&s, "{addr}");
24    let s = TIMESTAMP_RE.replace_all(&s, "{timestamp}");
25    let s = LONG_NUMBER_RE.replace_all(&s, "{N}");
26    s.into_owned()
27}
28
29/// Normalizes a stack trace: strips dynamic addresses and bundler hashes,
30/// and trims each line.
31pub fn normalize_stack_trace(stack_trace: &str) -> String {
32    let s = UUID_RE.replace_all(stack_trace, "{uuid}");
33    let s = HEX_ADDR_RE.replace_all(&s, "{addr}");
34    let s = TIMESTAMP_RE.replace_all(&s, "{timestamp}");
35    // Strip webpack/vite/esbuild content hashes from filenames.
36    let s = BUNDLER_HASH_RE.replace_all(&s, ".{hash}.$1");
37    let s = LONG_NUMBER_RE.replace_all(&s, "{N}");
38    // Trim each line.
39    s.lines().map(str::trim).collect::<Vec<_>>().join("\n")
40}
41
/// Produces the canonical form of a message used for grouping.
///
/// Leading and trailing whitespace is removed, every internal run of
/// whitespace is collapsed to a single space, and the result is lowercased.
pub fn canonicalize_grouping_message(normalized_message: &str) -> String {
    let mut collapsed = String::with_capacity(normalized_message.len());
    for word in normalized_message.split_whitespace() {
        // Words are never empty, so a non-empty buffer means a word precedes this one.
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed.to_lowercase()
}
49
50fn hash_identifier(parts: &[&str]) -> String {
51    let mut hasher = blake3::Hasher::new();
52    for (idx, part) in parts.iter().enumerate() {
53        if idx > 0 {
54            hasher.update(b"\x00");
55        }
56        hasher.update(part.as_bytes());
57    }
58    hasher.finalize().to_hex().to_string()
59}
60
61/// Computes a stable BLAKE3 identifier for an exact error variant.
62///
63/// Components are separated by null bytes so that ("foo", "") and ("", "foo")
64/// never collide.
65pub fn calculate_exact_error_identifier(
66    service: &str,
67    error_source: &str,
68    message: &str,
69    stack_trace: Option<&str>,
70) -> String {
71    let normalized_message = normalize_message(message);
72    let normalized_stack = stack_trace.map(normalize_stack_trace);
73
74    hash_identifier(&[
75        service,
76        error_source,
77        normalized_message.as_str(),
78        normalized_stack.as_deref().unwrap_or(""),
79    ])
80}
81
82/// Computes a stable BLAKE3 identifier for broadly grouping related errors.
83pub fn calculate_error_grouping_identifier(
84    service: &str,
85    error_source: &str,
86    message: &str,
87) -> String {
88    let normalized_message = normalize_message(message);
89    let grouping_message = canonicalize_grouping_message(&normalized_message);
90
91    hash_identifier(&[service, error_source, grouping_message.as_str()])
92}
93
#[cfg(test)]
mod tests {
    use super::*;

    // ---- normalize_message -------------------------------------------------

    #[test]
    fn test_normalize_message_uuid() {
        let normalized = normalize_message("User 550e8400-e29b-41d4-a716-446655440000 not found");
        assert_eq!(normalized, "User {uuid} not found");
    }

    #[test]
    fn test_normalize_message_hex_addr() {
        let normalized = normalize_message("Segfault at 0x7f2a3b4c5d6e in thread");
        assert_eq!(normalized, "Segfault at {addr} in thread");
    }

    #[test]
    fn test_normalize_message_timestamp() {
        let normalized = normalize_message("Request failed at 2024-01-15T10:30:00 with status 503");
        assert_eq!(normalized, "Request failed at {timestamp} with status 503");
    }

    #[test]
    fn test_normalize_message_long_number() {
        let normalized = normalize_message("Record 123456 not found");
        assert_eq!(normalized, "Record {N} not found");
    }

    #[test]
    fn test_normalize_message_short_number_unchanged() {
        // Numbers shorter than five digits must pass through untouched.
        let input = "HTTP 500 error on route /api";
        assert_eq!(normalize_message(input), "HTTP 500 error on route /api");
    }

    #[test]
    fn test_normalize_message_multiple_patterns() {
        let normalized = normalize_message(
            "User 550e8400-e29b-41d4-a716-446655440000 (id=987654) at 0x7f2a3b4c5d6e",
        );
        assert_eq!(normalized, "User {uuid} (id={N}) at {addr}");
    }

    // ---- normalize_stack_trace ---------------------------------------------

    #[test]
    fn test_normalize_stack_trace_hex_addr() {
        let normalized = normalize_stack_trace("at process (0x00007f0a1234abcd)");
        assert_eq!(normalized, "at process ({addr})");
    }

    #[test]
    fn test_normalize_stack_trace_bundler_hash_js() {
        let normalized = normalize_stack_trace("at fn (app.abc12345def0.js:10:5)");
        assert_eq!(normalized, "at fn (app.{hash}.js:10:5)");
    }

    #[test]
    fn test_normalize_stack_trace_bundler_hash_css() {
        let normalized = normalize_stack_trace("loaded styles.abc98765def0.css");
        assert_eq!(normalized, "loaded styles.{hash}.css");
    }

    #[test]
    fn test_normalize_stack_trace_bundler_hash_digits_only() {
        // An all-digit hash must be treated as a hash, not as a long number.
        let normalized = normalize_stack_trace("at fn (app.12345678.js:10:5)");
        assert_eq!(normalized, "at fn (app.{hash}.js:10:5)");
    }

    #[test]
    fn test_normalize_stack_trace_line_trimming() {
        let padded = "   at foo (bar.js:1:1)   \n   at baz (qux.js:2:2)   ";
        let expected = "at foo (bar.js:1:1)\nat baz (qux.js:2:2)";
        assert_eq!(normalize_stack_trace(padded), expected);
    }

    // ---- calculate_exact_error_identifier ----------------------------------

    #[test]
    fn test_same_error_different_uuids_same_exact_identifier() {
        let [first, second] = [
            "User 550e8400-e29b-41d4-a716-446655440000 not found",
            "User 660f9511-f3ac-52e5-b827-557766551111 not found",
        ]
        .map(|message| {
            calculate_exact_error_identifier("main-frontend", "frontend", message, None)
        });
        assert_eq!(first, second);
    }

    #[test]
    fn test_same_error_different_hex_addr_in_stack_same_exact_identifier() {
        let identify = |stack| {
            calculate_exact_error_identifier(
                "headless-lms",
                "backend",
                "null pointer dereference",
                Some(stack),
            )
        };
        assert_eq!(identify("at 0x7f0a1234abcd"), identify("at 0x7f9b5678efab"));
    }

    #[test]
    fn test_same_stack_different_bundler_hash_same_exact_identifier() {
        let identify = |stack| {
            calculate_exact_error_identifier(
                "main-frontend",
                "frontend",
                "Cannot read property",
                Some(stack),
            )
        };
        assert_eq!(
            identify("at fn (app.abc12345def0.js:10:5)"),
            identify("at fn (app.fed09876543.js:10:5)")
        );
    }

    #[test]
    fn test_different_errors_different_exact_identifiers() {
        let [foo_id, bar_id] = [
            "Cannot read property 'foo' of undefined",
            "Cannot read property 'bar' of undefined",
        ]
        .map(|message| {
            calculate_exact_error_identifier("main-frontend", "frontend", message, None)
        });
        assert_ne!(foo_id, bar_id);
    }

    #[test]
    fn test_source_affects_exact_identifier() {
        let identify = |source| {
            calculate_exact_error_identifier("main-frontend", source, "an error occurred", None)
        };
        assert_ne!(identify("frontend"), identify("backend"));
    }

    #[test]
    fn test_stack_presence_affects_exact_identifier() {
        let identify = |stack| {
            calculate_exact_error_identifier("main-frontend", "frontend", "an error", stack)
        };
        assert_ne!(identify(Some("at foo (a.js:1:1)")), identify(None));
    }

    #[test]
    fn test_separator_prevents_collision() {
        // "foobar" + no stack must not hash like "foo" + stack "bar".
        let identify = |message, stack| {
            calculate_exact_error_identifier("main-frontend", "frontend", message, stack)
        };
        assert_ne!(identify("foobar", None), identify("foo", Some("bar")));
    }

    #[test]
    fn test_exact_identifier_is_deterministic() {
        let identify = || {
            calculate_exact_error_identifier(
                "headless-lms",
                "backend",
                "test error",
                Some("stack trace"),
            )
        };
        assert_eq!(identify(), identify());
    }

    #[test]
    fn test_exact_identifier_length() {
        // BLAKE3 produces 32 bytes = 64 hex chars by default
        let identifier =
            calculate_exact_error_identifier("main-frontend", "frontend", "error", None);
        assert_eq!(identifier.len(), 64);
    }

    // ---- grouping ----------------------------------------------------------

    #[test]
    fn test_grouping_message_collapses_whitespace_and_case() {
        let canonical = canonicalize_grouping_message("  Cannot READ   property   {uuid}   ");
        assert_eq!(canonical, "cannot read property {uuid}");
    }

    #[test]
    fn test_grouping_identifier_is_case_and_whitespace_insensitive() {
        let [tidy, messy] = [
            "Cannot read properties of undefined (reading 'foo')",
            "  cannot read   properties of undefined (reading 'foo')  ",
        ]
        .map(|message| calculate_error_grouping_identifier("main-frontend", "frontend", message));
        assert_eq!(tidy, messy);
    }

    #[test]
    fn test_grouping_identifier_normalizes_dynamic_message_values() {
        let [first, second] = [
            "Request 123456 failed for user 550e8400-e29b-41d4-a716-446655440000",
            "Request 987654 failed for user 660f9511-f3ac-52e5-b827-557766551111",
        ]
        .map(|message| calculate_error_grouping_identifier("main-frontend", "frontend", message));
        assert_eq!(first, second);
    }

    #[test]
    fn test_grouping_identifier_differs_for_different_messages() {
        let [undefined_read, network_failure] = [
            "Cannot read properties of undefined",
            "Network request failed",
        ]
        .map(|message| calculate_error_grouping_identifier("main-frontend", "frontend", message));
        assert_ne!(undefined_read, network_failure);
    }
}