1use std::sync::LazyLock;
2
3use regex::Regex;
4
5static UUID_RE: LazyLock<Regex> = LazyLock::new(|| {
6 Regex::new(r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}")
7 .expect("valid regex")
8});
9static HEX_ADDR_RE: LazyLock<Regex> =
10 LazyLock::new(|| Regex::new(r"0x[0-9a-fA-F]{6,}").expect("valid regex"));
11static TIMESTAMP_RE: LazyLock<Regex> =
12 LazyLock::new(|| Regex::new(r"\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}").expect("valid regex"));
13static LONG_NUMBER_RE: LazyLock<Regex> =
14 LazyLock::new(|| Regex::new(r"\b\d{5,}\b").expect("valid regex"));
15static BUNDLER_HASH_RE: LazyLock<Regex> =
16 LazyLock::new(|| Regex::new(r"\.[0-9a-f]{8,}\.(js|css|wasm|map)").expect("valid regex"));
17
18pub fn normalize_message(message: &str) -> String {
21 let s = UUID_RE.replace_all(message, "{uuid}");
23 let s = HEX_ADDR_RE.replace_all(&s, "{addr}");
24 let s = TIMESTAMP_RE.replace_all(&s, "{timestamp}");
25 let s = LONG_NUMBER_RE.replace_all(&s, "{N}");
26 s.into_owned()
27}
28
29pub fn normalize_stack_trace(stack_trace: &str) -> String {
32 let s = UUID_RE.replace_all(stack_trace, "{uuid}");
33 let s = HEX_ADDR_RE.replace_all(&s, "{addr}");
34 let s = TIMESTAMP_RE.replace_all(&s, "{timestamp}");
35 let s = BUNDLER_HASH_RE.replace_all(&s, ".{hash}.$1");
37 let s = LONG_NUMBER_RE.replace_all(&s, "{N}");
38 s.lines().map(str::trim).collect::<Vec<_>>().join("\n")
40}
41
/// Produces the canonical form of a message used for grouping.
///
/// Leading and trailing whitespace is dropped, every interior run of whitespace
/// is collapsed to a single space, and the whole result is lowercased. Note that
/// lowercasing applies to the entire string, so a placeholder such as `{N}`
/// comes out as `{n}`.
pub fn canonicalize_grouping_message(normalized_message: &str) -> String {
    let mut collapsed = String::with_capacity(normalized_message.len());
    for word in normalized_message.split_whitespace() {
        // Words are never empty, so a non-empty buffer means a word precedes this one.
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed.to_lowercase()
}
49
50fn hash_identifier(parts: &[&str]) -> String {
51 let mut hasher = blake3::Hasher::new();
52 for (idx, part) in parts.iter().enumerate() {
53 if idx > 0 {
54 hasher.update(b"\x00");
55 }
56 hasher.update(part.as_bytes());
57 }
58 hasher.finalize().to_hex().to_string()
59}
60
61pub fn calculate_exact_error_identifier(
66 service: &str,
67 error_source: &str,
68 message: &str,
69 stack_trace: Option<&str>,
70) -> String {
71 let normalized_message = normalize_message(message);
72 let normalized_stack = stack_trace.map(normalize_stack_trace);
73
74 hash_identifier(&[
75 service,
76 error_source,
77 normalized_message.as_str(),
78 normalized_stack.as_deref().unwrap_or(""),
79 ])
80}
81
82pub fn calculate_error_grouping_identifier(
84 service: &str,
85 error_source: &str,
86 message: &str,
87) -> String {
88 let normalized_message = normalize_message(message);
89 let grouping_message = canonicalize_grouping_message(&normalized_message);
90
91 hash_identifier(&[service, error_source, grouping_message.as_str()])
92}
93
/// Unit tests for message/stack normalization and for the exact and grouping
/// identifiers built on top of it.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- normalize_message -------------------------------------------------

    #[test]
    fn test_normalize_message_uuid() {
        let msg = "User 550e8400-e29b-41d4-a716-446655440000 not found";
        assert_eq!(normalize_message(msg), "User {uuid} not found");
    }

    #[test]
    fn test_normalize_message_hex_addr() {
        let msg = "Segfault at 0x7f2a3b4c5d6e in thread";
        assert_eq!(normalize_message(msg), "Segfault at {addr} in thread");
    }

    #[test]
    fn test_normalize_message_timestamp() {
        // The three-digit status code is too short to count as a long number.
        let msg = "Request failed at 2024-01-15T10:30:00 with status 503";
        assert_eq!(
            normalize_message(msg),
            "Request failed at {timestamp} with status 503"
        );
    }

    #[test]
    fn test_normalize_message_long_number() {
        let msg = "Record 123456 not found";
        assert_eq!(normalize_message(msg), "Record {N} not found");
    }

    #[test]
    fn test_normalize_message_short_number_unchanged() {
        let msg = "HTTP 500 error on route /api";
        assert_eq!(normalize_message(msg), "HTTP 500 error on route /api");
    }

    #[test]
    fn test_normalize_message_multiple_patterns() {
        let msg = "User 550e8400-e29b-41d4-a716-446655440000 (id=987654) at 0x7f2a3b4c5d6e";
        assert_eq!(normalize_message(msg), "User {uuid} (id={N}) at {addr}");
    }

    // ---- normalize_stack_trace ---------------------------------------------

    #[test]
    fn test_normalize_stack_trace_hex_addr() {
        let trace = "at process (0x00007f0a1234abcd)";
        assert_eq!(normalize_stack_trace(trace), "at process ({addr})");
    }

    #[test]
    fn test_normalize_stack_trace_bundler_hash_js() {
        let trace = "at fn (app.abc12345def0.js:10:5)";
        assert_eq!(normalize_stack_trace(trace), "at fn (app.{hash}.js:10:5)");
    }

    #[test]
    fn test_normalize_stack_trace_bundler_hash_css() {
        let trace = "loaded styles.abc98765def0.css";
        assert_eq!(normalize_stack_trace(trace), "loaded styles.{hash}.css");
    }

    #[test]
    fn test_normalize_stack_trace_bundler_hash_digits_only() {
        // A digits-only hash must become `{hash}`, not `{N}`.
        let trace = "at fn (app.12345678.js:10:5)";
        assert_eq!(normalize_stack_trace(trace), "at fn (app.{hash}.js:10:5)");
    }

    #[test]
    fn test_normalize_stack_trace_line_trimming() {
        // Mix runs of spaces and tabs so that more than a single leading or
        // trailing space has to be stripped from each line.
        let trace = "   at foo (bar.js:1:1)  \n\t at baz (qux.js:2:2) \t";
        assert_eq!(
            normalize_stack_trace(trace),
            "at foo (bar.js:1:1)\nat baz (qux.js:2:2)"
        );
    }

    // ---- calculate_exact_error_identifier ----------------------------------

    #[test]
    fn test_same_error_different_uuids_same_exact_identifier() {
        let fp1 = calculate_exact_error_identifier(
            "main-frontend",
            "frontend",
            "User 550e8400-e29b-41d4-a716-446655440000 not found",
            None,
        );
        let fp2 = calculate_exact_error_identifier(
            "main-frontend",
            "frontend",
            "User 660f9511-f3ac-52e5-b827-557766551111 not found",
            None,
        );
        assert_eq!(fp1, fp2);
    }

    #[test]
    fn test_same_error_different_hex_addr_in_stack_same_exact_identifier() {
        let fp1 = calculate_exact_error_identifier(
            "headless-lms",
            "backend",
            "null pointer dereference",
            Some("at 0x7f0a1234abcd"),
        );
        let fp2 = calculate_exact_error_identifier(
            "headless-lms",
            "backend",
            "null pointer dereference",
            Some("at 0x7f9b5678efab"),
        );
        assert_eq!(fp1, fp2);
    }

    #[test]
    fn test_same_stack_different_bundler_hash_same_exact_identifier() {
        let fp1 = calculate_exact_error_identifier(
            "main-frontend",
            "frontend",
            "Cannot read property",
            Some("at fn (app.abc12345def0.js:10:5)"),
        );
        let fp2 = calculate_exact_error_identifier(
            "main-frontend",
            "frontend",
            "Cannot read property",
            Some("at fn (app.fed09876543.js:10:5)"),
        );
        assert_eq!(fp1, fp2);
    }

    #[test]
    fn test_different_errors_different_exact_identifiers() {
        let fp1 = calculate_exact_error_identifier(
            "main-frontend",
            "frontend",
            "Cannot read property 'foo' of undefined",
            None,
        );
        let fp2 = calculate_exact_error_identifier(
            "main-frontend",
            "frontend",
            "Cannot read property 'bar' of undefined",
            None,
        );
        assert_ne!(fp1, fp2);
    }

    #[test]
    fn test_source_affects_exact_identifier() {
        let fp1 = calculate_exact_error_identifier(
            "main-frontend",
            "frontend",
            "an error occurred",
            None,
        );
        let fp2 =
            calculate_exact_error_identifier("main-frontend", "backend", "an error occurred", None);
        assert_ne!(fp1, fp2);
    }

    #[test]
    fn test_stack_presence_affects_exact_identifier() {
        let fp1 = calculate_exact_error_identifier(
            "main-frontend",
            "frontend",
            "an error",
            Some("at foo (a.js:1:1)"),
        );
        let fp2 = calculate_exact_error_identifier("main-frontend", "frontend", "an error", None);
        assert_ne!(fp1, fp2);
    }

    #[test]
    fn test_separator_prevents_collision() {
        // Same concatenated text, split differently between message and stack.
        let fp1 = calculate_exact_error_identifier("main-frontend", "frontend", "foobar", None);
        let fp2 = calculate_exact_error_identifier("main-frontend", "frontend", "foo", Some("bar"));
        assert_ne!(fp1, fp2);
    }

    #[test]
    fn test_exact_identifier_is_deterministic() {
        let fp1 = calculate_exact_error_identifier(
            "headless-lms",
            "backend",
            "test error",
            Some("stack trace"),
        );
        let fp2 = calculate_exact_error_identifier(
            "headless-lms",
            "backend",
            "test error",
            Some("stack trace"),
        );
        assert_eq!(fp1, fp2);
    }

    #[test]
    fn test_exact_identifier_length() {
        let fp = calculate_exact_error_identifier("main-frontend", "frontend", "error", None);
        // The identifier is the hex form of the digest: 64 characters.
        assert_eq!(fp.len(), 64);
    }

    // ---- grouping ----------------------------------------------------------

    #[test]
    fn test_grouping_message_collapses_whitespace_and_case() {
        // Interior runs of spaces and a tab must each collapse to one space.
        let msg = "  Cannot   READ\tproperty  {uuid}  ";
        assert_eq!(
            canonicalize_grouping_message(msg),
            "cannot read property {uuid}"
        );
    }

    #[test]
    fn test_grouping_identifier_is_case_and_whitespace_insensitive() {
        let fp1 = calculate_error_grouping_identifier(
            "main-frontend",
            "frontend",
            "Cannot read properties of undefined (reading 'foo')",
        );
        // Differs from the first message in letter case, in padding, and in
        // interior whitespace (a run of spaces and a tab).
        let fp2 = calculate_error_grouping_identifier(
            "main-frontend",
            "frontend",
            "  cannot   READ properties of\tundefined (reading 'foo')  ",
        );
        assert_eq!(fp1, fp2);
    }

    #[test]
    fn test_grouping_identifier_normalizes_dynamic_message_values() {
        let fp1 = calculate_error_grouping_identifier(
            "main-frontend",
            "frontend",
            "Request 123456 failed for user 550e8400-e29b-41d4-a716-446655440000",
        );
        let fp2 = calculate_error_grouping_identifier(
            "main-frontend",
            "frontend",
            "Request 987654 failed for user 660f9511-f3ac-52e5-b827-557766551111",
        );
        assert_eq!(fp1, fp2);
    }

    #[test]
    fn test_grouping_identifier_differs_for_different_messages() {
        let fp1 = calculate_error_grouping_identifier(
            "main-frontend",
            "frontend",
            "Cannot read properties of undefined",
        );
        let fp2 = calculate_error_grouping_identifier(
            "main-frontend",
            "frontend",
            "Network request failed",
        );
        assert_ne!(fp1, fp2);
    }
}