icu_pattern/multi_named.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Code for the [`MultiNamedPlaceholder`] pattern backend.
6
7#[cfg(feature = "alloc")]
8use alloc::{borrow::Cow, boxed::Box, collections::BTreeMap, str::FromStr, string::String};
9use core::fmt;
10#[cfg(feature = "litemap")]
11use litemap::LiteMap;
12use writeable::Writeable;
13
14use crate::common::*;
15use crate::Error;
16
/// The name of a placeholder in a [`MultiNamedPlaceholder`] pattern, borrowed as a string slice.
///
/// This is the key type carried by [`PatternItem::Placeholder`](crate::PatternItem::Placeholder)
/// when iterating over a multi-named pattern.
///
/// # Examples
///
/// ```
/// use core::cmp::Ordering;
/// use core::str::FromStr;
/// use icu_pattern::MultiNamedPlaceholderKey;
/// use icu_pattern::MultiNamedPlaceholderPattern;
/// use icu_pattern::PatternItem;
///
/// // Parse the string syntax and check the resulting data store:
/// let pattern = MultiNamedPlaceholderPattern::try_from_str(
///     "Hello, {person0} and {person1}!",
///     Default::default(),
/// )
/// .unwrap();
///
/// assert_eq!(
///     pattern.iter().cmp(
///         [
///             PatternItem::Literal("Hello, "),
///             PatternItem::Placeholder(MultiNamedPlaceholderKey("person0")),
///             PatternItem::Literal(" and "),
///             PatternItem::Placeholder(MultiNamedPlaceholderKey("person1")),
///             PatternItem::Literal("!")
///         ]
///         .into_iter()
///     ),
///     Ordering::Equal
/// );
/// ```
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[repr(transparent)]
#[allow(clippy::exhaustive_structs)] // transparent newtype
pub struct MultiNamedPlaceholderKey<'a>(pub &'a str);
53
/// Owned-or-borrowed counterpart of [`MultiNamedPlaceholderKey`], used while a pattern is
/// being constructed.
#[cfg(feature = "alloc")]
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
#[repr(transparent)]
#[allow(clippy::exhaustive_structs)] // transparent newtype
pub struct MultiNamedPlaceholderKeyCow<'a>(pub Cow<'a, str>);
61
#[cfg(feature = "alloc")]
impl FromStr for MultiNamedPlaceholderKeyCow<'_> {
    type Err = Error;

    /// Wraps a copy of `s` as a placeholder key. This conversion always returns `Ok`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // `FromStr::from_str` does not tie the lifetime of `s` to `Self`, so the name
        // cannot be borrowed and has to be copied into an owned `String`.
        let owned_name = String::from(s);
        Ok(Self(Cow::Owned(owned_name)))
    }
}
70
/// Error returned when a value provider has no entry for a placeholder name.
///
/// When written through its [`Writeable`] implementation, the error renders the
/// placeholder in pattern syntax, i.e. `{name}`.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct MissingNamedPlaceholderError<'a> {
    /// The placeholder name for which no value was found.
    pub name: &'a str,
}
76
77impl Writeable for MissingNamedPlaceholderError<'_> {
78 fn write_to<W: fmt::Write + ?Sized>(&self, sink: &mut W) -> fmt::Result {
79 sink.write_char('{')?;
80 sink.write_str(self.name)?;
81 sink.write_char('}')?;
82 Ok(())
83 }
84}
85
/// Allows a [`BTreeMap`] keyed by string-like values to supply placeholder values by name.
#[cfg(feature = "alloc")]
impl<'k, K, W> PlaceholderValueProvider<MultiNamedPlaceholderKey<'k>> for BTreeMap<K, W>
where
    K: Ord + core::borrow::Borrow<str>,
    W: Writeable,
{
    type Error = MissingNamedPlaceholderError<'k>;

    type W<'a>
        = Result<&'a W, Self::Error>
    where
        Self: 'a;

    type L<'a, 'l>
        = &'l str
    where
        Self: 'a;

    /// Looks up the value stored under the placeholder's name.
    ///
    /// Returns a [`MissingNamedPlaceholderError`] carrying the name when the map has no
    /// such entry.
    #[inline]
    fn value_for<'a>(&'a self, key: MultiNamedPlaceholderKey<'k>) -> Self::W<'a> {
        let MultiNamedPlaceholderKey(name) = key;
        self.get(name).ok_or(MissingNamedPlaceholderError { name })
    }

    /// Literals pass through unchanged.
    #[inline]
    fn map_literal<'a, 'l>(&'a self, literal: &'l str) -> Self::L<'a, 'l> {
        literal
    }
}
116
/// Allows a [`LiteMap`] keyed by string-like values to supply placeholder values by name.
#[cfg(feature = "litemap")]
impl<'k, K, W, S> PlaceholderValueProvider<MultiNamedPlaceholderKey<'k>> for LiteMap<K, W, S>
where
    K: Ord + core::borrow::Borrow<str>,
    W: Writeable,
    S: litemap::store::Store<K, W>,
{
    type Error = MissingNamedPlaceholderError<'k>;

    type W<'a>
        = Result<&'a W, Self::Error>
    where
        Self: 'a;

    type L<'a, 'l>
        = &'l str
    where
        Self: 'a;

    /// Looks up the value stored under the placeholder's name.
    ///
    /// Returns a [`MissingNamedPlaceholderError`] carrying the name when the map has no
    /// such entry.
    #[inline]
    fn value_for<'a>(&'a self, key: MultiNamedPlaceholderKey<'k>) -> Self::W<'a> {
        let MultiNamedPlaceholderKey(name) = key;
        self.get(name).ok_or(MissingNamedPlaceholderError { name })
    }

    /// Literals pass through unchanged.
    #[inline]
    fn map_literal<'a, 'l>(&'a self, literal: &'l str) -> Self::L<'a, 'l> {
        literal
    }
}
148
149/// Backend for patterns containing zero or more named placeholders.
150///
151/// This empty type is not constructible.
152///
153/// # Placeholder Keys
154///
155/// The placeholder is [`MultiNamedPlaceholderKey`].
156///
/// In [`Pattern::interpolate()`], pass a map-like structure. When a key is missing,
/// [`Pattern::try_interpolate()`] reports a [`MissingNamedPlaceholderError`] and the
/// placeholder is written out as `{name}`, as shown in the examples below.
159///
160/// # Encoding Details
161///
162/// The literals and placeholders are stored in context. A placeholder is encoded as a name length
163/// in octal code points followed by the placeholder name.
164///
165/// For example, consider the pattern: "Hello, {user} and {someone_else}!"
166///
167/// The encoding for this would be:
168///
169/// ```txt
170/// Hello, \x00\x04user and \x01\x04someone_else!
171/// ```
172///
173/// where `\x00\x04` and `\x01\x04` are a big-endian octal number representing the lengths of
174/// their respective placeholder names.
175///
176/// Consequences of this encoding:
177///
/// 1. The maximum placeholder name length is 63 bytes (the largest two-digit octal number)
/// 2. Code points in the range `\x00` through `\x07` are reserved for the placeholder name
///    length and are rejected in literals
180///
181/// # Examples
182///
183/// Example patterns supported by this backend:
184///
185/// ```
186/// use core::str::FromStr;
187/// use icu_pattern::MultiNamedPlaceholder;
188/// use icu_pattern::Pattern;
189/// use std::collections::BTreeMap;
190///
191/// let placeholder_value_map: BTreeMap<&str, &str> = [
192/// ("num", "5"),
193/// ("letter", "X"),
194/// ("", "empty"),
195/// ("unused", "unused"),
196/// ]
197/// .into_iter()
198/// .collect();
199///
200/// // Single placeholder:
201/// assert_eq!(
202/// Pattern::<MultiNamedPlaceholder>::try_from_str(
203/// "{num} days ago",
204/// Default::default()
205/// )
206/// .unwrap()
207/// .try_interpolate_to_string(&placeholder_value_map)
208/// .unwrap(),
209/// "5 days ago",
210/// );
211///
212/// // No placeholder (note, the placeholder value is never accessed):
213/// assert_eq!(
214/// Pattern::<MultiNamedPlaceholder>::try_from_str(
215/// "yesterday",
216/// Default::default()
217/// )
218/// .unwrap()
219/// .try_interpolate_to_string(&placeholder_value_map)
220/// .unwrap(),
221/// "yesterday",
222/// );
223///
224/// // No literals, only placeholders:
225/// assert_eq!(
226/// Pattern::<MultiNamedPlaceholder>::try_from_str(
227/// "{letter}{num}{}",
228/// Default::default()
229/// )
230/// .unwrap()
231/// .try_interpolate_to_string(&placeholder_value_map)
232/// .unwrap(),
233/// "X5empty",
234/// );
235/// ```
236///
237/// Use [`LiteMap`] for alloc-free formatting:
238///
239/// ```
240/// use core::str::FromStr;
241/// use icu_pattern::MultiNamedPlaceholderPattern;
242/// use litemap::LiteMap;
243/// use writeable::TryWriteable;
244///
245/// static PLACEHOLDER_VALUE_MAP: LiteMap<&str, usize, &[(&str, usize)]> =
246/// LiteMap::from_sorted_store_unchecked(&[("seven", 11)]);
247///
248/// // Note: String allocates, but this could be a non-allocating sink
249/// let mut sink = String::new();
250///
251/// MultiNamedPlaceholderPattern::try_from_str("{seven}", Default::default())
252/// .unwrap()
253/// .try_interpolate(&PLACEHOLDER_VALUE_MAP)
254/// .try_write_to(&mut sink)
255/// .unwrap()
256/// .unwrap();
257///
258/// assert_eq!(sink, "11");
259/// ```
260///
/// Missing placeholder values cause an error result to be returned. However,
/// based on the design of [`TryWriteable`], the error can be discarded to get
/// a best-effort interpolation in which each missing placeholder appears as `{name}`.
264///
265/// ```should_panic
266/// use core::str::FromStr;
267/// use icu_pattern::MultiNamedPlaceholder;
268/// use icu_pattern::Pattern;
269/// use std::collections::BTreeMap;
270///
271/// let placeholder_value_map: BTreeMap<&str, &str> =
272/// [("num", "5"), ("letter", "X")].into_iter().collect();
273///
274/// Pattern::<MultiNamedPlaceholder>::try_from_str(
275/// "Your name is {your_name}",
276/// Default::default(),
277/// )
278/// .unwrap()
279/// .try_interpolate_to_string(&placeholder_value_map)
280/// .unwrap();
281/// ```
282///
283/// Recover the best-effort lossy string by directly using [`Pattern::try_interpolate()`]:
284///
285/// ```
286/// use core::str::FromStr;
287/// use icu_pattern::MissingNamedPlaceholderError;
288/// use icu_pattern::MultiNamedPlaceholder;
289/// use icu_pattern::Pattern;
290/// use std::borrow::Cow;
291/// use std::collections::BTreeMap;
292/// use writeable::TryWriteable;
293///
294/// let placeholder_value_map: BTreeMap<&str, &str> =
295/// [("num", "5"), ("letter", "X")].into_iter().collect();
296///
297/// let pattern = Pattern::<MultiNamedPlaceholder>::try_from_str(
298/// "Your name is {your_name}",
299/// Default::default(),
300/// )
301/// .unwrap();
302///
303/// let mut buffer = String::new();
304/// let result = pattern
305/// .try_interpolate(&placeholder_value_map)
306/// .try_write_to(&mut buffer)
307/// .expect("infallible write to String");
308///
309/// assert!(matches!(result, Err(MissingNamedPlaceholderError { .. })));
310/// assert_eq!(result.unwrap_err().name, "your_name");
311/// assert_eq!(buffer, "Your name is {your_name}");
312/// ```
313///
314/// [`Pattern::interpolate()`]: crate::Pattern::interpolate
315/// [`Pattern::try_interpolate()`]: crate::Pattern::try_interpolate
316/// [`TryWriteable`]: writeable::TryWriteable
// Uninhabited marker type: it is only ever named as a type parameter,
// e.g. `Pattern::<MultiNamedPlaceholder>`, so no value of it needs to exist.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[allow(clippy::exhaustive_enums)] // Empty Enum
pub enum MultiNamedPlaceholder {}
320
321impl crate::private::Sealed for MultiNamedPlaceholder {}
322
323impl PatternBackend for MultiNamedPlaceholder {
324 type PlaceholderKey<'a> = MultiNamedPlaceholderKey<'a>;
325 #[cfg(feature = "alloc")]
326 type PlaceholderKeyCow<'a> = MultiNamedPlaceholderKeyCow<'a>;
327 type Error<'a> = MissingNamedPlaceholderError<'a>;
328 type Store = str;
329 type Iter<'a> = MultiNamedPlaceholderPatternIterator<'a>;
330
331 fn validate_store(store: &Self::Store) -> Result<(), Error> {
332 let mut iter = MultiNamedPlaceholderPatternIterator::new(store);
333 while iter
334 .try_next()
335 .map_err(|e| match e {
336 MultiNamedPlaceholderError::InvalidStore => Error::InvalidPattern,
337 MultiNamedPlaceholderError::Unreachable => {
338 debug_assert!(false, "unreachable");
339 Error::InvalidPattern
340 }
341 })?
342 .is_some()
343 {}
344 Ok(())
345 }
346
347 fn iter_items(store: &Self::Store) -> Self::Iter<'_> {
348 MultiNamedPlaceholderPatternIterator::new(store)
349 }
350
351 #[cfg(feature = "alloc")]
352 fn try_from_items<
353 'cow,
354 'ph,
355 I: Iterator<Item = Result<PatternItemCow<'cow, Self::PlaceholderKeyCow<'ph>>, Error>>,
356 >(
357 items: I,
358 ) -> Result<Box<str>, Error> {
359 let mut string = String::new();
360 for item in items {
361 match item? {
362 PatternItemCow::Literal(s) if s.contains(|x| (x as usize) <= 0x07) => {
363 // TODO: Should this be a different error type?
364 return Err(Error::InvalidPattern);
365 }
366 PatternItemCow::Literal(s) => string.push_str(&s),
367 PatternItemCow::Placeholder(ph_key) => {
368 let name_length = ph_key.0.len();
369 if name_length >= 64 {
370 return Err(Error::InvalidPlaceholder);
371 }
372 let lead = (name_length >> 3) as u8;
373 let trail = (name_length & 0x7) as u8;
374 string.push(char::from(lead));
375 string.push(char::from(trail));
376 string.push_str(&ph_key.0);
377 }
378 }
379 }
380 Ok(string.into_boxed_str())
381 }
382
383 fn empty() -> &'static Self::Store {
384 ""
385 }
386}
387
/// Iterator over the literals and placeholders encoded in a [`MultiNamedPlaceholder`] store.
#[derive(Debug)]
pub struct MultiNamedPlaceholderPatternIterator<'a> {
    /// The part of the store that has not been yielded yet.
    store: &'a str,
}
392
393// Note: we don't implement ExactSizeIterator since we don't store that metadata in MultiNamed.
394
395impl<'a> Iterator for MultiNamedPlaceholderPatternIterator<'a> {
396 type Item = PatternItem<'a, MultiNamedPlaceholderKey<'a>>;
397 fn next(&mut self) -> Option<Self::Item> {
398 match self.try_next() {
399 Ok(next) => next,
400 Err(MultiNamedPlaceholderError::InvalidStore) => {
401 debug_assert!(
402 false,
403 "invalid store with {} bytes remaining",
404 self.store.len()
405 );
406 None
407 }
408 Err(MultiNamedPlaceholderError::Unreachable) => {
409 debug_assert!(false, "unreachable");
410 None
411 }
412 }
413 }
414}
415
/// Reasons why decoding the next item of a store can fail.
enum MultiNamedPlaceholderError {
    /// The store is malformed: a length prefix is truncated or out of range, or the
    /// placeholder name is shorter than its prefix announces.
    InvalidStore,
    /// An internal invariant did not hold; this is not expected to happen.
    Unreachable,
}
420
421impl<'a> MultiNamedPlaceholderPatternIterator<'a> {
422 fn new(store: &'a str) -> Self {
423 Self { store }
424 }
425
426 fn try_next(
427 &mut self,
428 ) -> Result<Option<PatternItem<'a, MultiNamedPlaceholderKey<'a>>>, MultiNamedPlaceholderError>
429 {
430 match self.store.find(|x| (x as usize) <= 0x07) {
431 Some(0) => {
432 // Placeholder
433 let Some((&[lead, trail], remainder)) = self
434 .store
435 .split_at_checked(2)
436 .map(|(a, b)| (a.as_bytes(), b))
437 else {
438 return Err(MultiNamedPlaceholderError::InvalidStore);
439 };
440 debug_assert!(lead <= 7);
441 if trail > 7 {
442 return Err(MultiNamedPlaceholderError::InvalidStore);
443 }
444 let placeholder_len = (lead << 3) + trail;
445 let Some((placeholder_name, remainder)) =
446 remainder.split_at_checked(placeholder_len as usize)
447 else {
448 return Err(MultiNamedPlaceholderError::InvalidStore);
449 };
450 self.store = remainder;
451 Ok(Some(PatternItem::Placeholder(MultiNamedPlaceholderKey(
452 placeholder_name,
453 ))))
454 }
455 Some(i) => {
456 // Literal
457 let Some((literal, remainder)) = self.store.split_at_checked(i) else {
458 debug_assert!(false, "should be a perfect slice");
459 return Err(MultiNamedPlaceholderError::Unreachable);
460 };
461 self.store = remainder;
462 Ok(Some(PatternItem::Literal(literal)))
463 }
464 None if self.store.is_empty() => {
465 // End of string
466 Ok(None)
467 }
468 None => {
469 // Closing literal
470 let literal = self.store;
471 self.store = "";
472 Ok(Some(PatternItem::Literal(literal)))
473 }
474 }
475 }
476}
477
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{MultiNamedPlaceholder, MultiNamedPlaceholderPattern};

    /// Malformed pattern strings and malformed stores must both be rejected.
    #[test]
    fn test_invalid() {
        let long_str = "0123456789".repeat(1000000);
        // 64 bytes is the shortest name that no longer fits in two octal digits; testing it
        // directly guards the limit itself against off-by-one regressions.
        let boundary_str = "x".repeat(64);
        // Each case is a template plus the text substituted for `@`.
        let cases = [
            ("{", ""),                      // invalid syntax
            ("{@}", long_str.as_str()),     // placeholder name far too long
            ("{@}", boundary_str.as_str()), // placeholder name one byte too long
            ("\x00", ""),                   // invalid character
            ("\x07", ""),                   // invalid character
        ];
        for (template, filler) in cases {
            let string = template.replace('@', filler);
            assert!(
                MultiNamedPlaceholderPattern::try_from_str(&string, Default::default()).is_err(),
                "{string:?}"
            );
        }
        let stores = [
            "\x00",      // too short
            "\x02",      // too short
            "\x00\x02",  // no placeholder name
            "\x00\x02a", // placeholder name too short
            "\x00\x08",  // second length digit out of range
        ];
        for store in stores {
            assert!(
                MultiNamedPlaceholder::validate_store(store).is_err(),
                "{store:?}"
            );
        }
    }
}
512}