icu_pattern/
multi_named.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Code for the [`MultiNamedPlaceholder`] pattern backend.
6
7#[cfg(feature = "alloc")]
8use alloc::{borrow::Cow, boxed::Box, collections::BTreeMap, str::FromStr, string::String};
9use core::fmt;
10#[cfg(feature = "litemap")]
11use litemap::LiteMap;
12use writeable::Writeable;
13
14use crate::common::*;
15use crate::Error;
16
/// A string wrapper for the [`MultiNamedPlaceholder`] pattern backend.
///
/// The wrapped string is the placeholder name, i.e. the text that appears between
/// the curly braces in the pattern string syntax (`person0` for `{person0}`).
///
/// # Examples
///
/// ```
/// use core::cmp::Ordering;
/// use core::str::FromStr;
/// use icu_pattern::MultiNamedPlaceholderKey;
/// use icu_pattern::MultiNamedPlaceholderPattern;
/// use icu_pattern::PatternItem;
///
/// // Parse the string syntax and check the resulting data store:
/// let pattern = MultiNamedPlaceholderPattern::try_from_str(
///     "Hello, {person0} and {person1}!",
///     Default::default(),
/// )
/// .unwrap();
///
/// assert_eq!(
///     pattern.iter().cmp(
///         [
///             PatternItem::Literal("Hello, "),
///             PatternItem::Placeholder(MultiNamedPlaceholderKey("person0")),
///             PatternItem::Literal(" and "),
///             PatternItem::Placeholder(MultiNamedPlaceholderKey("person1")),
///             PatternItem::Literal("!")
///         ]
///         .into_iter()
///     ),
///     Ordering::Equal
/// );
/// ```
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[repr(transparent)]
#[allow(clippy::exhaustive_structs)] // transparent newtype
pub struct MultiNamedPlaceholderKey<'a>(pub &'a str);
53
/// Cowable version of [`MultiNamedPlaceholderKey`], used during construction.
///
/// The wrapped [`Cow`] holds the placeholder name, either borrowed or owned.
/// Only available with the `alloc` feature.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
#[repr(transparent)]
#[allow(clippy::exhaustive_structs)] // transparent newtype
#[cfg(feature = "alloc")]
pub struct MultiNamedPlaceholderKeyCow<'a>(pub Cow<'a, str>);
61
#[cfg(feature = "alloc")]
impl FromStr for MultiNamedPlaceholderKeyCow<'_> {
    type Err = Error;

    /// Builds a key from a placeholder name. This conversion never fails.
    ///
    /// The name is copied into an owned `String`: the signature of
    /// `FromStr::from_str` does not tie the input lifetime to `Self`,
    /// so the input cannot be borrowed.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let owned_name = String::from(s);
        Ok(Self(Cow::Owned(owned_name)))
    }
}
70
/// Error reported when a placeholder value provider has no value for a
/// named placeholder.
///
/// Its [`Writeable`] implementation renders the placeholder name wrapped in
/// curly braces, for example `{your_name}`.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct MissingNamedPlaceholderError<'a> {
    /// Name of the placeholder for which no value was found.
    pub name: &'a str,
}
76
77impl Writeable for MissingNamedPlaceholderError<'_> {
78    fn write_to<W: fmt::Write + ?Sized>(&self, sink: &mut W) -> fmt::Result {
79        sink.write_char('{')?;
80        sink.write_str(self.name)?;
81        sink.write_char('}')?;
82        Ok(())
83    }
84}
85
/// Lets a `BTreeMap` keyed by string-like values supply placeholder values:
/// the placeholder name is used as the lookup key.
#[cfg(feature = "alloc")]
impl<'k, K, W> PlaceholderValueProvider<MultiNamedPlaceholderKey<'k>> for BTreeMap<K, W>
where
    K: Ord + core::borrow::Borrow<str>,
    W: Writeable,
{
    type Error = MissingNamedPlaceholderError<'k>;

    type W<'a>
        = Result<&'a W, Self::Error>
    where
        Self: 'a;

    type L<'a, 'l>
        = &'l str
    where
        Self: 'a;

    /// Looks up the value stored under the placeholder name; a missing entry
    /// yields a [`MissingNamedPlaceholderError`] carrying that name.
    #[inline]
    fn value_for<'a>(&'a self, key: MultiNamedPlaceholderKey<'k>) -> Self::W<'a> {
        let name = key.0;
        self.get(name).ok_or(MissingNamedPlaceholderError { name })
    }

    /// Literals are passed through unchanged.
    #[inline]
    fn map_literal<'a, 'l>(&'a self, literal: &'l str) -> Self::L<'a, 'l> {
        literal
    }
}
116
/// Lets a `LiteMap` keyed by string-like values supply placeholder values:
/// the placeholder name is used as the lookup key.
#[cfg(feature = "litemap")]
impl<'k, K, W, S> PlaceholderValueProvider<MultiNamedPlaceholderKey<'k>> for LiteMap<K, W, S>
where
    K: Ord + core::borrow::Borrow<str>,
    W: Writeable,
    S: litemap::store::Store<K, W>,
{
    type Error = MissingNamedPlaceholderError<'k>;

    type W<'a>
        = Result<&'a W, Self::Error>
    where
        Self: 'a;

    type L<'a, 'l>
        = &'l str
    where
        Self: 'a;

    /// Looks up the value stored under the placeholder name; a missing entry
    /// yields a [`MissingNamedPlaceholderError`] carrying that name.
    #[inline]
    fn value_for<'a>(&'a self, key: MultiNamedPlaceholderKey<'k>) -> Self::W<'a> {
        let name = key.0;
        self.get(name).ok_or(MissingNamedPlaceholderError { name })
    }

    /// Literals are passed through unchanged.
    #[inline]
    fn map_literal<'a, 'l>(&'a self, literal: &'l str) -> Self::L<'a, 'l> {
        literal
    }
}
148
/// Backend for patterns containing zero or more named placeholders.
///
/// This empty type is not constructible.
///
/// # Placeholder Keys
///
/// The placeholder is [`MultiNamedPlaceholderKey`].
///
/// In [`Pattern::interpolate()`], pass a map-like structure. Missing keys are reported
/// as a [`MissingNamedPlaceholderError`], and the best-effort output contains the
/// placeholder name wrapped in curly braces (for example `{your_name}`).
///
/// # Encoding Details
///
/// The literals and placeholders are stored in context. A placeholder is encoded as its
/// name length in bytes, written as two octal digits that are stored as the code points
/// `\x00` through `\x07`, followed by the placeholder name.
///
/// For example, consider the pattern: "Hello, {user} and {someone_else}!"
///
/// The encoding for this would be:
///
/// ```txt
/// Hello, \x00\x04user and \x01\x04someone_else!
/// ```
///
/// where `\x00\x04` and `\x01\x04` are big-endian octal numbers representing the lengths of
/// their respective placeholder names (4 and 12 bytes).
///
/// Consequences of this encoding:
///
/// 1. The maximum placeholder name length is 63 bytes (octal `77`); longer names are rejected
/// 2. Code points in the range `\x00` through `\x07` are reserved for the placeholder name
///    length and are rejected in literals
///
/// # Examples
///
/// Example patterns supported by this backend:
///
/// ```
/// use core::str::FromStr;
/// use icu_pattern::MultiNamedPlaceholder;
/// use icu_pattern::Pattern;
/// use std::collections::BTreeMap;
///
/// let placeholder_value_map: BTreeMap<&str, &str> = [
///     ("num", "5"),
///     ("letter", "X"),
///     ("", "empty"),
///     ("unused", "unused"),
/// ]
/// .into_iter()
/// .collect();
///
/// // Single placeholder:
/// assert_eq!(
///     Pattern::<MultiNamedPlaceholder>::try_from_str(
///         "{num} days ago",
///         Default::default()
///     )
///     .unwrap()
///     .try_interpolate_to_string(&placeholder_value_map)
///     .unwrap(),
///     "5 days ago",
/// );
///
/// // No placeholder (note, the placeholder value is never accessed):
/// assert_eq!(
///     Pattern::<MultiNamedPlaceholder>::try_from_str(
///         "yesterday",
///         Default::default()
///     )
///     .unwrap()
///     .try_interpolate_to_string(&placeholder_value_map)
///     .unwrap(),
///     "yesterday",
/// );
///
/// // No literals, only placeholders:
/// assert_eq!(
///     Pattern::<MultiNamedPlaceholder>::try_from_str(
///         "{letter}{num}{}",
///         Default::default()
///     )
///     .unwrap()
///     .try_interpolate_to_string(&placeholder_value_map)
///     .unwrap(),
///     "X5empty",
/// );
/// ```
///
/// Use [`LiteMap`] for alloc-free formatting:
///
/// ```
/// use core::str::FromStr;
/// use icu_pattern::MultiNamedPlaceholderPattern;
/// use litemap::LiteMap;
/// use writeable::TryWriteable;
///
/// static PLACEHOLDER_VALUE_MAP: LiteMap<&str, usize, &[(&str, usize)]> =
///     LiteMap::from_sorted_store_unchecked(&[("seven", 11)]);
///
/// // Note: String allocates, but this could be a non-allocating sink
/// let mut sink = String::new();
///
/// MultiNamedPlaceholderPattern::try_from_str("{seven}", Default::default())
///     .unwrap()
///     .try_interpolate(&PLACEHOLDER_VALUE_MAP)
///     .try_write_to(&mut sink)
///     .unwrap()
///     .unwrap();
///
/// assert_eq!(sink, "11");
/// ```
///
/// Missing placeholder values cause an error result to be returned. However,
/// based on the design of [`TryWriteable`], the error can be discarded to get
/// a best-effort interpolation in which each missing placeholder is written as
/// its name in curly braces.
///
/// ```should_panic
/// use core::str::FromStr;
/// use icu_pattern::MultiNamedPlaceholder;
/// use icu_pattern::Pattern;
/// use std::collections::BTreeMap;
///
/// let placeholder_value_map: BTreeMap<&str, &str> =
///     [("num", "5"), ("letter", "X")].into_iter().collect();
///
/// Pattern::<MultiNamedPlaceholder>::try_from_str(
///     "Your name is {your_name}",
///     Default::default(),
/// )
/// .unwrap()
/// .try_interpolate_to_string(&placeholder_value_map)
/// .unwrap();
/// ```
///
/// Recover the best-effort lossy string by directly using [`Pattern::try_interpolate()`]:
///
/// ```
/// use core::str::FromStr;
/// use icu_pattern::MissingNamedPlaceholderError;
/// use icu_pattern::MultiNamedPlaceholder;
/// use icu_pattern::Pattern;
/// use std::borrow::Cow;
/// use std::collections::BTreeMap;
/// use writeable::TryWriteable;
///
/// let placeholder_value_map: BTreeMap<&str, &str> =
///     [("num", "5"), ("letter", "X")].into_iter().collect();
///
/// let pattern = Pattern::<MultiNamedPlaceholder>::try_from_str(
///     "Your name is {your_name}",
///     Default::default(),
/// )
/// .unwrap();
///
/// let mut buffer = String::new();
/// let result = pattern
///     .try_interpolate(&placeholder_value_map)
///     .try_write_to(&mut buffer)
///     .expect("infallible write to String");
///
/// assert!(matches!(result, Err(MissingNamedPlaceholderError { .. })));
/// assert_eq!(result.unwrap_err().name, "your_name");
/// assert_eq!(buffer, "Your name is {your_name}");
/// ```
///
/// [`Pattern::interpolate()`]: crate::Pattern::interpolate
/// [`Pattern::try_interpolate()`]: crate::Pattern::try_interpolate
/// [`TryWriteable`]: writeable::TryWriteable
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[allow(clippy::exhaustive_enums)] // Empty Enum
pub enum MultiNamedPlaceholder {}
320
321impl crate::private::Sealed for MultiNamedPlaceholder {}
322
323impl PatternBackend for MultiNamedPlaceholder {
324    type PlaceholderKey<'a> = MultiNamedPlaceholderKey<'a>;
325    #[cfg(feature = "alloc")]
326    type PlaceholderKeyCow<'a> = MultiNamedPlaceholderKeyCow<'a>;
327    type Error<'a> = MissingNamedPlaceholderError<'a>;
328    type Store = str;
329    type Iter<'a> = MultiNamedPlaceholderPatternIterator<'a>;
330
331    fn validate_store(store: &Self::Store) -> Result<(), Error> {
332        let mut iter = MultiNamedPlaceholderPatternIterator::new(store);
333        while iter
334            .try_next()
335            .map_err(|e| match e {
336                MultiNamedPlaceholderError::InvalidStore => Error::InvalidPattern,
337                MultiNamedPlaceholderError::Unreachable => {
338                    debug_assert!(false, "unreachable");
339                    Error::InvalidPattern
340                }
341            })?
342            .is_some()
343        {}
344        Ok(())
345    }
346
347    fn iter_items(store: &Self::Store) -> Self::Iter<'_> {
348        MultiNamedPlaceholderPatternIterator::new(store)
349    }
350
351    #[cfg(feature = "alloc")]
352    fn try_from_items<
353        'cow,
354        'ph,
355        I: Iterator<Item = Result<PatternItemCow<'cow, Self::PlaceholderKeyCow<'ph>>, Error>>,
356    >(
357        items: I,
358    ) -> Result<Box<str>, Error> {
359        let mut string = String::new();
360        for item in items {
361            match item? {
362                PatternItemCow::Literal(s) if s.contains(|x| (x as usize) <= 0x07) => {
363                    // TODO: Should this be a different error type?
364                    return Err(Error::InvalidPattern);
365                }
366                PatternItemCow::Literal(s) => string.push_str(&s),
367                PatternItemCow::Placeholder(ph_key) => {
368                    let name_length = ph_key.0.len();
369                    if name_length >= 64 {
370                        return Err(Error::InvalidPlaceholder);
371                    }
372                    let lead = (name_length >> 3) as u8;
373                    let trail = (name_length & 0x7) as u8;
374                    string.push(char::from(lead));
375                    string.push(char::from(trail));
376                    string.push_str(&ph_key.0);
377                }
378            }
379        }
380        Ok(string.into_boxed_str())
381    }
382
383    fn empty() -> &'static Self::Store {
384        ""
385    }
386}
387
/// Iterator over the literals and placeholders of an encoded
/// [`MultiNamedPlaceholder`] pattern store.
#[derive(Debug)]
pub struct MultiNamedPlaceholderPatternIterator<'a> {
    /// The part of the encoded store that has not been yielded yet.
    store: &'a str,
}
392
393// Note: we don't implement ExactSizeIterator since we don't store that metadata in MultiNamed.
394
395impl<'a> Iterator for MultiNamedPlaceholderPatternIterator<'a> {
396    type Item = PatternItem<'a, MultiNamedPlaceholderKey<'a>>;
397    fn next(&mut self) -> Option<Self::Item> {
398        match self.try_next() {
399            Ok(next) => next,
400            Err(MultiNamedPlaceholderError::InvalidStore) => {
401                debug_assert!(
402                    false,
403                    "invalid store with {} bytes remaining",
404                    self.store.len()
405                );
406                None
407            }
408            Err(MultiNamedPlaceholderError::Unreachable) => {
409                debug_assert!(false, "unreachable");
410                None
411            }
412        }
413    }
414}
415
/// Internal failure modes of decoding an encoded pattern store.
enum MultiNamedPlaceholderError {
    /// The store is malformed: a placeholder length prefix is truncated or
    /// invalid, or the placeholder name is shorter than its declared length.
    InvalidStore,
    /// A split that is expected to always succeed failed.
    Unreachable,
}
420
421impl<'a> MultiNamedPlaceholderPatternIterator<'a> {
422    fn new(store: &'a str) -> Self {
423        Self { store }
424    }
425
426    fn try_next(
427        &mut self,
428    ) -> Result<Option<PatternItem<'a, MultiNamedPlaceholderKey<'a>>>, MultiNamedPlaceholderError>
429    {
430        match self.store.find(|x| (x as usize) <= 0x07) {
431            Some(0) => {
432                // Placeholder
433                let Some((&[lead, trail], remainder)) = self
434                    .store
435                    .split_at_checked(2)
436                    .map(|(a, b)| (a.as_bytes(), b))
437                else {
438                    return Err(MultiNamedPlaceholderError::InvalidStore);
439                };
440                debug_assert!(lead <= 7);
441                if trail > 7 {
442                    return Err(MultiNamedPlaceholderError::InvalidStore);
443                }
444                let placeholder_len = (lead << 3) + trail;
445                let Some((placeholder_name, remainder)) =
446                    remainder.split_at_checked(placeholder_len as usize)
447                else {
448                    return Err(MultiNamedPlaceholderError::InvalidStore);
449                };
450                self.store = remainder;
451                Ok(Some(PatternItem::Placeholder(MultiNamedPlaceholderKey(
452                    placeholder_name,
453                ))))
454            }
455            Some(i) => {
456                // Literal
457                let Some((literal, remainder)) = self.store.split_at_checked(i) else {
458                    debug_assert!(false, "should be a perfect slice");
459                    return Err(MultiNamedPlaceholderError::Unreachable);
460                };
461                self.store = remainder;
462                Ok(Some(PatternItem::Literal(literal)))
463            }
464            None if self.store.is_empty() => {
465                // End of string
466                Ok(None)
467            }
468            None => {
469                // Closing literal
470                let literal = self.store;
471                self.store = "";
472                Ok(Some(PatternItem::Literal(literal)))
473            }
474        }
475    }
476}
477
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{MultiNamedPlaceholder, MultiNamedPlaceholderPattern};

    /// Pattern strings and raw stores that must be rejected.
    #[test]
    fn test_invalid() {
        // 64 bytes is the shortest name that no longer fits in the two octal
        // length digits, so it exercises the exact rejection boundary without
        // allocating a multi-megabyte string.
        let long_str = "x".repeat(64);
        let strings = [
            "{",    // invalid syntax
            "{@}",  // placeholder name too long
            "\x00", // invalid character
            "\x07", // invalid character
        ];
        for string in strings {
            let string = string.replace('@', &long_str);
            assert!(
                MultiNamedPlaceholderPattern::try_from_str(&string, Default::default()).is_err(),
                "{string:?}"
            );
        }
        let stores = [
            "\x00",      // too short
            "\x02",      // too short
            "\x00\x02",  // no placeholder name
            "\x00\x02a", // placeholder name too short
        ];
        for store in stores {
            assert!(
                MultiNamedPlaceholder::validate_store(store).is_err(),
                "{store:?}"
            );
        }
    }

    /// The longest encodable placeholder name (63 bytes, octal `77`) is a valid store.
    #[test]
    fn test_longest_placeholder_name() {
        let name = "x".repeat(63);
        let store = "\x07\x07@".replace('@', &name);
        assert!(
            MultiNamedPlaceholder::validate_store(&store).is_ok(),
            "{store:?}"
        );
    }
}
512}