icu_list/
list_formatter.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::options::{ListFormatterOptions, ListLength};
6use crate::provider::*;
7use core::fmt::{self, Write};
8use icu_locale_core::preferences::define_preferences;
9use icu_provider::marker::ErasedMarker;
10use icu_provider::prelude::*;
11use writeable::*;
12
13#[cfg(doc)]
14extern crate writeable;
15
16define_preferences!(
17    /// The preferences for list formatting.
18    [Copy]
19    ListFormatterPreferences,
20    {}
21);
22
23/// A formatter that renders sequences of items in an i18n-friendly way. See the
24/// [crate-level documentation](crate) for more details.
25#[derive(Debug)]
26pub struct ListFormatter {
27    data: DataPayload<ErasedMarker<ListFormatterPatterns<'static>>>,
28}
29
// Generates the constructors for one list type.
//
// Arguments, in order:
//   1. name of the constructor documented as using compiled data,
//   2. name of the buffer-provider constructor,
//   3. name of the constructor taking an explicit `DataProvider`,
//   4. the data marker type to load,
//   5. the list-type label spliced into the generated documentation.
//
// The first two functions are produced by `gen_buffer_data_constructors!`; the
// third is written out below.
macro_rules! constructor {
    ($baked: ident, $buffer: ident, $unstable: ident, $marker: ty, $doc: literal) => {
        icu_provider::gen_buffer_data_constructors!(
            (prefs: ListFormatterPreferences, options: ListFormatterOptions) -> error: DataError,
            #[doc = concat!("Creates a new [`ListFormatter`] that produces a ", $doc, "-type list using compiled data.")]
            ///
            /// See the [CLDR spec](https://unicode.org/reports/tr35/tr35-general.html#ListPatterns) for
            /// an explanation of the different types.
            functions: [
                $baked,
                $buffer,
                $unstable,
                Self
            ]
        );

        #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::$baked)]
        pub fn $unstable(
            provider: &(impl DataProvider<$marker> + ?Sized),
            prefs: ListFormatterPreferences,
            options: ListFormatterOptions,
        ) -> Result<Self, DataError> {
            // The requested length picks the marker attributes; an unset length
            // falls back to `ListLength`'s default.
            let attributes = match options.length.unwrap_or_default() {
                ListLength::Wide => ListFormatterPatterns::WIDE,
                ListLength::Short => ListFormatterPatterns::SHORT,
                ListLength::Narrow => ListFormatterPatterns::NARROW,
            };
            let locale = <$marker>::make_locale(prefs.locale_preferences);

            let request = DataRequest {
                id: DataIdentifierBorrowed::for_marker_attributes_and_locale(
                    attributes,
                    &locale
                ),
                ..Default::default()
            };
            let response = provider.load(request)?;

            // Erase the concrete marker so every list type is stored the same way.
            Ok(Self {
                data: response.payload.cast(),
            })
        }
    };
}
72
73impl ListFormatter {
74    constructor!(
75        try_new_and,
76        try_new_and_with_buffer_provider,
77        try_new_and_unstable,
78        ListAndV1,
79        "and"
80    );
81    constructor!(
82        try_new_or,
83        try_new_or_with_buffer_provider,
84        try_new_or_unstable,
85        ListOrV1,
86        "or"
87    );
88    constructor!(
89        try_new_unit,
90        try_new_unit_with_buffer_provider,
91        try_new_unit_unstable,
92        ListUnitV1,
93        "unit"
94    );
95
96    /// Returns a [`Writeable`] composed of the input [`Writeable`]s and the language-dependent
97    /// formatting.
98    ///
99    /// The [`Writeable`] is annotated with [`parts::ELEMENT`] for input elements,
100    /// and [`parts::LITERAL`] for list literals.
101    ///
102    /// # Example
103    ///
104    /// ```
105    /// use icu::list::options::*;
106    /// use icu::list::{parts, ListFormatter};
107    /// # use icu::locale::locale;
108    /// # use writeable::*;
109    /// let formatteur = ListFormatter::try_new_and(
110    ///     locale!("fr").into(),
111    ///     ListFormatterOptions::default().with_length(ListLength::Wide),
112    /// )
113    /// .unwrap();
114    /// let pays = ["Italie", "France", "Espagne", "Allemagne"];
115    ///
116    /// assert_writeable_parts_eq!(
117    ///     formatteur.format(pays.iter()),
118    ///     "Italie, France, Espagne et Allemagne",
119    ///     [
120    ///         (0, 6, parts::ELEMENT),
121    ///         (6, 8, parts::LITERAL),
122    ///         (8, 14, parts::ELEMENT),
123    ///         (14, 16, parts::LITERAL),
124    ///         (16, 23, parts::ELEMENT),
125    ///         (23, 27, parts::LITERAL),
126    ///         (27, 36, parts::ELEMENT),
127    ///     ]
128    /// );
129    /// ```
130    pub fn format<'a, W: Writeable + 'a, I: Iterator<Item = W> + Clone + 'a>(
131        &'a self,
132        values: I,
133    ) -> FormattedList<'a, W, I> {
134        FormattedList {
135            formatter: self,
136            values,
137        }
138    }
139
140    /// Returns a [`String`] composed of the input [`Writeable`]s and the language-dependent
141    /// formatting.
142    ///
143    /// ✨ *Enabled with the `alloc` Cargo feature.*
144    #[cfg(feature = "alloc")]
145    pub fn format_to_string<W: Writeable, I: Iterator<Item = W> + Clone>(
146        &self,
147        values: I,
148    ) -> alloc::string::String {
149        self.format(values).write_to_string().into_owned()
150    }
151}
152
153/// The [`Part`]s used by [`ListFormatter`].
154pub mod parts {
155    use writeable::Part;
156
157    /// The [`Part`] used by [`FormattedList`](super::FormattedList) to mark the part of the string that is an element.
158    ///
159    /// * `category`: `"list"`
160    /// * `value`: `"element"`
161    pub const ELEMENT: Part = Part {
162        category: "list",
163        value: "element",
164    };
165
166    /// The [`Part`] used by [`FormattedList`](super::FormattedList) to mark the part of the string that is a list literal,
167    /// such as ", " or " and ".
168    ///
169    /// * `category`: `"list"`
170    /// * `value`: `"literal"`
171    pub const LITERAL: Part = Part {
172        category: "list",
173        value: "literal",
174    };
175}
176
177/// The [`Writeable`] implementation that is returned by [`ListFormatter::format`]. See
178/// the [`writeable`] crate for how to consume this.
179#[derive(Debug)]
180pub struct FormattedList<'a, W: Writeable + 'a, I: Iterator<Item = W> + Clone + 'a> {
181    formatter: &'a ListFormatter,
182    values: I,
183}
184
185impl<'a, W: Writeable + 'a, I: Iterator<Item = W> + Clone + 'a> Writeable
186    for FormattedList<'a, W, I>
187{
188    fn write_to_parts<V: PartsWrite + ?Sized>(&self, sink: &mut V) -> fmt::Result {
189        macro_rules! literal {
190            ($lit:ident) => {
191                sink.with_part(parts::LITERAL, |l| l.write_str($lit))
192            };
193        }
194        macro_rules! value {
195            ($val:expr) => {
196                sink.with_part(parts::ELEMENT, |e| $val.write_to_parts(e))
197            };
198        }
199
200        let patterns = self.formatter.data.get();
201
202        let mut values = self.values.clone();
203
204        if let Some(first) = values.next() {
205            if let Some(second) = values.next() {
206                if let Some(third) = values.next() {
207                    // Start(values[0], middle(..., middle(values[n-3], End(values[n-2], values[n-1]))...)) =
208                    // start_before + values[0] + start_between + (values[1..n-3] + middle_between)* +
209                    // values[n-2] + end_between + values[n-1] + end_after
210
211                    let (start_before, start_between, _) = patterns.start.parts();
212
213                    literal!(start_before)?;
214                    value!(first)?;
215                    literal!(start_between)?;
216                    value!(second)?;
217
218                    let mut next = third;
219
220                    for next_next in values {
221                        let between = &*patterns.middle;
222                        literal!(between)?;
223                        value!(next)?;
224                        next = next_next;
225                    }
226
227                    let (_, end_between, end_after) = patterns.end.parts(&next);
228                    literal!(end_between)?;
229                    value!(next)?;
230                    literal!(end_after)
231                } else {
232                    // Pair(values[0], values[1]) = pair_before + values[0] + pair_between + values[1] + pair_after
233                    let (before, between, after) = patterns
234                        .pair
235                        .as_ref()
236                        .unwrap_or(&patterns.end)
237                        .parts(&second);
238                    literal!(before)?;
239                    value!(first)?;
240                    literal!(between)?;
241                    value!(second)?;
242                    literal!(after)
243                }
244            } else {
245                value!(first)
246            }
247        } else {
248            Ok(())
249        }
250    }
251
252    fn writeable_length_hint(&self) -> LengthHint {
253        let mut count = 0;
254        let item_length = self
255            .values
256            .clone()
257            .map(|w| {
258                count += 1;
259                w.writeable_length_hint()
260            })
261            .sum::<LengthHint>();
262        item_length + self.formatter.data.get().length_hint(count)
263    }
264}
265
266impl<'a, W: Writeable + 'a, I: Iterator<Item = W> + Clone + 'a> core::fmt::Display
267    for FormattedList<'a, W, I>
268{
269    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
270        self.write_to(f)
271    }
272}
273
#[cfg(all(test, feature = "datagen"))]
mod tests {
    use super::*;
    use writeable::{assert_writeable_eq, assert_writeable_parts_eq};

    /// Builds a [`ListFormatter`] straight from in-memory patterns, without a provider.
    fn formatter(patterns: ListFormatterPatterns<'static>) -> ListFormatter {
        let data = DataPayload::from_owned(patterns);
        ListFormatter { data }
    }

    /// Lists of zero to five elements, plus the part annotations of the longest one.
    #[test]
    fn test_slices() {
        let list_formatter = formatter(crate::patterns::test::test_patterns_general());
        let words = ["one", "two", "three", "four", "five"];

        assert_writeable_eq!(list_formatter.format(words[0..0].iter()), "");
        assert_writeable_eq!(list_formatter.format(words[0..1].iter()), "one");
        assert_writeable_eq!(list_formatter.format(words[0..2].iter()), "$one;two+");
        assert_writeable_eq!(
            list_formatter.format(words[0..3].iter()),
            "@one:two.three!"
        );
        assert_writeable_eq!(
            list_formatter.format(words[0..4].iter()),
            "@one:two,three.four!"
        );

        assert_writeable_parts_eq!(
            list_formatter.format(words.iter()),
            "@one:two,three,four.five!",
            [
                (0, 1, parts::LITERAL),
                (1, 4, parts::ELEMENT),
                (4, 5, parts::LITERAL),
                (5, 8, parts::ELEMENT),
                (8, 9, parts::LITERAL),
                (9, 14, parts::ELEMENT),
                (14, 15, parts::LITERAL),
                (15, 19, parts::ELEMENT),
                (19, 20, parts::LITERAL),
                (20, 24, parts::ELEMENT),
                (24, 25, parts::LITERAL)
            ]
        );
    }

    /// The iterator of a non-slice collection is accepted as input.
    #[test]
    fn test_into_iterator() {
        let list_formatter = formatter(crate::patterns::test::test_patterns_general());

        let mut numbers = std::collections::VecDeque::<u8>::new();
        numbers.push_back(10);
        numbers.push_front(48);

        assert_writeable_parts_eq!(
            list_formatter.format(numbers.iter()),
            "$48;10+",
            [
                (0, 1, parts::LITERAL),
                (1, 3, parts::ELEMENT),
                (3, 4, parts::LITERAL),
                (4, 6, parts::ELEMENT),
                (6, 7, parts::LITERAL),
            ]
        );
    }

    /// An iterator that is not backed by a collection is accepted as input.
    #[test]
    fn test_iterator() {
        let list_formatter = formatter(crate::patterns::test::test_patterns_general());

        assert_writeable_parts_eq!(
            list_formatter.format(core::iter::repeat_n(5, 2)),
            "$5;5+",
            [
                (0, 1, parts::LITERAL),
                (1, 2, parts::ELEMENT),
                (2, 3, parts::LITERAL),
                (3, 4, parts::ELEMENT),
                (4, 5, parts::LITERAL),
            ]
        );
    }

    /// Formatting with the conditional test patterns.
    #[test]
    fn test_conditional() {
        let list_formatter = formatter(crate::patterns::test::test_patterns_conditional());

        assert_writeable_eq!(
            list_formatter.format(["beta", "alpha"].iter()),
            "beta :o alpha"
        );
    }

    // Constructs a formatter for `$locale` through the constructor `$type` with default
    // options, then checks every `(input, expected output)` pair against it.
    macro_rules! test {
        ($locale:literal, $type:ident, $(($input:expr, $output:literal),)+) => {
            let list_formatter = ListFormatter::$type(
                icu::locale::locale!($locale).into(),
                Default::default(),
            ).unwrap();
            $(
                assert_writeable_eq!(list_formatter.format($input.iter()), $output);
            )+
        };
    }

    #[test]
    fn test_basic() {
        test!("fr", try_new_or, (["A", "B"], "A ou B"),);
    }

    #[test]
    fn test_spanish() {
        // "and": the expected conjunction is "e" before "Ibiza" and "Hidalgo", "y" otherwise.
        test!(
            "es",
            try_new_and,
            (["x", "Mallorca"], "x y Mallorca"),
            (["x", "Ibiza"], "x e Ibiza"),
            (["x", "Hidalgo"], "x e Hidalgo"),
            (["x", "Hierva"], "x y Hierva"),
        );

        // "or": the expected conjunction is either "o" or "u", depending on the item
        // that follows it.
        test!(
            "es",
            try_new_or,
            (["x", "Ibiza"], "x o Ibiza"),
            (["x", "Okinawa"], "x u Okinawa"),
            (["x", "8 más"], "x u 8 más"),
            (["x", "8"], "x u 8"),
            (["x", "87 más"], "x u 87 más"),
            (["x", "87"], "x u 87"),
            (["x", "11 más"], "x u 11 más"),
            (["x", "11"], "x u 11"),
            (["x", "110 más"], "x o 110 más"),
            (["x", "110"], "x o 110"),
            (["x", "11.000 más"], "x u 11.000 más"),
            (["x", "11.000"], "x u 11.000"),
            (["x", "11.000,92 más"], "x u 11.000,92 más"),
            (["x", "11.000,92"], "x u 11.000,92"),
        );

        // The same check for the regional locale `es-AR`.
        test!("es-AR", try_new_and, (["x", "Ibiza"], "x e Ibiza"),);
    }

    #[test]
    fn test_hebrew() {
        test!(
            "he",
            try_new_and,
            (["x", "יפו"], "x ויפו"),
            (["x", "Ibiza"], "x ו‑Ibiza"),
        );
    }
}