icu_list/
list_formatter.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::options::{ListFormatterOptions, ListLength};
6use crate::provider::*;
7use core::fmt::{self, Write};
8use icu_locale_core::preferences::define_preferences;
9use icu_provider::marker::ErasedMarker;
10use icu_provider::prelude::*;
11use writeable::*;
12
13#[cfg(doc)]
14extern crate writeable;
15
// Declares `ListFormatterPreferences` through `define_preferences!`. The field block is
// empty, so no list-specific preference keys are declared here; the constructors in this
// file only read `prefs.locale_preferences` from the resulting type.
// NOTE(review): `[Copy]` presumably lists extra derives for the generated struct — confirm
// against the `define_preferences!` documentation.
define_preferences!(
    /// The preferences for list formatting.
    [Copy]
    ListFormatterPreferences,
    {}
);
22
/// A formatter that renders sequences of items in an i18n-friendly way. See the
/// [crate-level documentation](crate) for more details.
///
/// Instances are created with one of the `try_new_and*`, `try_new_or*` or `try_new_unit*`
/// constructors; [`ListFormatter::format`] then joins the items of an iterator using the
/// patterns that were loaded at construction time.
#[derive(Debug)]
pub struct ListFormatter {
    // Patterns loaded by the constructor. The payload is cast to an erased marker so that
    // this one struct can hold data loaded for any of the "and", "or" and "unit" markers.
    data: DataPayload<ErasedMarker<ListFormatterPatterns<'static>>>,
}
29
// Generates the constructors for one list type.
//
// * `$name`, `$name_buffer`, `$name_unstable`: names of the compiled-data, buffer-provider
//   and unstable-provider constructors. The first two are produced by
//   `gen_buffer_data_constructors!`; the third is written out below.
// * `$marker`: the data marker whose payload is loaded.
// * `$doc`: the list type name that is spliced into the generated documentation.
macro_rules! constructor {
    ($name: ident, $name_buffer: ident, $name_unstable: ident, $marker: ty, $doc: literal) => {
        icu_provider::gen_buffer_data_constructors!(
            (prefs: ListFormatterPreferences, options: ListFormatterOptions) ->  error: DataError,
            #[doc = concat!("Creates a new [`ListFormatter`] that produces a ", $doc, "-type list using compiled data.")]
            ///
            /// See the [CLDR spec](https://unicode.org/reports/tr35/tr35-general.html#ListPatterns) for
            /// an explanation of the different types.
            functions: [
                $name,
                $name_buffer,
                $name_unstable,
                Self
            ]
        );

        #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::$name)]
        pub fn $name_unstable(
            provider: &(impl DataProvider<$marker> + ?Sized),
            prefs: ListFormatterPreferences,
            options: ListFormatterOptions,
        ) -> Result<Self, DataError> {
            // The requested length (or the default one) selects the marker attributes
            // under which the patterns are looked up.
            let attributes = match options.length.unwrap_or_default() {
                ListLength::Wide => ListFormatterPatterns::WIDE,
                ListLength::Short => ListFormatterPatterns::SHORT,
                ListLength::Narrow => ListFormatterPatterns::NARROW,
            };
            let locale = <$marker>::make_locale(prefs.locale_preferences);

            let request = DataRequest {
                id: DataIdentifierBorrowed::for_marker_attributes_and_locale(attributes, &locale),
                ..Default::default()
            };
            let response = provider.load(request)?;

            // Erase the concrete marker so every list type shares one `ListFormatter`.
            Ok(Self {
                data: response.payload.cast(),
            })
        }
    };
}
72
73impl ListFormatter {
74    constructor!(
75        try_new_and,
76        try_new_and_with_buffer_provider,
77        try_new_and_unstable,
78        ListAndV1,
79        "and"
80    );
81    constructor!(
82        try_new_or,
83        try_new_or_with_buffer_provider,
84        try_new_or_unstable,
85        ListOrV1,
86        "or"
87    );
88    constructor!(
89        try_new_unit,
90        try_new_unit_with_buffer_provider,
91        try_new_unit_unstable,
92        ListUnitV1,
93        "unit"
94    );
95
96    /// Returns a [`Writeable`] composed of the input [`Writeable`]s and the language-dependent
97    /// formatting.
98    ///
99    /// The [`Writeable`] is annotated with [`parts::ELEMENT`] for input elements,
100    /// and [`parts::LITERAL`] for list literals.
101    ///
102    /// # Example
103    ///
104    /// ```
105    /// use icu::list::options::*;
106    /// use icu::list::{parts, ListFormatter};
107    /// # use icu::locale::locale;
108    /// # use writeable::*;
109    /// let formatteur = ListFormatter::try_new_and(
110    ///     locale!("fr").into(),
111    ///     ListFormatterOptions::default().with_length(ListLength::Wide),
112    /// )
113    /// .unwrap();
114    /// let pays = ["Italie", "France", "Espagne", "Allemagne"];
115    ///
116    /// assert_writeable_parts_eq!(
117    ///     formatteur.format(pays.iter()),
118    ///     "Italie, France, Espagne et Allemagne",
119    ///     [
120    ///         (0, 6, parts::ELEMENT),
121    ///         (6, 8, parts::LITERAL),
122    ///         (8, 14, parts::ELEMENT),
123    ///         (14, 16, parts::LITERAL),
124    ///         (16, 23, parts::ELEMENT),
125    ///         (23, 27, parts::LITERAL),
126    ///         (27, 36, parts::ELEMENT),
127    ///     ]
128    /// );
129    /// ```
130    pub fn format<'a, W: Writeable + 'a, I: Iterator<Item = W> + Clone + 'a>(
131        &'a self,
132        values: I,
133    ) -> FormattedList<'a, W, I> {
134        FormattedList {
135            formatter: self,
136            values,
137        }
138    }
139
140    /// Returns a [`String`] composed of the input [`Writeable`]s and the language-dependent
141    /// formatting.
142    #[cfg(feature = "alloc")]
143    pub fn format_to_string<W: Writeable, I: Iterator<Item = W> + Clone>(
144        &self,
145        values: I,
146    ) -> alloc::string::String {
147        self.format(values).write_to_string().into_owned()
148    }
149}
150
151/// The [`Part`]s used by [`ListFormatter`].
152pub mod parts {
153    use writeable::Part;
154
155    /// The [`Part`] used by [`FormattedList`](super::FormattedList) to mark the part of the string that is an element.
156    ///
157    /// * `category`: `"list"`
158    /// * `value`: `"element"`
159    pub const ELEMENT: Part = Part {
160        category: "list",
161        value: "element",
162    };
163
164    /// The [`Part`] used by [`FormattedList`](super::FormattedList) to mark the part of the string that is a list literal,
165    /// such as ", " or " and ".
166    ///
167    /// * `category`: `"list"`
168    /// * `value`: `"literal"`
169    pub const LITERAL: Part = Part {
170        category: "list",
171        value: "literal",
172    };
173}
174
175/// The [`Writeable`] implementation that is returned by [`ListFormatter::format`]. See
176/// the [`writeable`] crate for how to consume this.
177#[derive(Debug)]
178pub struct FormattedList<'a, W: Writeable + 'a, I: Iterator<Item = W> + Clone + 'a> {
179    formatter: &'a ListFormatter,
180    values: I,
181}
182
183impl<'a, W: Writeable + 'a, I: Iterator<Item = W> + Clone + 'a> Writeable
184    for FormattedList<'a, W, I>
185{
186    fn write_to_parts<V: PartsWrite + ?Sized>(&self, sink: &mut V) -> fmt::Result {
187        macro_rules! literal {
188            ($lit:ident) => {
189                sink.with_part(parts::LITERAL, |l| l.write_str($lit))
190            };
191        }
192        macro_rules! value {
193            ($val:expr) => {
194                sink.with_part(parts::ELEMENT, |e| $val.write_to_parts(e))
195            };
196        }
197
198        let patterns = self.formatter.data.get();
199
200        let mut values = self.values.clone();
201
202        if let Some(first) = values.next() {
203            if let Some(second) = values.next() {
204                if let Some(third) = values.next() {
205                    // Start(values[0], middle(..., middle(values[n-3], End(values[n-2], values[n-1]))...)) =
206                    // start_before + values[0] + start_between + (values[1..n-3] + middle_between)* +
207                    // values[n-2] + end_between + values[n-1] + end_after
208
209                    let (start_before, start_between, _) = patterns.start.parts();
210
211                    literal!(start_before)?;
212                    value!(first)?;
213                    literal!(start_between)?;
214                    value!(second)?;
215
216                    let mut next = third;
217
218                    for next_next in values {
219                        let between = &*patterns.middle;
220                        literal!(between)?;
221                        value!(next)?;
222                        next = next_next;
223                    }
224
225                    let (_, end_between, end_after) = patterns.end.parts(&next);
226                    literal!(end_between)?;
227                    value!(next)?;
228                    literal!(end_after)
229                } else {
230                    // Pair(values[0], values[1]) = pair_before + values[0] + pair_between + values[1] + pair_after
231                    let (before, between, after) = patterns
232                        .pair
233                        .as_ref()
234                        .unwrap_or(&patterns.end)
235                        .parts(&second);
236                    literal!(before)?;
237                    value!(first)?;
238                    literal!(between)?;
239                    value!(second)?;
240                    literal!(after)
241                }
242            } else {
243                value!(first)
244            }
245        } else {
246            Ok(())
247        }
248    }
249
250    fn writeable_length_hint(&self) -> LengthHint {
251        let mut count = 0;
252        let item_length = self
253            .values
254            .clone()
255            .map(|w| {
256                count += 1;
257                w.writeable_length_hint()
258            })
259            .sum::<LengthHint>();
260        item_length + self.formatter.data.get().length_hint(count)
261    }
262}
263
264impl<'a, W: Writeable + 'a, I: Iterator<Item = W> + Clone + 'a> core::fmt::Display
265    for FormattedList<'a, W, I>
266{
267    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
268        self.write_to(f)
269    }
270}
271
#[cfg(all(test, feature = "datagen"))]
mod tests {
    use super::*;
    use writeable::{assert_writeable_eq, assert_writeable_parts_eq};

    /// Wraps hand-written patterns in a [`ListFormatter`] without going through a provider.
    fn formatter(patterns: ListFormatterPatterns<'static>) -> ListFormatter {
        let data = DataPayload::from_owned(patterns);
        ListFormatter { data }
    }

    /// Lists of zero to five elements, covering every branch of the writer.
    #[test]
    fn test_slices() {
        let list = formatter(crate::patterns::test::test_patterns_general());
        let words = ["one", "two", "three", "four", "five"];

        assert_writeable_eq!(list.format(words[..0].iter()), "");
        assert_writeable_eq!(list.format(words[..1].iter()), "one");
        assert_writeable_eq!(list.format(words[..2].iter()), "$one;two+");
        assert_writeable_eq!(list.format(words[..3].iter()), "@one:two.three!");
        assert_writeable_eq!(list.format(words[..4].iter()), "@one:two,three.four!");

        assert_writeable_parts_eq!(
            list.format(words.iter()),
            "@one:two,three,four.five!",
            [
                (0, 1, parts::LITERAL),
                (1, 4, parts::ELEMENT),
                (4, 5, parts::LITERAL),
                (5, 8, parts::ELEMENT),
                (8, 9, parts::LITERAL),
                (9, 14, parts::ELEMENT),
                (14, 15, parts::LITERAL),
                (15, 19, parts::ELEMENT),
                (19, 20, parts::LITERAL),
                (20, 24, parts::ELEMENT),
                (24, 25, parts::LITERAL)
            ]
        );
    }

    /// The iterator of a non-slice collection works as input.
    #[test]
    fn test_into_iterator() {
        let list = formatter(crate::patterns::test::test_patterns_general());

        // Ends up holding [48, 10].
        let mut numbers = std::collections::vec_deque::VecDeque::<u8>::new();
        numbers.push_back(10);
        numbers.push_front(48);

        assert_writeable_parts_eq!(
            list.format(numbers.iter()),
            "$48;10+",
            [
                (0, 1, parts::LITERAL),
                (1, 3, parts::ELEMENT),
                (3, 4, parts::LITERAL),
                (4, 6, parts::ELEMENT),
                (6, 7, parts::LITERAL),
            ]
        );
    }

    /// An iterator that yields owned values works as input.
    #[test]
    fn test_iterator() {
        let list = formatter(crate::patterns::test::test_patterns_general());

        assert_writeable_parts_eq!(
            list.format(core::iter::repeat_n(5, 2)),
            "$5;5+",
            [
                (0, 1, parts::LITERAL),
                (1, 2, parts::ELEMENT),
                (2, 3, parts::LITERAL),
                (3, 4, parts::ELEMENT),
                (4, 5, parts::LITERAL),
            ]
        );
    }

    /// Two elements formatted with the conditional test patterns.
    #[test]
    fn test_conditional() {
        let list = formatter(crate::patterns::test::test_patterns_conditional());

        assert_writeable_eq!(list.format(["beta", "alpha"].iter()), "beta :o alpha");
    }

    // Builds a formatter for `$locale` with the constructor `$ctor` and default options,
    // then checks every `(input, expected output)` pair against it.
    macro_rules! test {
        ($locale:literal, $ctor:ident, $(($items:expr, $expected:literal),)+) => {
            let list_formatter = ListFormatter::$ctor(
                icu::locale::locale!($locale).into(),
                Default::default(),
            ).unwrap();
            $(
                assert_writeable_eq!(list_formatter.format($items.iter()), $expected);
            )+
        };
    }

    #[test]
    fn test_basic() {
        test!("fr", try_new_or, (["A", "B"], "A ou B"),);
    }

    #[test]
    fn test_spanish() {
        // "and" lists: the conjunction depends on how the following element starts.
        test!(
            "es",
            try_new_and,
            (["x", "Mallorca"], "x y Mallorca"),
            (["x", "Ibiza"], "x e Ibiza"),
            (["x", "Hidalgo"], "x e Hidalgo"),
            (["x", "Hierva"], "x y Hierva"),
        );

        // "or" lists, including elements that start with digits.
        test!(
            "es",
            try_new_or,
            (["x", "Ibiza"], "x o Ibiza"),
            (["x", "Okinawa"], "x u Okinawa"),
            (["x", "8 más"], "x u 8 más"),
            (["x", "8"], "x u 8"),
            (["x", "87 más"], "x u 87 más"),
            (["x", "87"], "x u 87"),
            (["x", "11 más"], "x u 11 más"),
            (["x", "11"], "x u 11"),
            (["x", "110 más"], "x o 110 más"),
            (["x", "110"], "x o 110"),
            (["x", "11.000 más"], "x u 11.000 más"),
            (["x", "11.000"], "x u 11.000"),
            (["x", "11.000,92 más"], "x u 11.000,92 más"),
            (["x", "11.000,92"], "x u 11.000,92"),
        );

        // A regional Spanish locale.
        test!("es-AR", try_new_and, (["x", "Ibiza"], "x e Ibiza"),);
    }

    #[test]
    fn test_hebrew() {
        test!(
            "he",
            try_new_and,
            (["x", "יפו"], "x ויפו"),
            (["x", "Ibiza"], "x ו‑Ibiza"),
        );
    }
}