icu_plurals/provider/rules/runtime/
ast.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::provider::rules::reference;
6use core::{
7    convert::{TryFrom, TryInto},
8    fmt, num,
9};
10use icu_provider::prelude::*;
11use zerovec::{
12    ule::{tuple::Tuple2ULE, AsULE, UleError, ULE},
13    {VarZeroVec, ZeroVec},
14};
15
/// A plural rule in its runtime form: a flat, zero-copy list of [`Relation`]s.
///
/// The "OR of ANDs" condition of a `reference::ast::Rule` is flattened into a single
/// sequence of relations; each relation carries an `AndOr` marker recording whether it
/// continues the current and-condition or starts a new one (see the
/// `TryFrom<&reference::ast::Rule>` and `From<&Rule>` conversions in this file).
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. In particular, the `DataProvider` implementations are only
/// guaranteed to match with this version's `*_unstable` providers. Use with caution.
/// </div>
#[derive(yoke::Yokeable, zerofrom::ZeroFrom, Clone, PartialEq, Debug)]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_plurals::provider::rules::runtime::ast))]
#[allow(clippy::exhaustive_structs)] // Reference AST is non-public and this type is stable
pub struct Rule<'data>(pub VarZeroVec<'data, RelationULE>);
26
/// How a relation is joined to the relations that precede it in a flattened [`Rule`].
///
/// Stored in the most significant bit of `AndOrPolarityOperandULE` (set for `And`).
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)]
#[repr(u8)]
pub(crate) enum AndOr {
    /// The relation opens a new and-condition, i.e. it follows an `or` boundary.
    Or,
    /// The relation belongs to the current and-condition. The very first relation of a
    /// rule converted from the reference AST is also marked `And`.
    And,
}
33
/// Whether a relation tests for membership in its range list or for the absence of it.
///
/// Stored in bit 6 of `AndOrPolarityOperandULE` (set for `Positive`).
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)]
#[repr(u8)]
pub(crate) enum Polarity {
    /// Corresponds to `reference::ast::Operator::NotEq`.
    Negative,
    /// Corresponds to `reference::ast::Operator::Eq`.
    Positive,
}
40
/// The operand on the left-hand side of a relation.
///
/// Mirrors `reference::ast::Operand` variant for variant (see the `From` conversions in
/// this file). The unaligned form produced by `make_ule` is what gets packed into the low
/// six bits of `AndOrPolarityOperandULE`, so every value must stay at or below
/// `0b0011_1111`.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Ord, PartialOrd)]
#[repr(u8)]
#[zerovec::make_ule(OperandULE)]
pub(crate) enum Operand {
    N = 0,
    I = 1,
    V = 2,
    W = 3,
    F = 4,
    T = 5,
    C = 6,
    E = 7,
}
54
/// One entry of a relation's range list: either a range or a single value.
///
/// The unaligned encoding (see the `AsULE` impl in this file) is always a pair of `u32`s;
/// a `Value(v)` is stored as the pair `(v, v)`. As a consequence a `Range(a, a)` decodes
/// back as `Value(a)`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Ord, PartialOrd)]
pub(crate) enum RangeOrValue {
    /// A range given by its two bounds (start, end).
    Range(u32, u32),
    /// A single value.
    Value(u32),
}
60
/// Represents a single "relation" in a plural rule
#[derive(Clone, Debug, PartialEq, Eq, Ord, PartialOrd)]
#[zerovec::make_varule(RelationULE)]
pub struct Relation<'data> {
    /// Packed and/or marker, polarity and operand of this relation.
    pub(crate) aopo: AndOrPolarityOperand,
    /// Modulus applied to the operand; `0` stands for "no modulus"
    /// (see `get_modulo` / `get_modulus`).
    pub(crate) modulo: u32,
    /// The ranges and values the operand is compared against.
    pub(crate) range_list: ZeroVec<'data, RangeOrValue>,
}
69
/// The three small per-relation fields that are packed together into a single byte
/// (`AndOrPolarityOperandULE`) when stored unaligned.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Ord, PartialOrd)]
pub(crate) struct AndOrPolarityOperand {
    /// How the relation is joined to the preceding relations.
    pub(crate) and_or: AndOr,
    /// Whether the relation is an equality or an inequality test.
    pub(crate) polarity: Polarity,
    /// The operand the relation inspects.
    pub(crate) operand: Operand,
}
76
77impl fmt::Debug for RelationULE {
78    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
79        self.as_relation().fmt(f)
80    }
81}
82/////
83
84impl TryFrom<&reference::ast::Rule> for Rule<'_> {
85    type Error = num::TryFromIntError;
86
87    fn try_from(input: &reference::ast::Rule) -> Result<Self, Self::Error> {
88        let mut relations: alloc::vec::Vec<Relation> = alloc::vec![];
89
90        for (i_or, and_condition) in input.condition.0.iter().enumerate() {
91            for (i_and, relation) in and_condition.0.iter().enumerate() {
92                let range_list = relation
93                    .range_list
94                    .0
95                    .iter()
96                    .map(|rov| rov.try_into())
97                    .collect::<Result<alloc::vec::Vec<_>, _>>()?;
98
99                let and_or = if i_or > 0 && i_and == 0 {
100                    AndOr::Or
101                } else {
102                    AndOr::And
103                };
104
105                let aopo = AndOrPolarityOperand {
106                    and_or,
107                    polarity: relation.operator.into(),
108                    operand: relation.expression.operand.into(),
109                };
110
111                relations.push(Relation {
112                    aopo,
113                    modulo: get_modulo(&relation.expression.modulus)?,
114                    range_list: ZeroVec::alloc_from_slice(&range_list),
115                })
116            }
117        }
118
119        Ok(Self(VarZeroVec::from(relations.as_slice())))
120    }
121}
122
123impl From<&Rule<'_>> for reference::ast::Rule {
124    fn from(input: &Rule<'_>) -> Self {
125        let mut or_conditions: alloc::vec::Vec<reference::ast::AndCondition> = alloc::vec![];
126        let mut and_conditions: alloc::vec::Vec<reference::ast::Relation> = alloc::vec![];
127        for rel in input.0.iter() {
128            let rel = rel.as_relation();
129            let list = rel.range_list.iter().map(Into::into).collect();
130            let relation = reference::ast::Relation {
131                expression: (rel.aopo.operand, rel.modulo).into(),
132                operator: rel.aopo.polarity.into(),
133                range_list: reference::ast::RangeList(list),
134            };
135
136            if rel.aopo.and_or == AndOr::And {
137                and_conditions.push(relation);
138            } else {
139                or_conditions.push(reference::ast::AndCondition(and_conditions));
140                and_conditions = alloc::vec![relation];
141            }
142        }
143
144        if !and_conditions.is_empty() {
145            or_conditions.push(reference::ast::AndCondition(and_conditions));
146        }
147
148        Self {
149            condition: reference::ast::Condition(or_conditions),
150            samples: None,
151        }
152    }
153}
154
155impl From<reference::ast::Operator> for Polarity {
156    fn from(op: reference::ast::Operator) -> Self {
157        match op {
158            reference::ast::Operator::Eq => Polarity::Positive,
159            reference::ast::Operator::NotEq => Polarity::Negative,
160        }
161    }
162}
163
164impl From<Polarity> for reference::ast::Operator {
165    fn from(pol: Polarity) -> Self {
166        match pol {
167            Polarity::Negative => reference::ast::Operator::NotEq,
168            Polarity::Positive => reference::ast::Operator::Eq,
169        }
170    }
171}
172
173impl From<reference::ast::Operand> for Operand {
174    fn from(op: reference::ast::Operand) -> Self {
175        match op {
176            reference::ast::Operand::N => Self::N,
177            reference::ast::Operand::I => Self::I,
178            reference::ast::Operand::V => Self::V,
179            reference::ast::Operand::W => Self::W,
180            reference::ast::Operand::F => Self::F,
181            reference::ast::Operand::T => Self::T,
182            reference::ast::Operand::C => Self::C,
183            reference::ast::Operand::E => Self::E,
184        }
185    }
186}
187
188impl From<Operand> for reference::ast::Operand {
189    fn from(op: Operand) -> Self {
190        match op {
191            Operand::N => Self::N,
192            Operand::I => Self::I,
193            Operand::V => Self::V,
194            Operand::W => Self::W,
195            Operand::F => Self::F,
196            Operand::T => Self::T,
197            Operand::C => Self::C,
198            Operand::E => Self::E,
199        }
200    }
201}
202
203impl From<(Operand, u32)> for reference::ast::Expression {
204    fn from(input: (Operand, u32)) -> Self {
205        Self {
206            operand: input.0.into(),
207            modulus: get_modulus(input.1),
208        }
209    }
210}
211
212fn get_modulo(op: &Option<reference::ast::Value>) -> Result<u32, num::TryFromIntError> {
213    if let Some(op) = op {
214        u32::try_from(op)
215    } else {
216        Ok(0)
217    }
218}
219
220fn get_modulus(input: u32) -> Option<reference::ast::Value> {
221    if input == 0 {
222        None
223    } else {
224        Some(input.into())
225    }
226}
227
228impl TryFrom<&reference::ast::Value> for u32 {
229    type Error = num::TryFromIntError;
230
231    fn try_from(v: &reference::ast::Value) -> Result<Self, Self::Error> {
232        v.0.try_into()
233    }
234}
235
236impl From<u32> for reference::ast::Value {
237    fn from(input: u32) -> Self {
238        Self(input.into())
239    }
240}
241
242impl TryFrom<&reference::ast::RangeListItem> for RangeOrValue {
243    type Error = num::TryFromIntError;
244
245    fn try_from(item: &reference::ast::RangeListItem) -> Result<Self, Self::Error> {
246        Ok(match item {
247            reference::ast::RangeListItem::Range(range) => {
248                RangeOrValue::Range(range.start().try_into()?, range.end().try_into()?)
249            }
250            reference::ast::RangeListItem::Value(value) => RangeOrValue::Value(value.try_into()?),
251        })
252    }
253}
254
255impl From<RangeOrValue> for reference::ast::RangeListItem {
256    fn from(item: RangeOrValue) -> Self {
257        match item {
258            RangeOrValue::Range(min, max) => Self::Range(min.into()..=max.into()),
259            RangeOrValue::Value(value) => Self::Value(value.into()),
260        }
261    }
262}
263
#[cfg(feature = "datagen")]
impl core::str::FromStr for Rule<'_> {
    type Err = reference::parser::ParseError;

    /// Parses a rule string with the reference parser and converts it to the runtime form.
    ///
    /// # Errors
    ///
    /// Returns the parser's error, or `ParseError::ValueTooLarge` when the parsed rule
    /// cannot be converted into the runtime representation.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let parsed = reference::parser::parse(s.as_bytes())?;
        match Rule::try_from(&parsed) {
            Ok(rule) => Ok(rule),
            Err(_) => Err(reference::parser::ParseError::ValueTooLarge),
        }
    }
}
273
274impl fmt::Display for Rule<'_> {
275    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
276        let reference = reference::ast::Rule::from(self);
277        reference::serialize(&reference, f)
278    }
279}
280
281impl RelationULE {
282    /// Convert to a Relation
283    #[inline]
284    pub fn as_relation(&self) -> Relation {
285        zerofrom::ZeroFrom::zero_from(self)
286    }
287}
288
/// Single-byte unaligned encoding of [`AndOrPolarityOperand`].
///
/// Bit layout, as written by `AndOrPolarityOperand::to_unaligned`:
/// - bit 7: set when the relation is `AndOr::And`
/// - bit 6: set when the relation is `Polarity::Positive`
/// - bits 0-5: the unaligned `Operand`
#[derive(Copy, Clone, Hash, PartialEq, Eq, Debug)]
#[repr(transparent)]
pub(crate) struct AndOrPolarityOperandULE(u8);
292
293// Safety (based on the safety checklist on the ULE trait):
294//  1. AndOrPolarityOperandULE does not include any uninitialized or padding bytes
295//     (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
296/// 2. AndOrPolarityOperandULE is aligned to 1 byte
297//     (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
298//  3. The impl of validate_bytes() returns an error if any byte is not valid.
299//  4. The impl of validate_bytes() returns an error if there are extra bytes
300//     (impossible since it is of size 1 byte)
301//  5 The other ULE methods use the default impl.
302//  6. AndOrPolarityOperandULE byte equality is semantic equality.
303unsafe impl ULE for AndOrPolarityOperandULE {
304    fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> {
305        for byte in bytes {
306            Operand::new_from_u8(byte & 0b0011_1111).ok_or_else(UleError::parse::<Self>)?;
307        }
308        Ok(())
309    }
310}
311
312impl AsULE for AndOrPolarityOperand {
313    type ULE = AndOrPolarityOperandULE;
314    fn to_unaligned(self) -> AndOrPolarityOperandULE {
315        let encoded_operand = self.operand.to_unaligned().0;
316        debug_assert!(encoded_operand <= 0b0011_1111);
317        AndOrPolarityOperandULE(
318            (((self.and_or == AndOr::And) as u8) << 7)
319                + (((self.polarity == Polarity::Positive) as u8) << 6)
320                + encoded_operand,
321        )
322    }
323
324    fn from_unaligned(other: AndOrPolarityOperandULE) -> Self {
325        let encoded = other.0;
326        let and_or = if encoded & 0b1000_0000 != 0 {
327            AndOr::And
328        } else {
329            AndOr::Or
330        };
331
332        let polarity = if encoded & 0b0100_0000 != 0 {
333            Polarity::Positive
334        } else {
335            Polarity::Negative
336        };
337
338        // note that this is unsafe, since OperandULE has its own
339        // safety requirements
340        // we can guarantee safety here since these bits can only come
341        // from validated OperandULEs
342        let operand = OperandULE(encoded & 0b0011_1111);
343        Self {
344            and_or,
345            polarity,
346            operand: Operand::from_unaligned(operand),
347        }
348    }
349}
350
351type RangeOrValueULE = Tuple2ULE<<u32 as AsULE>::ULE, <u32 as AsULE>::ULE>;
352
353impl AsULE for RangeOrValue {
354    type ULE = RangeOrValueULE;
355
356    #[inline]
357    fn to_unaligned(self) -> Self::ULE {
358        match self {
359            Self::Range(start, end) => Tuple2ULE(start.to_unaligned(), end.to_unaligned()),
360            Self::Value(idx) => Tuple2ULE(idx.to_unaligned(), idx.to_unaligned()),
361        }
362    }
363
364    #[inline]
365    fn from_unaligned(unaligned: Self::ULE) -> Self {
366        let start = u32::from_unaligned(unaligned.0);
367        let end = u32::from_unaligned(unaligned.1);
368        if start == end {
369            Self::Value(start)
370        } else {
371            Self::Range(start, end)
372        }
373    }
374}
375
// Serde support for `Rule`: a UTS 35 rule string in human-readable formats, the raw
// `VarZeroVec` bytes otherwise.
#[cfg(feature = "serde")]
mod serde {
    use super::*;
    use ::serde::{de, ser, Deserialize, Deserializer, Serialize};
    use alloc::{
        format,
        string::{String, ToString},
    };

    impl Serialize for Rule<'_> {
        /// Serializes the rule as its string form for human-readable serializers and as
        /// the underlying `VarZeroVec` bytes for binary ones.
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: ser::Serializer,
        {
            if serializer.is_human_readable() {
                let string: String = self.to_string();
                serializer.serialize_str(&string)
            } else {
                serializer.serialize_bytes(self.0.as_bytes())
            }
        }
    }

    /// Visitor that builds a `Rule` from either a rule string or borrowed rule bytes.
    struct DeserializeRule;

    impl<'de> de::Visitor<'de> for DeserializeRule {
        type Value = Rule<'de>;

        fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
            write!(formatter, "a valid rule.")
        }

        /// Parses a rule string.
        ///
        /// The parsed rule owns its data, so the input does not have to outlive the
        /// deserializer. Implementing `visit_str` (rather than only `visit_borrowed_str`)
        /// also accepts transient strings; borrowed strings still arrive here through the
        /// default `visit_borrowed_str`, which forwards to this method.
        fn visit_str<E>(self, rule_string: &str) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            // The output lifetime is deliberately independent of the input string.
            fn from_str<'a>(s: &str) -> Result<Rule<'a>, reference::parser::ParseError> {
                let rule = reference::parser::parse(s.as_bytes())?;
                Rule::try_from(&rule).map_err(|_| reference::parser::ParseError::ValueTooLarge)
            }

            from_str(rule_string).map_err(|err| {
                de::Error::invalid_value(
                    de::Unexpected::Other(&format!("{err}")),
                    &"a valid UTS 35 rule string",
                )
            })
        }

        /// Validates the bytes as a `VarZeroVec` and borrows them; this path is zero-copy
        /// and therefore requires input that lives for `'de`.
        fn visit_borrowed_bytes<E>(self, rule_bytes: &'de [u8]) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            let rule = VarZeroVec::parse_bytes(rule_bytes).map_err(|err| {
                de::Error::invalid_value(
                    de::Unexpected::Other(&format!("{err}")),
                    &"a valid UTS 35 rule byte slice",
                )
            })?;
            Ok(Rule(rule))
        }
    }

    impl<'de: 'data, 'data> Deserialize<'de> for Rule<'data> {
        /// Expects a string from human-readable deserializers and bytes from binary ones.
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: Deserializer<'de>,
        {
            if deserializer.is_human_readable() {
                deserializer.deserialize_str(DeserializeRule)
            } else {
                deserializer.deserialize_bytes(DeserializeRule)
            }
        }
    }
}
452
#[cfg(test)]
mod test {
    use super::*;
    use crate::provider::rules::reference;
    use crate::provider::rules::runtime::test_rule;
    use crate::PluralOperands;

    /// A single-relation rule parses, converts to the runtime form and matches `1`.
    #[test]
    fn simple_rule_test() {
        use reference::ast;

        let input = "i = 1";
        let full_ast = reference::parse(input.as_bytes()).expect("Failed to parse Rule");
        assert_eq!(
            full_ast,
            ast::Rule {
                condition: ast::Condition(vec![ast::AndCondition(vec![ast::Relation {
                    expression: ast::Expression {
                        operand: ast::Operand::I,
                        modulus: None,
                    },
                    operator: ast::Operator::Eq,
                    range_list: ast::RangeList(vec![ast::RangeListItem::Value(ast::Value(1))])
                }])]),
                samples: None,
            }
        );

        let rule = Rule::try_from(&full_ast).expect("Failed to convert Rule");
        let relation = rule
            .0
            .iter()
            .next()
            .expect("Should have a relation")
            .as_relation();
        assert_eq!(
            relation,
            Relation {
                aopo: AndOrPolarityOperand {
                    and_or: AndOr::And,
                    polarity: Polarity::Positive,
                    operand: Operand::I,
                },
                modulo: 0,
                range_list: ZeroVec::new_borrowed(&[RangeOrValue::Value(1).to_unaligned()])
            }
        );

        let fd = fixed_decimal::Decimal::from(1);
        let operands = PluralOperands::from(&fd);
        assert!(test_rule(&rule, &operands));
    }

    /// A rule mixing `and`, `or`, modulo, ranges and negation evaluates as expected.
    #[test]
    fn complex_rule_test() {
        let input = "n % 10 = 3..4, 9 and n % 100 != 10..19, 70..79, 90..99 or n = 0";
        let ref_rule = reference::parse(input.as_bytes()).expect("Failed to parse Rule");
        let rule = Rule::try_from(&ref_rule).expect("Failed to convert Rule");

        // (input number, whether the rule is expected to match)
        let cases = [
            (0, true),
            (13, false),
            (103, true),
            (113, false),
            (178, false),
        ];
        for (n, expected) in cases {
            let fd = fixed_decimal::Decimal::from(n);
            let operands = PluralOperands::from(&fd);
            assert_eq!(test_rule(&rule, &operands), expected, "n = {n}");
        }
    }

    /// Converting reference -> runtime -> reference is lossless, as is stringification.
    #[test]
    fn complex_rule_ule_roundtrip_test() {
        let input = "n % 10 = 3..4, 9 and n % 100 != 10..19, 70..79, 90..99 or n = 0";

        let ref_rule = reference::parse(input.as_bytes()).unwrap();

        // Create a VarZeroVec-backed Rule from the reference one.
        let rule = Rule::try_from(&ref_rule).expect("Failed to convert Rule");

        // Convert it back to reference Rule and compare.
        assert_eq!(ref_rule, reference::ast::Rule::from(&rule));

        // Verify that the stringified output matches the input.
        assert_eq!(input, rule.to_string());
    }

    /// Pins the byte layout of an unaligned `RangeOrValue`.
    #[test]
    fn range_or_value_ule_test() {
        let rov = RangeOrValue::Value(1);
        let ule = rov.to_unaligned();
        let ref_bytes = &[1, 0, 0, 0, 1, 0, 0, 0];
        assert_eq!(ULE::slice_as_bytes(&[ule]), *ref_bytes);

        let rov = RangeOrValue::Range(2, 4);
        let ule = rov.to_unaligned();
        let ref_bytes = &[2, 0, 0, 0, 4, 0, 0, 0];
        assert_eq!(ULE::slice_as_bytes(&[ule]), *ref_bytes);
    }

    /// Pins the byte layout of a one-element `VarZeroVec` of relations.
    #[test]
    fn relation_ule_test() {
        let rov = RangeOrValue::Value(1);
        let aopo = AndOrPolarityOperand {
            and_or: AndOr::And,
            polarity: Polarity::Positive,
            operand: Operand::N,
        };
        let relation = Relation {
            aopo,
            modulo: 0,
            range_list: ZeroVec::alloc_from_slice(&[rov]),
        };
        let relations = alloc::vec![relation];
        let vzv = VarZeroVec::<_>::from(relations.as_slice());
        assert_eq!(
            vzv.as_bytes(),
            &[1, 0, 192, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0]
        );
    }
}