ixdtf/
core.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Core functionality for `ixdtf`'s parsers
6
7use crate::{ParseError, ParserResult};
8
// Support for sealing `EncodingType`: `Sealed` is a public trait inside this
// private module and `EncodingType` names it as a supertrait, so only code
// that is able to name `private::Sealed` can implement `EncodingType`.
9mod private {
    /// Method-less marker supertrait; this file implements it for `Utf16` and `Utf8`.
10    pub trait Sealed {}
11}
12
13/// A trait for defining the supported source encodings
14/// and implementing the functionality that is encoding
15/// sensitive / specific.
///
/// The trait has `private::Sealed` as a supertrait, so implementing it
/// requires access to that private module.
16pub trait EncodingType: private::Sealed {
    /// Element type of a source slice in this encoding
    /// (`u16` for [`Utf16`], `u8` for [`Utf8`]).
17    type CodeUnit: PartialEq + core::fmt::Debug + Clone;
18
19    /// Returns the code units of `source` in the range `start..end`.
    ///
    /// Both implementations in this file return `None` when that range does
    /// not lie within `source`.
20    fn slice(source: &[Self::CodeUnit], start: usize, end: usize) -> Option<&[Self::CodeUnit]>;
21
22    /// Reads the code unit at `index` and returns it as a single byte.
23    ///
    /// In both implementations in this file, `Ok(None)` means `index` is past
    /// the end of `source`. The [`Utf16`] implementation returns an error for
    /// code units it does not accept as ASCII; the [`Utf8`] implementation
    /// returns the byte as-is without an ASCII check.
24    fn get_ascii(source: &[Self::CodeUnit], index: usize) -> ParserResult<Option<u8>>;
25
26    /// Returns `true` if `key` is exactly the known calendar annotation key `u-ca`.
27    fn check_calendar_key(key: &[Self::CodeUnit]) -> bool;
28}
29
30/// A marker type that signals a parser should parse the source as UTF-16 code units.
///
/// Its [`EncodingType`] implementation uses `u16` as the code unit, so the
/// source is a slice of `u16` values rather than raw bytes.
31#[derive(Debug, PartialEq, Clone)]
32#[allow(clippy::exhaustive_structs)] // Zero-sized marker type, no fields should be added
33pub struct Utf16;
34
// Satisfies the `private::Sealed` supertrait bound that `EncodingType` places
// on its implementors.
35impl private::Sealed for Utf16 {}
36
37impl EncodingType for Utf16 {
38    type CodeUnit = u16;
39    fn slice(source: &[Self::CodeUnit], start: usize, end: usize) -> Option<&[Self::CodeUnit]> {
40        source.get(start..end)
41    }
42
43    fn get_ascii(source: &[Self::CodeUnit], index: usize) -> ParserResult<Option<u8>> {
44        source.get(index).copied().map(to_ascii_byte).transpose()
45    }
46
47    fn check_calendar_key(key: &[Self::CodeUnit]) -> bool {
48        key == [0x75, 0x2d, 0x63, 0x61]
49    }
50}
51
52#[inline]
53fn to_ascii_byte(b: u16) -> ParserResult<u8> {
54    if !(0x01..0x7F).contains(&b) {
55        return Err(ParseError::NonAsciiCodePoint);
56    }
57    Ok(b as u8)
58}
59
60/// A marker type that signals a parser should parse the source as UTF-8 bytes.
///
/// Its [`EncodingType`] implementation uses `u8` as the code unit.
61#[derive(Debug, PartialEq, Clone)]
62#[allow(clippy::exhaustive_structs)] // Zero-sized marker type, no fields should be added.
63pub struct Utf8;
64
// Satisfies the `private::Sealed` supertrait bound that `EncodingType` places
// on its implementors.
65impl private::Sealed for Utf8 {}
66
67impl EncodingType for Utf8 {
68    type CodeUnit = u8;
69
70    fn slice<'a>(source: &[Self::CodeUnit], start: usize, end: usize) -> Option<&[Self::CodeUnit]> {
71        source.get(start..end)
72    }
73
74    fn get_ascii(source: &[Self::CodeUnit], index: usize) -> ParserResult<Option<u8>> {
75        Ok(source.get(index).copied())
76    }
77
78    fn check_calendar_key(key: &[Self::CodeUnit]) -> bool {
79        key == "u-ca".as_bytes()
80    }
81}
82
83// ==== Mini cursor implementation for Iso8601 targets ====
84
85/// `Cursor` is a small cursor implementation for parsing Iso8601 grammar.
///
/// It pairs a borrowed slice of `T::CodeUnit` values with an index into
/// that slice.
86#[derive(Debug)]
87pub(crate) struct Cursor<'a, T: EncodingType> {
    /// Index of the current code unit within `source`.
88    pos: usize,
    /// The borrowed input the cursor reads from.
89    source: &'a [T::CodeUnit],
90}
91
92impl<'a, T: EncodingType> Cursor<'a, T> {
93    /// Create a new cursor from a source UTF8 string.
94    #[must_use]
95    pub fn new(source: &'a [T::CodeUnit]) -> Self {
96        Self { pos: 0, source }
97    }
98
99    /// Returns a string value from a slice of the cursor.
100    pub(crate) fn slice(&self, start: usize, end: usize) -> Option<&'a [T::CodeUnit]> {
101        T::slice(self.source, start, end)
102    }
103
104    /// Get current position
105    pub(crate) const fn pos(&self) -> usize {
106        self.pos
107    }
108
109    /// Get current position
110    pub(crate) fn set_position(&mut self, pos: usize) {
111        self.pos = pos;
112    }
113
114    /// Peek the value at next position (current + 1).
115    pub(crate) fn peek(&self) -> ParserResult<Option<u8>> {
116        self.peek_n(1)
117    }
118
119    /// Returns current position in source as `char`.
120    pub(crate) fn current(&self) -> ParserResult<Option<u8>> {
121        self.peek_n(0)
122    }
123
124    /// Peeks the value at `n` as a `char`.
125    pub(crate) fn peek_n(&self, n: usize) -> ParserResult<Option<u8>> {
126        T::get_ascii(self.source, self.pos + n)
127    }
128
129    /// Runs the provided check on the current position.
130    pub(crate) fn check<F>(&self, f: F) -> ParserResult<Option<bool>>
131    where
132        F: FnOnce(u8) -> bool,
133    {
134        Ok(self.current()?.map(f))
135    }
136
137    /// Runs the provided check on current position returns the default value if None.
138    pub(crate) fn check_or<F>(&self, default: bool, f: F) -> ParserResult<bool>
139    where
140        F: FnOnce(u8) -> bool,
141    {
142        Ok(self.current()?.map_or(default, f))
143    }
144
145    /// Returns `Cursor`'s current char and advances to the next position.
146    pub(crate) fn next(&mut self) -> ParserResult<Option<u8>> {
147        let result = self.current();
148        self.advance_n(1);
149        result
150    }
151
152    /// Returns the next value as a digit
153    ///
154    /// # Errors
155    ///   - Returns an AbruptEnd error if cursor ends.
156    pub(crate) fn next_digit(&mut self) -> ParserResult<Option<u8>> {
157        let ascii_char = self.next_or(ParseError::AbruptEnd { location: "digit" })?;
158        if ascii_char.is_ascii_digit() {
159            Ok(Some(ascii_char - 48))
160        } else {
161            Ok(None)
162        }
163    }
164
165    /// A utility next method that returns an `AbruptEnd` error if invalid.
166    pub(crate) fn next_or(&mut self, err: ParseError) -> ParserResult<u8> {
167        self.next()?.ok_or(err)
168    }
169
170    /// Advances the cursor's position by n code points.
171    pub(crate) fn advance_n(&mut self, n: usize) {
172        self.pos += n;
173    }
174
175    // Advances the cursor by 1 code point.
176    pub(crate) fn advance(&mut self) {
177        self.advance_n(1)
178    }
179
180    /// Utility function to advance when a condition is true
181    pub(crate) fn advance_if(&mut self, condition: bool) {
182        if condition {
183            self.advance();
184        }
185    }
186
187    /// Closes the current cursor by checking if all contents have been consumed. If not, returns an error for invalid syntax.
188    pub(crate) fn close(&mut self) -> ParserResult<()> {
189        if self.pos < self.source.len() {
190            return Err(ParseError::InvalidEnd);
191        }
192        Ok(())
193    }
194}