ixdtf/parsers/
mod.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
//! The parser module contains the implementation details for `IxdtfParser`, `TimeZoneParser`, and `IsoDurationParser`.
6
7use crate::{ParseError, ParserResult};
8
9#[cfg(feature = "duration")]
10use records::DurationParseRecord;
11use records::{IxdtfParseRecord, UtcOffsetRecord};
12
13use self::records::Annotation;
14
15pub mod records;
16
17mod annotations;
18pub(crate) mod datetime;
19#[cfg(feature = "duration")]
20pub(crate) mod duration;
21mod grammar;
22mod time;
23pub(crate) mod timezone;
24
25#[cfg(test)]
26mod tests;
27
/// `assert_syntax!` is a parser-specific utility macro that asserts a syntax condition and
/// returns the provided error from the enclosing function if the assertion fails.
///
/// - `$cond`: the boolean expression that must hold.
/// - `$err`: the name of a unit variant of `ParseError` (a trailing comma is accepted).
///
/// On failure the macro expands to an early `return Err(..)`, so it can only be used inside a
/// function whose return type accepts `Err(ParseError)`.
#[macro_export]
macro_rules! assert_syntax {
    ($cond:expr, $err:ident $(,)?) => {
        if !$cond {
            // `$crate::` keeps the expansion valid at call sites that have not imported
            // `ParseError` themselves.
            return Err($crate::ParseError::$err);
        }
    };
}
38
39/// `IxdtfParser` is the primary parser implementation of `ixdtf`.
40///
41/// This parser provides various options for parsing date/time strings with the extended notation
42/// laid out in [RFC9557][rfc9557] along with other variations laid out in the [`Temporal`][temporal-proposal].
43///
44/// ```rust
45/// use ixdtf::parsers::{
46///     records::{Sign, TimeZoneRecord, UtcOffsetRecord},
47///     IxdtfParser,
48/// };
49///
50/// let ixdtf_str = "2024-03-02T08:48:00-05:00[America/New_York]";
51///
52/// let result = IxdtfParser::from_str(ixdtf_str).parse().unwrap();
53///
54/// let date = result.date.unwrap();
55/// let time = result.time.unwrap();
56/// let offset = result.offset.unwrap().resolve_rfc_9557();
57/// let tz_annotation = result.tz.unwrap();
58///
59/// assert_eq!(date.year, 2024);
60/// assert_eq!(date.month, 3);
61/// assert_eq!(date.day, 2);
62/// assert_eq!(time.hour, 8);
63/// assert_eq!(time.minute, 48);
64/// assert_eq!(offset.sign(), Sign::Negative);
65/// assert_eq!(offset.hour(), 5);
66/// assert_eq!(offset.minute(), 0);
67/// assert_eq!(offset.second(), None);
68/// assert_eq!(offset.fraction(), None);
69/// assert!(!tz_annotation.critical);
70/// assert_eq!(
71///     tz_annotation.tz,
72///     TimeZoneRecord::Name("America/New_York".as_bytes())
73/// );
74/// ```
75///
76/// [rfc9557]: https://datatracker.ietf.org/doc/rfc9557/
77/// [temporal-proposal]: https://tc39.es/proposal-temporal/
78#[derive(Debug)]
79pub struct IxdtfParser<'a> {
80    cursor: Cursor<'a>,
81}
82
83impl<'a> IxdtfParser<'a> {
84    /// Creates a new `IxdtfParser` from a slice of utf-8 bytes.
85    #[inline]
86    #[must_use]
87    pub fn from_utf8(source: &'a [u8]) -> Self {
88        Self {
89            cursor: Cursor::new(source),
90        }
91    }
92
93    /// Creates a new `IxdtfParser` from a source `&str`.
94    #[inline]
95    #[must_use]
96    #[allow(clippy::should_implement_trait)]
97    pub fn from_str(source: &'a str) -> Self {
98        Self::from_utf8(source.as_bytes())
99    }
100
101    /// Parses the source as an [extended Date/Time string][rfc9557].
102    ///
103    /// This is the baseline parse method for `ixdtf`. For this method, the
104    /// TimeRecord, UTCOffsetRecord, and all annotations are optional.
105    ///
106    /// # Example
107    ///
108    /// [rfc9557]: https://datatracker.ietf.org/doc/rfc9557/
109    pub fn parse(&mut self) -> ParserResult<IxdtfParseRecord<'a>> {
110        self.parse_with_annotation_handler(Some)
111    }
112
113    /// Parses the source as an extended Date/Time string with an Annotation handler.
114    ///
115    /// For more, see [Implementing Annotation Handlers](crate#implementing-annotation-handlers)
116    pub fn parse_with_annotation_handler(
117        &mut self,
118        handler: impl FnMut(Annotation<'a>) -> Option<Annotation<'a>>,
119    ) -> ParserResult<IxdtfParseRecord<'a>> {
120        datetime::parse_annotated_date_time(&mut self.cursor, handler)
121    }
122
123    /// Parses the source as an extended [YearMonth string][temporal-ym].
124    ///
125    /// # Example
126    ///
127    /// ```rust
128    /// # use ixdtf::parsers::IxdtfParser;
129    ///
130    /// let extended_year_month = "2020-11[u-ca=iso8601]";
131    ///
132    /// let result = IxdtfParser::from_str(extended_year_month)
133    ///     .parse_year_month()
134    ///     .unwrap();
135    ///
136    /// let date = result.date.unwrap();
137    ///
138    /// assert_eq!(date.year, 2020);
139    /// assert_eq!(date.month, 11);
140    /// ```
141    ///
142    /// [temporal-ym]: https://tc39.es/proposal-temporal/#prod-TemporalYearMonthString
143    pub fn parse_year_month(&mut self) -> ParserResult<IxdtfParseRecord<'a>> {
144        self.parse_year_month_with_annotation_handler(Some)
145    }
146
147    /// Parses the source as an extended YearMonth string with an Annotation handler.
148    ///
149    /// For more, see [Implementing Annotation Handlers](crate#implementing-annotation-handlers)
150    pub fn parse_year_month_with_annotation_handler(
151        &mut self,
152        handler: impl FnMut(Annotation<'a>) -> Option<Annotation<'a>>,
153    ) -> ParserResult<IxdtfParseRecord<'a>> {
154        datetime::parse_annotated_year_month(&mut self.cursor, handler)
155    }
156
157    /// Parses the source as an extended [MonthDay string][temporal-md].
158    ///
159    /// # Example
160    ///
161    /// ```rust
162    /// # use ixdtf::parsers::IxdtfParser;
163    /// let extended_month_day = "1107[+04:00]";
164    ///
165    /// let result = IxdtfParser::from_str(extended_month_day)
166    ///     .parse_month_day()
167    ///     .unwrap();
168    ///
169    /// let date = result.date.unwrap();
170    ///
171    /// assert_eq!(date.month, 11);
172    /// assert_eq!(date.day, 7);
173    /// ```
174    ///
175    /// [temporal-md]: https://tc39.es/proposal-temporal/#prod-TemporalMonthDayString
176    pub fn parse_month_day(&mut self) -> ParserResult<IxdtfParseRecord<'a>> {
177        self.parse_month_day_with_annotation_handler(Some)
178    }
179
180    /// Parses the source as an extended MonthDay string with an Annotation handler.
181    ///
182    /// For more, see [Implementing Annotation Handlers](crate#implementing-annotation-handlers)
183    pub fn parse_month_day_with_annotation_handler(
184        &mut self,
185        handler: impl FnMut(Annotation<'a>) -> Option<Annotation<'a>>,
186    ) -> ParserResult<IxdtfParseRecord<'a>> {
187        datetime::parse_annotated_month_day(&mut self.cursor, handler)
188    }
189
190    /// Parses the source as an extended [Time string][temporal-time].
191    ///
192    /// # Example
193    ///
194    /// ```rust
195    /// # use ixdtf::parsers::{IxdtfParser, records::{Sign, TimeZoneRecord}};
196    /// let extended_time = "12:01:04-05:00[America/New_York][u-ca=iso8601]";
197    ///
198    /// let result = IxdtfParser::from_str(extended_time).parse_time().unwrap();
199    ///
200    /// let time = result.time.unwrap();
201    /// let offset = result.offset.unwrap().resolve_rfc_9557();
202    /// let tz_annotation = result.tz.unwrap();
203    ///
204    /// assert_eq!(time.hour, 12);
205    /// assert_eq!(time.minute, 1);
206    /// assert_eq!(time.second, 4);
207    /// assert_eq!(offset.sign(), Sign::Negative);
208    /// assert_eq!(offset.hour(), 5);
209    /// assert_eq!(offset.minute(), 0);
210    /// assert!(!tz_annotation.critical);
211    /// assert_eq!(
212    ///     tz_annotation.tz,
213    ///     TimeZoneRecord::Name("America/New_York".as_bytes())
214    /// );
215    /// ```
216    ///
217    /// [temporal-time]: https://tc39.es/proposal-temporal/#prod-TemporalTimeString
218    pub fn parse_time(&mut self) -> ParserResult<IxdtfParseRecord<'a>> {
219        self.parse_time_with_annotation_handler(Some)
220    }
221
222    /// Parses the source as an extended Time string with an Annotation handler.
223    ///
224    /// For more, see [Implementing Annotation Handlers](crate#implementing-annotation-handlers)
225    pub fn parse_time_with_annotation_handler(
226        &mut self,
227        handler: impl FnMut(Annotation<'a>) -> Option<Annotation<'a>>,
228    ) -> ParserResult<IxdtfParseRecord<'a>> {
229        time::parse_annotated_time_record(&mut self.cursor, handler)
230    }
231}
232
233/// A parser for time zone offset and IANA identifier strings.
234///
235/// ✨ *Enabled with the `timezone` Cargo feature.*
236#[derive(Debug)]
237pub struct TimeZoneParser<'a> {
238    cursor: Cursor<'a>,
239}
240
241impl<'a> TimeZoneParser<'a> {
242    /// Creates a new `TimeZoneParser` from a slice of utf-8 bytes.
243    #[inline]
244    #[must_use]
245    pub fn from_utf8(source: &'a [u8]) -> Self {
246        Self {
247            cursor: Cursor::new(source),
248        }
249    }
250
251    /// Creates a new `TimeZoneParser` from a source `&str`.
252    #[inline]
253    #[must_use]
254    #[allow(clippy::should_implement_trait)]
255    pub fn from_str(source: &'a str) -> Self {
256        Self::from_utf8(source.as_bytes())
257    }
258
259    /// Parse a UTC offset from the provided source.
260    ///
261    /// This method can parse both a minute precision and full
262    /// precision offset.
263    ///
264    /// ## Minute precision offset example
265    ///
266    /// ```rust
267    /// use ixdtf::parsers::{records::Sign, TimeZoneParser};
268    ///
269    /// let offset_src = "-05:00";
270    /// let parse_result =
271    ///     TimeZoneParser::from_str(offset_src).parse_offset().unwrap();
272    /// assert_eq!(parse_result.sign(), Sign::Negative);
273    /// assert_eq!(parse_result.hour(), 5);
274    /// assert_eq!(parse_result.minute(), 0);
275    /// assert_eq!(parse_result.second(), None);
276    /// assert_eq!(parse_result.fraction(), None);
277    /// ```
278    ///
279    /// ## Full precision offset example
280    ///
281    /// ```rust
282    /// use ixdtf::parsers::{records::Sign, TimeZoneParser};
283    ///
284    /// let offset_src = "-05:00:30.123456789";
285    /// let parse_result =
286    ///     TimeZoneParser::from_str(offset_src).parse_offset().unwrap();
287    /// assert_eq!(parse_result.sign(), Sign::Negative);
288    /// assert_eq!(parse_result.hour(), 5);
289    /// assert_eq!(parse_result.minute(), 0);
290    /// assert_eq!(parse_result.second(), Some(30));
291    /// let fraction = parse_result.fraction().unwrap();
292    /// assert_eq!(fraction.to_nanoseconds(), Some(123456789));
293    /// ```
294    #[inline]
295    pub fn parse_offset(&mut self) -> ParserResult<UtcOffsetRecord> {
296        let result = timezone::parse_utc_offset(&mut self.cursor)?;
297        self.cursor.close()?;
298        Ok(result)
299    }
300
301    /// Parse an IANA identifier name.
302    ///
303    ///
304    /// ```rust
305    /// use ixdtf::parsers::{records::Sign, TimeZoneParser};
306    ///
307    /// let iana_identifier = "America/Chicago";
308    /// let parse_result = TimeZoneParser::from_str(iana_identifier)
309    ///     .parse_iana_identifier()
310    ///     .unwrap();
311    /// assert_eq!(parse_result, iana_identifier.as_bytes());
312    ///
313    /// let iana_identifier = "Europe/Berlin";
314    /// let parse_result = TimeZoneParser::from_str(iana_identifier)
315    ///     .parse_iana_identifier()
316    ///     .unwrap();
317    /// assert_eq!(parse_result, iana_identifier.as_bytes());
318    /// ```
319    #[inline]
320    pub fn parse_iana_identifier(&mut self) -> ParserResult<&'a [u8]> {
321        let result = timezone::parse_tz_iana_name(&mut self.cursor)?;
322        self.cursor.close()?;
323        Ok(result)
324    }
325}
326
/// A parser for ISO8601 Duration strings.
///
/// ✨ *Enabled with the `duration` Cargo feature.*
///
/// # Example
///
/// ```rust
/// use ixdtf::parsers::{IsoDurationParser, records::{Sign, DurationParseRecord, TimeDurationRecord}};
///
/// let duration_str = "P1Y2M1DT2H10M30S";
///
/// let result = IsoDurationParser::from_str(duration_str).parse().unwrap();
///
/// let date_duration = result.date.unwrap();
///
/// let (hours, minutes, seconds, fraction) = match result.time {
///     // Hours variant is defined as { hours: u32, fraction: Option<Fraction> }
///     Some(TimeDurationRecord::Hours{ hours, fraction }) => (hours, 0, 0, fraction),
///     // Minutes variant is defined as { hours: u32, minutes: u32, fraction: Option<Fraction> }
///     Some(TimeDurationRecord::Minutes{ hours, minutes, fraction }) => (hours, minutes, 0, fraction),
///     // Seconds variant is defined as { hours: u32, minutes: u32, seconds: u32, fraction: Option<Fraction> }
///     Some(TimeDurationRecord::Seconds{ hours, minutes, seconds, fraction }) => (hours, minutes, seconds, fraction),
///     None => (0,0,0, None),
/// };
///
/// assert_eq!(result.sign, Sign::Positive);
/// assert_eq!(date_duration.years, 1);
/// assert_eq!(date_duration.months, 2);
/// assert_eq!(date_duration.weeks, 0);
/// assert_eq!(date_duration.days, 1);
/// assert_eq!(hours, 2);
/// assert_eq!(minutes, 10);
/// assert_eq!(seconds, 30);
/// assert_eq!(fraction, None);
/// ```
#[cfg(feature = "duration")]
#[derive(Debug)]
pub struct IsoDurationParser<'a> {
    /// Cursor over the borrowed source bytes.
    cursor: Cursor<'a>,
}

#[cfg(feature = "duration")]
impl<'a> IsoDurationParser<'a> {
    /// Builds an `IsoDurationParser` over a slice of utf-8 bytes.
    #[inline]
    #[must_use]
    pub fn from_utf8(source: &'a [u8]) -> Self {
        let cursor = Cursor::new(source);
        Self { cursor }
    }

    /// Builds an `IsoDurationParser` over a source `&str`.
    ///
    /// This forwards the string's bytes to [`Self::from_utf8`].
    #[inline]
    #[must_use]
    // An inherent `from_str` is used because `FromStr` cannot produce a value that borrows
    // from its input.
    #[allow(clippy::should_implement_trait)]
    pub fn from_str(source: &'a str) -> Self {
        let bytes = source.as_bytes();
        Self::from_utf8(bytes)
    }

    /// Parses the contents of this `IsoDurationParser` into a `DurationParseRecord`.
    ///
    /// # Examples
    ///
    /// ## Parsing a date duration
    ///
    /// ```
    /// # use ixdtf::parsers::{IsoDurationParser, records::DurationParseRecord };
    /// let date_duration = "P1Y2M3W1D";
    ///
    /// let result = IsoDurationParser::from_str(date_duration).parse().unwrap();
    ///
    /// let date_duration = result.date.unwrap();
    ///
    /// assert!(result.time.is_none());
    /// assert_eq!(date_duration.years, 1);
    /// assert_eq!(date_duration.months, 2);
    /// assert_eq!(date_duration.weeks, 3);
    /// assert_eq!(date_duration.days, 1);
    /// ```
    ///
    /// ## Parsing a time duration
    ///
    /// ```rust
    /// # use ixdtf::parsers::{IsoDurationParser, records::{DurationParseRecord, TimeDurationRecord }};
    /// let time_duration = "PT2H10M30S";
    ///
    /// let result = IsoDurationParser::from_str(time_duration).parse().unwrap();
    ///
    /// let (hours, minutes, seconds, fraction) = match result.time {
    ///     // Hours variant is defined as { hours: u32, fraction: Option<Fraction> }
    ///     Some(TimeDurationRecord::Hours{ hours, fraction }) => (hours, 0, 0, fraction),
    ///     // Minutes variant is defined as { hours: u32, minutes: u32, fraction: Option<Fraction> }
    ///     Some(TimeDurationRecord::Minutes{ hours, minutes, fraction }) => (hours, minutes, 0, fraction),
    ///     // Seconds variant is defined as { hours: u32, minutes: u32, seconds: u32, fraction: Option<Fraction> }
    ///     Some(TimeDurationRecord::Seconds{ hours, minutes, seconds, fraction }) => (hours, minutes, seconds, fraction),
    ///     None => (0,0,0, None),
    /// };
    /// assert!(result.date.is_none());
    /// assert_eq!(hours, 2);
    /// assert_eq!(minutes, 10);
    /// assert_eq!(seconds, 30);
    /// assert_eq!(fraction, None);
    /// ```
    pub fn parse(&mut self) -> ParserResult<DurationParseRecord> {
        let cursor = &mut self.cursor;
        duration::parse_duration(cursor)
    }
}
435
436// ==== Mini cursor implementation for Iso8601 targets ====
437
438/// `Cursor` is a small cursor implementation for parsing Iso8601 grammar.
439#[derive(Debug)]
440pub(crate) struct Cursor<'a> {
441    pos: usize,
442    source: &'a [u8],
443}
444
445impl<'a> Cursor<'a> {
446    /// Create a new cursor from a source `String` value.
447    #[must_use]
448    pub fn new(source: &'a [u8]) -> Self {
449        Self { pos: 0, source }
450    }
451
452    /// Returns a string value from a slice of the cursor.
453    fn slice(&self, start: usize, end: usize) -> Option<&'a [u8]> {
454        self.source.get(start..end)
455    }
456
457    /// Get current position
458    const fn pos(&self) -> usize {
459        self.pos
460    }
461
462    /// Peek the value at next position (current + 1).
463    fn peek(&self) -> Option<u8> {
464        self.peek_n(1)
465    }
466
467    /// Returns current position in source as `char`.
468    fn current(&self) -> Option<u8> {
469        self.peek_n(0)
470    }
471
472    /// Peeks the value at `n` as a `char`.
473    fn peek_n(&self, n: usize) -> Option<u8> {
474        self.source.get(self.pos + n).copied()
475    }
476
477    /// Runs the provided check on the current position.
478    fn check<F>(&self, f: F) -> Option<bool>
479    where
480        F: FnOnce(u8) -> bool,
481    {
482        self.current().map(f)
483    }
484
485    /// Runs the provided check on current position returns the default value if None.
486    fn check_or<F>(&self, default: bool, f: F) -> bool
487    where
488        F: FnOnce(u8) -> bool,
489    {
490        self.current().map_or(default, f)
491    }
492
493    /// Returns `Cursor`'s current char and advances to the next position.
494    fn next(&mut self) -> Option<u8> {
495        let result = self.current();
496        self.advance_n(1);
497        result
498    }
499
500    /// Returns the next value as a digit
501    ///
502    /// # Errors
503    ///   - Returns an AbruptEnd error if cursor ends.
504    fn next_digit(&mut self) -> ParserResult<Option<u8>> {
505        let ascii_char = self.next_or(ParseError::AbruptEnd { location: "digit" })?;
506        if ascii_char.is_ascii_digit() {
507            Ok(Some(ascii_char - 48))
508        } else {
509            Ok(None)
510        }
511    }
512
513    /// A utility next method that returns an `AbruptEnd` error if invalid.
514    fn next_or(&mut self, err: ParseError) -> ParserResult<u8> {
515        self.next().ok_or(err)
516    }
517
518    /// Advances the cursor's position by n bytes.
519    fn advance_n(&mut self, n: usize) {
520        self.pos += n;
521    }
522
523    // Advances the cursor by 1 byte.
524    fn advance(&mut self) {
525        self.advance_n(1)
526    }
527
528    /// Utility function to advance when a condition is true
529    fn advance_if(&mut self, condition: bool) {
530        if condition {
531            self.advance();
532        }
533    }
534
535    /// Closes the current cursor by checking if all contents have been consumed. If not, returns an error for invalid syntax.
536    fn close(&mut self) -> ParserResult<()> {
537        if self.pos < self.source.len() {
538            return Err(ParseError::InvalidEnd);
539        }
540        Ok(())
541    }
542}