ixdtf/parsers/
mod.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
//! The parser module contains the implementation details for `IxdtfParser`, `TimeZoneParser`, and `IsoDurationParser`.
6
7use crate::core::{EncodingType, Utf16, Utf8};
8use crate::{core::Cursor, ParserResult};
9
10#[cfg(feature = "duration")]
11use crate::records::DurationParseRecord;
12use crate::records::{IxdtfParseRecord, TimeZoneRecord, UtcOffsetRecord};
13
14use crate::records::Annotation;
15
16mod annotations;
17pub(crate) mod datetime;
18#[cfg(feature = "duration")]
19pub(crate) mod duration;
20mod grammar;
21mod time;
22pub(crate) mod timezone;
23
24#[cfg(test)]
25mod tests;
26
/// `assert_syntax!` is a parser-specific utility macro that checks a syntax
/// condition and returns early with the provided error if the check fails.
///
/// The first argument is a boolean expression and the second is the name of a
/// `ParseError` variant (a trailing comma is accepted). When the condition
/// evaluates to `false`, the enclosing function returns
/// `Err(ParseError::<variant>)`; otherwise execution continues past the macro.
///
/// The error type is referenced through `$crate`, so the macro expands
/// correctly even when `ParseError` is not imported at the call site.
#[macro_export]
macro_rules! assert_syntax {
    ($cond:expr, $err:ident $(,)?) => {
        if !$cond {
            // `$crate::` keeps the exported macro independent of the caller's imports.
            return Err($crate::ParseError::$err);
        }
    };
}
37
38/// `IxdtfParser` is the primary parser implementation of `ixdtf`.
39///
40/// This parser provides various options for parsing date/time strings with the extended notation
41/// laid out in [RFC9557][rfc9557] along with other variations laid out in the [`Temporal`][temporal-proposal].
42///
43/// ```rust
44/// use ixdtf::{
45///     parsers::IxdtfParser,
46///     records::{Sign, TimeZoneRecord, UtcOffsetRecord},
47/// };
48///
49/// let ixdtf_str = "2024-03-02T08:48:00-05:00[America/New_York]";
50///
51/// let result = IxdtfParser::from_str(ixdtf_str).parse().unwrap();
52///
53/// let date = result.date.unwrap();
54/// let time = result.time.unwrap();
55/// let offset = result.offset.unwrap().resolve_rfc_9557();
56/// let tz_annotation = result.tz.unwrap();
57///
58/// assert_eq!(date.year, 2024);
59/// assert_eq!(date.month, 3);
60/// assert_eq!(date.day, 2);
61/// assert_eq!(time.hour, 8);
62/// assert_eq!(time.minute, 48);
63/// assert_eq!(offset.sign(), Sign::Negative);
64/// assert_eq!(offset.hour(), 5);
65/// assert_eq!(offset.minute(), 0);
66/// assert_eq!(offset.second(), None);
67/// assert_eq!(offset.fraction(), None);
68/// assert!(!tz_annotation.critical);
69/// assert_eq!(
70///     tz_annotation.tz,
71///     TimeZoneRecord::Name("America/New_York".as_bytes())
72/// );
73/// ```
74///
75/// [rfc9557]: https://datatracker.ietf.org/doc/rfc9557/
76/// [temporal-proposal]: https://tc39.es/proposal-temporal/
77#[derive(Debug)]
78pub struct IxdtfParser<'a, T: EncodingType> {
79    cursor: Cursor<'a, T>,
80}
81
82impl<'a> IxdtfParser<'a, Utf8> {
83    /// Creates a new `IxdtfParser` from a source `&str`.
84    #[inline]
85    #[must_use]
86    #[expect(clippy::should_implement_trait)]
87    pub fn from_str(source: &'a str) -> Self {
88        Self::from_utf8(source.as_bytes())
89    }
90
91    /// Creates a new `IxdtfParser` from a slice of utf-8 bytes.
92    #[inline]
93    #[must_use]
94    pub fn from_utf8(source: &'a [u8]) -> Self {
95        Self::new(source)
96    }
97}
98
99impl<'a> IxdtfParser<'a, Utf16> {
100    /// Creates a new `IxdtfParser` from a slice of utf-16 bytes.
101    pub fn from_utf16(source: &'a [u16]) -> Self {
102        Self::new(source)
103    }
104}
105
106impl<'a, T: EncodingType> IxdtfParser<'a, T> {
107    /// Create a new `IxdtfParser` for the specified encoding.
108    #[inline]
109    #[must_use]
110    pub fn new(source: &'a [T::CodeUnit]) -> Self {
111        Self {
112            cursor: Cursor::new(source),
113        }
114    }
115
116    /// Parses the source as an [extended Date/Time string][rfc9557].
117    ///
118    /// This is the baseline parse method for `ixdtf`. For this method, the
119    /// TimeRecord, UTCOffsetRecord, and all annotations are optional.
120    ///
121    /// # Example
122    ///
123    /// [rfc9557]: https://datatracker.ietf.org/doc/rfc9557/
124    pub fn parse(&mut self) -> ParserResult<IxdtfParseRecord<'a, T>> {
125        self.parse_with_annotation_handler(Some)
126    }
127
128    /// Parses the source as an extended Date/Time string with an Annotation handler.
129    ///
130    /// For more, see [Implementing Annotation Handlers](crate#implementing-annotation-handlers)
131    pub fn parse_with_annotation_handler(
132        &mut self,
133        handler: impl FnMut(Annotation<'a, T>) -> Option<Annotation<'a, T>>,
134    ) -> ParserResult<IxdtfParseRecord<'a, T>> {
135        datetime::parse_annotated_date_time(&mut self.cursor, handler)
136    }
137
138    /// Parses the source as an extended [YearMonth string][temporal-ym].
139    ///
140    /// # Example
141    ///
142    /// ```rust
143    /// # use ixdtf::parsers::IxdtfParser;
144    ///
145    /// let extended_year_month = "2020-11[u-ca=iso8601]";
146    ///
147    /// let result = IxdtfParser::from_str(extended_year_month)
148    ///     .parse_year_month()
149    ///     .unwrap();
150    ///
151    /// let date = result.date.unwrap();
152    ///
153    /// assert_eq!(date.year, 2020);
154    /// assert_eq!(date.month, 11);
155    /// ```
156    ///
157    /// [temporal-ym]: https://tc39.es/proposal-temporal/#prod-TemporalYearMonthString
158    pub fn parse_year_month(&mut self) -> ParserResult<IxdtfParseRecord<'a, T>> {
159        self.parse_year_month_with_annotation_handler(Some)
160    }
161
162    /// Parses the source as an extended YearMonth string with an Annotation handler.
163    ///
164    /// For more, see [Implementing Annotation Handlers](crate#implementing-annotation-handlers)
165    pub fn parse_year_month_with_annotation_handler(
166        &mut self,
167        handler: impl FnMut(Annotation<'a, T>) -> Option<Annotation<'a, T>>,
168    ) -> ParserResult<IxdtfParseRecord<'a, T>> {
169        datetime::parse_annotated_year_month(&mut self.cursor, handler)
170    }
171
172    /// Parses the source as an extended [MonthDay string][temporal-md].
173    ///
174    /// # Example
175    ///
176    /// ```rust
177    /// # use ixdtf::parsers::IxdtfParser;
178    /// let extended_month_day = "1107[+04:00]";
179    ///
180    /// let result = IxdtfParser::from_str(extended_month_day)
181    ///     .parse_month_day()
182    ///     .unwrap();
183    ///
184    /// let date = result.date.unwrap();
185    ///
186    /// assert_eq!(date.month, 11);
187    /// assert_eq!(date.day, 7);
188    /// ```
189    ///
190    /// [temporal-md]: https://tc39.es/proposal-temporal/#prod-TemporalMonthDayString
191    pub fn parse_month_day(&mut self) -> ParserResult<IxdtfParseRecord<'a, T>> {
192        self.parse_month_day_with_annotation_handler(Some)
193    }
194
195    /// Parses the source as an extended MonthDay string with an Annotation handler.
196    ///
197    /// For more, see [Implementing Annotation Handlers](crate#implementing-annotation-handlers)
198    pub fn parse_month_day_with_annotation_handler(
199        &mut self,
200        handler: impl FnMut(Annotation<'a, T>) -> Option<Annotation<'a, T>>,
201    ) -> ParserResult<IxdtfParseRecord<'a, T>> {
202        datetime::parse_annotated_month_day(&mut self.cursor, handler)
203    }
204
205    /// Parses the source as an extended [Time string][temporal-time].
206    ///
207    /// # Example
208    ///
209    /// ```rust
210    /// # use ixdtf::{parsers::IxdtfParser, records::{Sign, TimeZoneRecord}};
211    /// let extended_time = "12:01:04-05:00[America/New_York][u-ca=iso8601]";
212    ///
213    /// let result = IxdtfParser::from_str(extended_time).parse_time().unwrap();
214    ///
215    /// let time = result.time.unwrap();
216    /// let offset = result.offset.unwrap().resolve_rfc_9557();
217    /// let tz_annotation = result.tz.unwrap();
218    ///
219    /// assert_eq!(time.hour, 12);
220    /// assert_eq!(time.minute, 1);
221    /// assert_eq!(time.second, 4);
222    /// assert_eq!(offset.sign(), Sign::Negative);
223    /// assert_eq!(offset.hour(), 5);
224    /// assert_eq!(offset.minute(), 0);
225    /// assert!(!tz_annotation.critical);
226    /// assert_eq!(
227    ///     tz_annotation.tz,
228    ///     TimeZoneRecord::Name("America/New_York".as_bytes())
229    /// );
230    /// ```
231    ///
232    /// [temporal-time]: https://tc39.es/proposal-temporal/#prod-TemporalTimeString
233    pub fn parse_time(&mut self) -> ParserResult<IxdtfParseRecord<'a, T>> {
234        self.parse_time_with_annotation_handler(Some)
235    }
236
237    /// Parses the source as an extended Time string with an Annotation handler.
238    ///
239    /// For more, see [Implementing Annotation Handlers](crate#implementing-annotation-handlers)
240    pub fn parse_time_with_annotation_handler(
241        &mut self,
242        handler: impl FnMut(Annotation<'a, T>) -> Option<Annotation<'a, T>>,
243    ) -> ParserResult<IxdtfParseRecord<'a, T>> {
244        time::parse_annotated_time_record(&mut self.cursor, handler)
245    }
246}
247
248/// A parser for time zone offset and IANA identifier strings.
249///
250/// ✨ *Enabled with the `timezone` Cargo feature.*
251#[derive(Debug)]
252pub struct TimeZoneParser<'a, T: EncodingType> {
253    cursor: Cursor<'a, T>,
254}
255
256impl<'a> TimeZoneParser<'a, Utf8> {
257    /// Creates a new `TimeZoneParser` from a source `&str`.
258    #[inline]
259    #[must_use]
260    #[expect(clippy::should_implement_trait)]
261    pub fn from_str(source: &'a str) -> Self {
262        Self::from_utf8(source.as_bytes())
263    }
264
265    /// Creates a new `TimeZoneParser` from a slice of utf-8 bytes.
266    #[inline]
267    #[must_use]
268    pub fn from_utf8(source: &'a [u8]) -> Self {
269        Self::new(source)
270    }
271}
272
273impl<'a> TimeZoneParser<'a, Utf16> {
274    /// Creates a new `TimeZoneParser` from a slice of utf-16 bytes.
275    pub fn from_utf16(source: &'a [u16]) -> Self {
276        Self::new(source)
277    }
278}
279
280impl<'a, T: EncodingType> TimeZoneParser<'a, T> {
281    /// Creates a new `TimeZoneParser` for the provided encoding.
282    #[inline]
283    #[must_use]
284    pub fn new(source: &'a [T::CodeUnit]) -> Self {
285        Self {
286            cursor: Cursor::new(source),
287        }
288    }
289
290    /// Parse a time zone identifier that can be either an
291    /// IANA identifer name or minute precision offset.
292    ///
293    /// ## IANA identifier example
294    ///
295    /// ```rust
296    /// use ixdtf::{parsers::TimeZoneParser, records::TimeZoneRecord};
297    ///
298    /// let identifier = "Europe/London";
299    /// let record = TimeZoneParser::from_str(identifier)
300    ///     .parse_identifier()
301    ///     .unwrap();
302    /// assert_eq!(record, TimeZoneRecord::Name(identifier.as_bytes()))
303    /// ```
304    ///
305    /// ## Minute precision offset example
306    ///
307    /// ```rust
308    /// use ixdtf::{
309    ///     parsers::TimeZoneParser,
310    ///     records::{MinutePrecisionOffset, Sign, TimeZoneRecord},
311    /// };
312    ///
313    /// let identifier = "+00:00";
314    /// let offset = match TimeZoneParser::from_str(identifier).parse_identifier() {
315    ///     Ok(TimeZoneRecord::Offset(o)) => o,
316    ///     _ => unreachable!(),
317    /// };
318    ///
319    /// assert_eq!(offset.sign, Sign::Positive);
320    /// assert_eq!(offset.hour, 0);
321    /// assert_eq!(offset.minute, 0);
322    /// ```
323    ///
324    /// ## Errors
325    ///
326    /// It is an error to provide a full precision offset as a
327    /// time zone identifier.
328    ///
329    /// **NOTE**: To parse either a full or minute precision,
330    /// use [`Self::parse_offset`].
331    ///
332    /// ```rust
333    /// use ixdtf::{parsers::TimeZoneParser, ParseError};
334    ///
335    /// let identifier = "+00:00:00";
336    /// let err = TimeZoneParser::from_str(identifier)
337    ///     .parse_identifier()
338    ///     .unwrap_err();
339    /// assert_eq!(err, ParseError::InvalidMinutePrecisionOffset);
340    ///
341    /// let identifier = "+00:00.1";
342    /// let err = TimeZoneParser::from_str(identifier)
343    ///     .parse_identifier()
344    ///     .unwrap_err();
345    /// assert_eq!(err, ParseError::InvalidEnd);
346    /// ```
347    pub fn parse_identifier(&mut self) -> ParserResult<TimeZoneRecord<'a, T>> {
348        let result = timezone::parse_time_zone(&mut self.cursor)?;
349        self.cursor.close()?;
350        Ok(result)
351    }
352
353    /// Parse a UTC offset from the provided source.
354    ///
355    /// This method can parse both a minute precision and full
356    /// precision offset.
357    ///
358    /// ## Minute precision offset example
359    ///
360    /// ```rust
361    /// use ixdtf::{parsers::TimeZoneParser, records::Sign};
362    ///
363    /// let offset_src = "-05:00";
364    /// let parse_result =
365    ///     TimeZoneParser::from_str(offset_src).parse_offset().unwrap();
366    /// assert_eq!(parse_result.sign(), Sign::Negative);
367    /// assert_eq!(parse_result.hour(), 5);
368    /// assert_eq!(parse_result.minute(), 0);
369    /// assert_eq!(parse_result.second(), None);
370    /// assert_eq!(parse_result.fraction(), None);
371    /// ```
372    ///
373    /// ## Full precision offset example
374    ///
375    /// ```rust
376    /// use ixdtf::{parsers::TimeZoneParser, records::Sign};
377    ///
378    /// let offset_src = "-05:00:30.123456789";
379    /// let parse_result =
380    ///     TimeZoneParser::from_str(offset_src).parse_offset().unwrap();
381    /// assert_eq!(parse_result.sign(), Sign::Negative);
382    /// assert_eq!(parse_result.hour(), 5);
383    /// assert_eq!(parse_result.minute(), 0);
384    /// assert_eq!(parse_result.second(), Some(30));
385    /// let fraction = parse_result.fraction().unwrap();
386    /// assert_eq!(fraction.to_nanoseconds(), Some(123456789));
387    /// ```
388    #[inline]
389    pub fn parse_offset(&mut self) -> ParserResult<UtcOffsetRecord> {
390        let result = timezone::parse_utc_offset(&mut self.cursor)?;
391        self.cursor.close()?;
392        Ok(result)
393    }
394
395    /// Parse an IANA identifier name.
396    ///
397    ///
398    /// ```rust
399    /// use ixdtf::{parsers::TimeZoneParser, records::Sign};
400    ///
401    /// let iana_identifier = "America/Chicago";
402    /// let parse_result = TimeZoneParser::from_str(iana_identifier)
403    ///     .parse_iana_identifier()
404    ///     .unwrap();
405    /// assert_eq!(parse_result, iana_identifier.as_bytes());
406    ///
407    /// let iana_identifier = "Europe/Berlin";
408    /// let parse_result = TimeZoneParser::from_str(iana_identifier)
409    ///     .parse_iana_identifier()
410    ///     .unwrap();
411    /// assert_eq!(parse_result, iana_identifier.as_bytes());
412    /// ```
413    #[inline]
414    pub fn parse_iana_identifier(&mut self) -> ParserResult<&'a [T::CodeUnit]> {
415        let result = timezone::parse_tz_iana_name(&mut self.cursor)?;
416        self.cursor.close()?;
417        Ok(result)
418    }
419}
420
/// Parses ISO8601 Duration strings.
///
/// ✨ *Enabled with the `duration` Cargo feature.*
///
/// # Example
///
/// ```rust
/// use ixdtf::{
///     parsers::IsoDurationParser,
///     records::{DurationParseRecord, Sign, TimeDurationRecord},
/// };
///
/// let duration_str = "P1Y2M1DT2H10M30S";
///
/// let result = IsoDurationParser::from_str(duration_str).parse().unwrap();
///
/// let date_duration = result.date.unwrap();
///
/// let (hours, minutes, seconds, fraction) = match result.time {
///     // Hours variant is defined as { hours: u32, fraction: Option<Fraction> }
///     Some(TimeDurationRecord::Hours { hours, fraction }) => (hours, 0, 0, fraction),
///     // Minutes variant is defined as { hours: u32, minutes: u32, fraction: Option<Fraction> }
///     Some(TimeDurationRecord::Minutes {
///         hours,
///         minutes,
///         fraction,
///     }) => (hours, minutes, 0, fraction),
///     // Seconds variant is defined as { hours: u32, minutes: u32, seconds: u32, fraction: Option<Fraction> }
///     Some(TimeDurationRecord::Seconds {
///         hours,
///         minutes,
///         seconds,
///         fraction,
///     }) => (hours, minutes, seconds, fraction),
///     None => (0, 0, 0, None),
/// };
///
/// assert_eq!(result.sign, Sign::Positive);
/// assert_eq!(date_duration.years, 1);
/// assert_eq!(date_duration.months, 2);
/// assert_eq!(date_duration.weeks, 0);
/// assert_eq!(date_duration.days, 1);
/// assert_eq!(hours, 2);
/// assert_eq!(minutes, 10);
/// assert_eq!(seconds, 30);
/// assert_eq!(fraction, None);
/// ```
#[cfg(feature = "duration")]
#[derive(Debug)]
pub struct IsoDurationParser<'a, T>
where
    T: EncodingType,
{
    // Cursor over the borrowed source code units.
    cursor: Cursor<'a, T>,
}
461
#[cfg(feature = "duration")]
impl<'a> IsoDurationParser<'a, Utf8> {
    /// Builds an `IsoDurationParser` that reads the UTF-8 bytes of `source`.
    #[inline]
    #[must_use]
    // This is an infallible constructor that borrows `source` for `'a`, which
    // the `std::str::FromStr` trait signature cannot express.
    #[expect(clippy::should_implement_trait)]
    pub fn from_str(source: &'a str) -> Self {
        let bytes = source.as_bytes();
        Self::from_utf8(bytes)
    }

    /// Builds an `IsoDurationParser` over a borrowed slice of UTF-8 bytes.
    #[inline]
    #[must_use]
    pub fn from_utf8(source: &'a [u8]) -> Self {
        Self::new(source)
    }
}
479
#[cfg(feature = "duration")]
impl<'a> IsoDurationParser<'a, Utf16> {
    /// Creates a new `IsoDurationParser` from a slice of UTF-16 code units.
    #[inline]
    #[must_use]
    pub fn from_utf16(source: &'a [u16]) -> Self {
        Self::new(source)
    }

    /// Creates a new `IsoDurationParser` from a slice of UTF-16 code units.
    ///
    /// This constructor was misnamed: it takes UTF-16 input (`&[u16]`), not
    /// UTF-8. It is kept only so existing callers continue to compile; use
    /// [`Self::from_utf16`] instead.
    #[deprecated(note = "misnamed; this takes UTF-16 code units, use `from_utf16` instead")]
    #[inline]
    #[must_use]
    pub fn from_utf8(source: &'a [u16]) -> Self {
        Self::from_utf16(source)
    }
}
489
#[cfg(feature = "duration")]
impl<'a, T: EncodingType> IsoDurationParser<'a, T> {
    /// Builds an `IsoDurationParser` over `source` for the encoding `T`.
    #[inline]
    #[must_use]
    pub fn new(source: &'a [T::CodeUnit]) -> Self {
        let cursor = Cursor::new(source);
        Self { cursor }
    }

    /// Parses the source held by this `IsoDurationParser` into a
    /// `DurationParseRecord`.
    ///
    /// # Examples
    ///
    /// ## Parsing a date duration
    ///
    /// ```rust
    /// # use ixdtf::{parsers::IsoDurationParser, records::DurationParseRecord };
    /// let date_duration = "P1Y2M3W1D";
    ///
    /// let result = IsoDurationParser::from_str(date_duration).parse().unwrap();
    ///
    /// let date_duration = result.date.unwrap();
    ///
    /// assert!(result.time.is_none());
    /// assert_eq!(date_duration.years, 1);
    /// assert_eq!(date_duration.months, 2);
    /// assert_eq!(date_duration.weeks, 3);
    /// assert_eq!(date_duration.days, 1);
    /// ```
    ///
    /// ## Parsing a time duration
    ///
    /// ```rust
    /// # use ixdtf::{parsers::IsoDurationParser, records::{DurationParseRecord, TimeDurationRecord }};
    /// let time_duration = "PT2H10M30S";
    ///
    /// let result = IsoDurationParser::from_str(time_duration).parse().unwrap();
    ///
    /// let (hours, minutes, seconds, fraction) = match result.time {
    ///     // Hours variant is defined as { hours: u32, fraction: Option<Fraction> }
    ///     Some(TimeDurationRecord::Hours { hours, fraction }) => (hours, 0, 0, fraction),
    ///     // Minutes variant is defined as { hours: u32, minutes: u32, fraction: Option<Fraction> }
    ///     Some(TimeDurationRecord::Minutes {
    ///         hours,
    ///         minutes,
    ///         fraction,
    ///     }) => (hours, minutes, 0, fraction),
    ///     // Seconds variant is defined as { hours: u32, minutes: u32, seconds: u32, fraction: Option<Fraction> }
    ///     Some(TimeDurationRecord::Seconds {
    ///         hours,
    ///         minutes,
    ///         seconds,
    ///         fraction,
    ///     }) => (hours, minutes, seconds, fraction),
    ///     None => (0, 0, 0, None),
    /// };
    /// assert!(result.date.is_none());
    /// assert_eq!(hours, 2);
    /// assert_eq!(minutes, 10);
    /// assert_eq!(seconds, 30);
    /// assert_eq!(fraction, None);
    /// ```
    pub fn parse(&mut self) -> ParserResult<DurationParseRecord> {
        let cursor = &mut self.cursor;
        duration::parse_duration(cursor)
    }
}