ixdtf/parsers/mod.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
//! The parser module contains the implementation details for `IxdtfParser`, `TimeZoneParser`, and `IsoDurationParser`.
6
7use crate::{ParseError, ParserResult};
8
9#[cfg(feature = "duration")]
10use records::DurationParseRecord;
11use records::{IxdtfParseRecord, UtcOffsetRecord};
12
13use self::records::Annotation;
14
15pub mod records;
16
17mod annotations;
18pub(crate) mod datetime;
19#[cfg(feature = "duration")]
20pub(crate) mod duration;
21mod grammar;
22mod time;
23pub(crate) mod timezone;
24
25#[cfg(test)]
26mod tests;
27
/// `assert_syntax!` is a parser-specific utility macro that checks a syntax condition and
/// returns early from the enclosing function with the provided error when the check fails.
///
/// - `$cond` is any boolean expression.
/// - `$err` is the name of a unit variant of `ParseError`.
///
/// When `$cond` evaluates to `false`, the enclosing function returns
/// `Err(ParseError::$err)`; otherwise execution simply continues. A trailing comma after the
/// variant name is accepted.
///
/// The error type is named through `$crate`, so call sites do not need to have `ParseError`
/// imported for the expansion to resolve.
#[macro_export]
macro_rules! assert_syntax {
    ($cond:expr, $err:ident $(,)?) => {
        if !$cond {
            return Err($crate::ParseError::$err);
        }
    };
}
38
39/// `IxdtfParser` is the primary parser implementation of `ixdtf`.
40///
41/// This parser provides various options for parsing date/time strings with the extended notation
42/// laid out in [RFC9557][rfc9557] along with other variations laid out in the [`Temporal`][temporal-proposal].
43///
44/// ```rust
45/// use ixdtf::parsers::{
46/// records::{Sign, TimeZoneRecord, UtcOffsetRecord},
47/// IxdtfParser,
48/// };
49///
50/// let ixdtf_str = "2024-03-02T08:48:00-05:00[America/New_York]";
51///
52/// let result = IxdtfParser::from_str(ixdtf_str).parse().unwrap();
53///
54/// let date = result.date.unwrap();
55/// let time = result.time.unwrap();
56/// let offset = result.offset.unwrap().resolve_rfc_9557();
57/// let tz_annotation = result.tz.unwrap();
58///
59/// assert_eq!(date.year, 2024);
60/// assert_eq!(date.month, 3);
61/// assert_eq!(date.day, 2);
62/// assert_eq!(time.hour, 8);
63/// assert_eq!(time.minute, 48);
64/// assert_eq!(offset.sign(), Sign::Negative);
65/// assert_eq!(offset.hour(), 5);
66/// assert_eq!(offset.minute(), 0);
67/// assert_eq!(offset.second(), None);
68/// assert_eq!(offset.fraction(), None);
69/// assert!(!tz_annotation.critical);
70/// assert_eq!(
71/// tz_annotation.tz,
72/// TimeZoneRecord::Name("America/New_York".as_bytes())
73/// );
74/// ```
75///
76/// [rfc9557]: https://datatracker.ietf.org/doc/rfc9557/
77/// [temporal-proposal]: https://tc39.es/proposal-temporal/
78#[derive(Debug)]
79pub struct IxdtfParser<'a> {
80 cursor: Cursor<'a>,
81}
82
83impl<'a> IxdtfParser<'a> {
84 /// Creates a new `IxdtfParser` from a slice of utf-8 bytes.
85 #[inline]
86 #[must_use]
87 pub fn from_utf8(source: &'a [u8]) -> Self {
88 Self {
89 cursor: Cursor::new(source),
90 }
91 }
92
93 /// Creates a new `IxdtfParser` from a source `&str`.
94 #[inline]
95 #[must_use]
96 #[allow(clippy::should_implement_trait)]
97 pub fn from_str(source: &'a str) -> Self {
98 Self::from_utf8(source.as_bytes())
99 }
100
101 /// Parses the source as an [extended Date/Time string][rfc9557].
102 ///
103 /// This is the baseline parse method for `ixdtf`. For this method, the
104 /// TimeRecord, UTCOffsetRecord, and all annotations are optional.
105 ///
106 /// # Example
107 ///
108 /// [rfc9557]: https://datatracker.ietf.org/doc/rfc9557/
109 pub fn parse(&mut self) -> ParserResult<IxdtfParseRecord<'a>> {
110 self.parse_with_annotation_handler(Some)
111 }
112
113 /// Parses the source as an extended Date/Time string with an Annotation handler.
114 ///
115 /// For more, see [Implementing Annotation Handlers](crate#implementing-annotation-handlers)
116 pub fn parse_with_annotation_handler(
117 &mut self,
118 handler: impl FnMut(Annotation<'a>) -> Option<Annotation<'a>>,
119 ) -> ParserResult<IxdtfParseRecord<'a>> {
120 datetime::parse_annotated_date_time(&mut self.cursor, handler)
121 }
122
123 /// Parses the source as an extended [YearMonth string][temporal-ym].
124 ///
125 /// # Example
126 ///
127 /// ```rust
128 /// # use ixdtf::parsers::IxdtfParser;
129 ///
130 /// let extended_year_month = "2020-11[u-ca=iso8601]";
131 ///
132 /// let result = IxdtfParser::from_str(extended_year_month)
133 /// .parse_year_month()
134 /// .unwrap();
135 ///
136 /// let date = result.date.unwrap();
137 ///
138 /// assert_eq!(date.year, 2020);
139 /// assert_eq!(date.month, 11);
140 /// ```
141 ///
142 /// [temporal-ym]: https://tc39.es/proposal-temporal/#prod-TemporalYearMonthString
143 pub fn parse_year_month(&mut self) -> ParserResult<IxdtfParseRecord<'a>> {
144 self.parse_year_month_with_annotation_handler(Some)
145 }
146
147 /// Parses the source as an extended YearMonth string with an Annotation handler.
148 ///
149 /// For more, see [Implementing Annotation Handlers](crate#implementing-annotation-handlers)
150 pub fn parse_year_month_with_annotation_handler(
151 &mut self,
152 handler: impl FnMut(Annotation<'a>) -> Option<Annotation<'a>>,
153 ) -> ParserResult<IxdtfParseRecord<'a>> {
154 datetime::parse_annotated_year_month(&mut self.cursor, handler)
155 }
156
157 /// Parses the source as an extended [MonthDay string][temporal-md].
158 ///
159 /// # Example
160 ///
161 /// ```rust
162 /// # use ixdtf::parsers::IxdtfParser;
163 /// let extended_month_day = "1107[+04:00]";
164 ///
165 /// let result = IxdtfParser::from_str(extended_month_day)
166 /// .parse_month_day()
167 /// .unwrap();
168 ///
169 /// let date = result.date.unwrap();
170 ///
171 /// assert_eq!(date.month, 11);
172 /// assert_eq!(date.day, 7);
173 /// ```
174 ///
175 /// [temporal-md]: https://tc39.es/proposal-temporal/#prod-TemporalMonthDayString
176 pub fn parse_month_day(&mut self) -> ParserResult<IxdtfParseRecord<'a>> {
177 self.parse_month_day_with_annotation_handler(Some)
178 }
179
180 /// Parses the source as an extended MonthDay string with an Annotation handler.
181 ///
182 /// For more, see [Implementing Annotation Handlers](crate#implementing-annotation-handlers)
183 pub fn parse_month_day_with_annotation_handler(
184 &mut self,
185 handler: impl FnMut(Annotation<'a>) -> Option<Annotation<'a>>,
186 ) -> ParserResult<IxdtfParseRecord<'a>> {
187 datetime::parse_annotated_month_day(&mut self.cursor, handler)
188 }
189
190 /// Parses the source as an extended [Time string][temporal-time].
191 ///
192 /// # Example
193 ///
194 /// ```rust
195 /// # use ixdtf::parsers::{IxdtfParser, records::{Sign, TimeZoneRecord}};
196 /// let extended_time = "12:01:04-05:00[America/New_York][u-ca=iso8601]";
197 ///
198 /// let result = IxdtfParser::from_str(extended_time).parse_time().unwrap();
199 ///
200 /// let time = result.time.unwrap();
201 /// let offset = result.offset.unwrap().resolve_rfc_9557();
202 /// let tz_annotation = result.tz.unwrap();
203 ///
204 /// assert_eq!(time.hour, 12);
205 /// assert_eq!(time.minute, 1);
206 /// assert_eq!(time.second, 4);
207 /// assert_eq!(offset.sign(), Sign::Negative);
208 /// assert_eq!(offset.hour(), 5);
209 /// assert_eq!(offset.minute(), 0);
210 /// assert!(!tz_annotation.critical);
211 /// assert_eq!(
212 /// tz_annotation.tz,
213 /// TimeZoneRecord::Name("America/New_York".as_bytes())
214 /// );
215 /// ```
216 ///
217 /// [temporal-time]: https://tc39.es/proposal-temporal/#prod-TemporalTimeString
218 pub fn parse_time(&mut self) -> ParserResult<IxdtfParseRecord<'a>> {
219 self.parse_time_with_annotation_handler(Some)
220 }
221
222 /// Parses the source as an extended Time string with an Annotation handler.
223 ///
224 /// For more, see [Implementing Annotation Handlers](crate#implementing-annotation-handlers)
225 pub fn parse_time_with_annotation_handler(
226 &mut self,
227 handler: impl FnMut(Annotation<'a>) -> Option<Annotation<'a>>,
228 ) -> ParserResult<IxdtfParseRecord<'a>> {
229 time::parse_annotated_time_record(&mut self.cursor, handler)
230 }
231}
232
233/// A parser for time zone offset and IANA identifier strings.
234///
235/// ✨ *Enabled with the `timezone` Cargo feature.*
236#[derive(Debug)]
237pub struct TimeZoneParser<'a> {
238 cursor: Cursor<'a>,
239}
240
241impl<'a> TimeZoneParser<'a> {
242 /// Creates a new `TimeZoneParser` from a slice of utf-8 bytes.
243 #[inline]
244 #[must_use]
245 pub fn from_utf8(source: &'a [u8]) -> Self {
246 Self {
247 cursor: Cursor::new(source),
248 }
249 }
250
251 /// Creates a new `TimeZoneParser` from a source `&str`.
252 #[inline]
253 #[must_use]
254 #[allow(clippy::should_implement_trait)]
255 pub fn from_str(source: &'a str) -> Self {
256 Self::from_utf8(source.as_bytes())
257 }
258
259 /// Parse a UTC offset from the provided source.
260 ///
261 /// This method can parse both a minute precision and full
262 /// precision offset.
263 ///
264 /// ## Minute precision offset example
265 ///
266 /// ```rust
267 /// use ixdtf::parsers::{records::Sign, TimeZoneParser};
268 ///
269 /// let offset_src = "-05:00";
270 /// let parse_result =
271 /// TimeZoneParser::from_str(offset_src).parse_offset().unwrap();
272 /// assert_eq!(parse_result.sign(), Sign::Negative);
273 /// assert_eq!(parse_result.hour(), 5);
274 /// assert_eq!(parse_result.minute(), 0);
275 /// assert_eq!(parse_result.second(), None);
276 /// assert_eq!(parse_result.fraction(), None);
277 /// ```
278 ///
279 /// ## Full precision offset example
280 ///
281 /// ```rust
282 /// use ixdtf::parsers::{records::Sign, TimeZoneParser};
283 ///
284 /// let offset_src = "-05:00:30.123456789";
285 /// let parse_result =
286 /// TimeZoneParser::from_str(offset_src).parse_offset().unwrap();
287 /// assert_eq!(parse_result.sign(), Sign::Negative);
288 /// assert_eq!(parse_result.hour(), 5);
289 /// assert_eq!(parse_result.minute(), 0);
290 /// assert_eq!(parse_result.second(), Some(30));
291 /// let fraction = parse_result.fraction().unwrap();
292 /// assert_eq!(fraction.to_nanoseconds(), Some(123456789));
293 /// ```
294 #[inline]
295 pub fn parse_offset(&mut self) -> ParserResult<UtcOffsetRecord> {
296 let result = timezone::parse_utc_offset(&mut self.cursor)?;
297 self.cursor.close()?;
298 Ok(result)
299 }
300
301 /// Parse an IANA identifier name.
302 ///
303 ///
304 /// ```rust
305 /// use ixdtf::parsers::{records::Sign, TimeZoneParser};
306 ///
307 /// let iana_identifier = "America/Chicago";
308 /// let parse_result = TimeZoneParser::from_str(iana_identifier)
309 /// .parse_iana_identifier()
310 /// .unwrap();
311 /// assert_eq!(parse_result, iana_identifier.as_bytes());
312 ///
313 /// let iana_identifier = "Europe/Berlin";
314 /// let parse_result = TimeZoneParser::from_str(iana_identifier)
315 /// .parse_iana_identifier()
316 /// .unwrap();
317 /// assert_eq!(parse_result, iana_identifier.as_bytes());
318 /// ```
319 #[inline]
320 pub fn parse_iana_identifier(&mut self) -> ParserResult<&'a [u8]> {
321 let result = timezone::parse_tz_iana_name(&mut self.cursor)?;
322 self.cursor.close()?;
323 Ok(result)
324 }
325}
326
/// A parser for ISO8601 Duration strings.
///
/// ✨ *Enabled with the `duration` Cargo feature.*
///
/// # Example
///
/// ```rust
/// use ixdtf::parsers::{IsoDurationParser, records::{Sign, DurationParseRecord, TimeDurationRecord}};
///
/// let duration_str = "P1Y2M1DT2H10M30S";
///
/// let result = IsoDurationParser::from_str(duration_str).parse().unwrap();
///
/// let date_duration = result.date.unwrap();
///
/// let (hours, minutes, seconds, fraction) = match result.time {
///     // Hours variant is defined as { hours: u32, fraction: Option<Fraction> }
///     Some(TimeDurationRecord::Hours { hours, fraction }) => (hours, 0, 0, fraction),
///     // Minutes variant is defined as { hours: u32, minutes: u32, fraction: Option<Fraction> }
///     Some(TimeDurationRecord::Minutes { hours, minutes, fraction }) => {
///         (hours, minutes, 0, fraction)
///     }
///     // Seconds variant is defined as { hours: u32, minutes: u32, seconds: u32, fraction: Option<Fraction> }
///     Some(TimeDurationRecord::Seconds { hours, minutes, seconds, fraction }) => {
///         (hours, minutes, seconds, fraction)
///     }
///     None => (0, 0, 0, None),
/// };
///
/// assert_eq!(result.sign, Sign::Positive);
/// assert_eq!(date_duration.years, 1);
/// assert_eq!(date_duration.months, 2);
/// assert_eq!(date_duration.weeks, 0);
/// assert_eq!(date_duration.days, 1);
/// assert_eq!(hours, 2);
/// assert_eq!(minutes, 10);
/// assert_eq!(seconds, 30);
/// assert_eq!(fraction, None);
/// ```
#[cfg(feature = "duration")]
#[derive(Debug)]
pub struct IsoDurationParser<'a> {
    /// Cursor over the bytes being parsed.
    cursor: Cursor<'a>,
}

#[cfg(feature = "duration")]
impl<'a> IsoDurationParser<'a> {
    /// Builds an `IsoDurationParser` over a slice of utf-8 bytes.
    #[inline]
    #[must_use]
    pub fn from_utf8(source: &'a [u8]) -> Self {
        let cursor = Cursor::new(source);
        Self { cursor }
    }

    /// Builds an `IsoDurationParser` over the bytes of a source `&str`.
    #[inline]
    #[must_use]
    // The parser borrows `source` for `'a`, which `core::str::FromStr` cannot express.
    #[allow(clippy::should_implement_trait)]
    pub fn from_str(source: &'a str) -> Self {
        let bytes = source.as_bytes();
        Self::from_utf8(bytes)
    }

    /// Parses the source held by this `IsoDurationParser` into a `DurationParseRecord`.
    ///
    /// # Examples
    ///
    /// ## Parsing a date duration
    ///
    /// ```
    /// # use ixdtf::parsers::{IsoDurationParser, records::DurationParseRecord };
    /// let date_duration = "P1Y2M3W1D";
    ///
    /// let result = IsoDurationParser::from_str(date_duration).parse().unwrap();
    ///
    /// let date_duration = result.date.unwrap();
    ///
    /// assert!(result.time.is_none());
    /// assert_eq!(date_duration.years, 1);
    /// assert_eq!(date_duration.months, 2);
    /// assert_eq!(date_duration.weeks, 3);
    /// assert_eq!(date_duration.days, 1);
    /// ```
    ///
    /// ## Parsing a time duration
    ///
    /// ```rust
    /// # use ixdtf::parsers::{IsoDurationParser, records::{DurationParseRecord, TimeDurationRecord }};
    /// let time_duration = "PT2H10M30S";
    ///
    /// let result = IsoDurationParser::from_str(time_duration).parse().unwrap();
    ///
    /// let (hours, minutes, seconds, fraction) = match result.time {
    ///     // Hours variant is defined as { hours: u32, fraction: Option<Fraction> }
    ///     Some(TimeDurationRecord::Hours { hours, fraction }) => (hours, 0, 0, fraction),
    ///     // Minutes variant is defined as { hours: u32, minutes: u32, fraction: Option<Fraction> }
    ///     Some(TimeDurationRecord::Minutes { hours, minutes, fraction }) => {
    ///         (hours, minutes, 0, fraction)
    ///     }
    ///     // Seconds variant is defined as { hours: u32, minutes: u32, seconds: u32, fraction: Option<Fraction> }
    ///     Some(TimeDurationRecord::Seconds { hours, minutes, seconds, fraction }) => {
    ///         (hours, minutes, seconds, fraction)
    ///     }
    ///     None => (0, 0, 0, None),
    /// };
    /// assert!(result.date.is_none());
    /// assert_eq!(hours, 2);
    /// assert_eq!(minutes, 10);
    /// assert_eq!(seconds, 30);
    /// assert_eq!(fraction, None);
    /// ```
    pub fn parse(&mut self) -> ParserResult<DurationParseRecord> {
        let cursor = &mut self.cursor;
        duration::parse_duration(cursor)
    }
}
435
436// ==== Mini cursor implementation for Iso8601 targets ====
437
438/// `Cursor` is a small cursor implementation for parsing Iso8601 grammar.
439#[derive(Debug)]
440pub(crate) struct Cursor<'a> {
441 pos: usize,
442 source: &'a [u8],
443}
444
445impl<'a> Cursor<'a> {
446 /// Create a new cursor from a source `String` value.
447 #[must_use]
448 pub fn new(source: &'a [u8]) -> Self {
449 Self { pos: 0, source }
450 }
451
452 /// Returns a string value from a slice of the cursor.
453 fn slice(&self, start: usize, end: usize) -> Option<&'a [u8]> {
454 self.source.get(start..end)
455 }
456
457 /// Get current position
458 const fn pos(&self) -> usize {
459 self.pos
460 }
461
462 /// Peek the value at next position (current + 1).
463 fn peek(&self) -> Option<u8> {
464 self.peek_n(1)
465 }
466
467 /// Returns current position in source as `char`.
468 fn current(&self) -> Option<u8> {
469 self.peek_n(0)
470 }
471
472 /// Peeks the value at `n` as a `char`.
473 fn peek_n(&self, n: usize) -> Option<u8> {
474 self.source.get(self.pos + n).copied()
475 }
476
477 /// Runs the provided check on the current position.
478 fn check<F>(&self, f: F) -> Option<bool>
479 where
480 F: FnOnce(u8) -> bool,
481 {
482 self.current().map(f)
483 }
484
485 /// Runs the provided check on current position returns the default value if None.
486 fn check_or<F>(&self, default: bool, f: F) -> bool
487 where
488 F: FnOnce(u8) -> bool,
489 {
490 self.current().map_or(default, f)
491 }
492
493 /// Returns `Cursor`'s current char and advances to the next position.
494 fn next(&mut self) -> Option<u8> {
495 let result = self.current();
496 self.advance_n(1);
497 result
498 }
499
500 /// Returns the next value as a digit
501 ///
502 /// # Errors
503 /// - Returns an AbruptEnd error if cursor ends.
504 fn next_digit(&mut self) -> ParserResult<Option<u8>> {
505 let ascii_char = self.next_or(ParseError::AbruptEnd { location: "digit" })?;
506 if ascii_char.is_ascii_digit() {
507 Ok(Some(ascii_char - 48))
508 } else {
509 Ok(None)
510 }
511 }
512
513 /// A utility next method that returns an `AbruptEnd` error if invalid.
514 fn next_or(&mut self, err: ParseError) -> ParserResult<u8> {
515 self.next().ok_or(err)
516 }
517
518 /// Advances the cursor's position by n bytes.
519 fn advance_n(&mut self, n: usize) {
520 self.pos += n;
521 }
522
523 // Advances the cursor by 1 byte.
524 fn advance(&mut self) {
525 self.advance_n(1)
526 }
527
528 /// Utility function to advance when a condition is true
529 fn advance_if(&mut self, condition: bool) {
530 if condition {
531 self.advance();
532 }
533 }
534
535 /// Closes the current cursor by checking if all contents have been consumed. If not, returns an error for invalid syntax.
536 fn close(&mut self) -> ParserResult<()> {
537 if self.pos < self.source.len() {
538 return Err(ParseError::InvalidEnd);
539 }
540 Ok(())
541 }
542}