1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
//! Token streams and tools for converting to and from them.
//!
//! *“What’s up?” “I don’t know,” said Marvin, “I’ve never been there.”*
//!
//! [`Stream`] is the primary type used to feed input data into a chumsky parser. You can create them in a number of
//! ways: from strings, iterators, arrays, etc.

use super::*;
use alloc::vec;

/// Internal helper used to top up a token buffer from an iterator in batches.
trait StreamExtend<T>: Iterator<Item = T> {
    /// Append items taken from `self` to the end of `v`, treating `n` as the requested batch size. Implementations
    /// may append more or fewer than `n` items, but must append at least one unless the stream has been exhausted
    /// (appending nothing signals exhaustion).
    fn extend(&mut self, v: &mut Vec<T>, n: usize);
}

#[allow(deprecated)]
impl<I: Iterator> StreamExtend<I::Item> for I {
    fn extend(&mut self, v: &mut Vec<I::Item>, n: usize) {
        // Make room up front so that the pushes below never need to reallocate.
        v.reserve(n);
        for item in self.by_ref().take(n) {
            v.push(item);
        }
    }
}

/// A utility type used to flatten input trees. See [`Stream::from_nested`].
///
/// This is the return type of the flattening function passed to [`Stream::from_nested`]: for each node of the input
/// tree it reports whether the node is a single input or a collection of sub-trees that must be flattened in turn.
pub enum Flat<I, Iter> {
    /// The input tree flattens into a single input.
    ///
    /// The contained value is emitted into the resulting stream as-is.
    Single(I),
    /// The input tree flattens into many sub-trees.
    ///
    /// Every item produced by the contained iterator is passed to the flattening function again, in order, before
    /// flattening continues with the siblings of this tree.
    Many(Iter),
}

/// A type that represents a stream of input tokens. Unlike [`Iterator`], this type supports backtracking and a few
/// other features required by the crate.
///
/// Tokens are pulled from the underlying iterator in batches and retained in an internal buffer, which is what allows
/// the stream to be rewound to an earlier offset.
#[allow(deprecated)]
pub struct Stream<
    'a,
    I,
    S: Span,
    Iter: Iterator<Item = (I, S)> + ?Sized = dyn Iterator<Item = (I, S)> + 'a,
> {
    // Uses `'a`, which otherwise appears only in the default for `Iter`.
    pub(crate) phantom: PhantomData<&'a ()>,
    // Span handed out in place of a token's span once the input is exhausted.
    pub(crate) eoi: S,
    // Index into `buffer` of the next token to be yielded; saving and restoring it is how backtracking works.
    pub(crate) offset: usize,
    // Every `(token, span)` pair pulled from `iter` so far, in order.
    pub(crate) buffer: Vec<(I, S)>,
    // Source of the tokens that have not been buffered yet. Kept as the last field, which is what permits `Iter` to
    // be unsized (e.g. a `dyn Iterator`).
    pub(crate) iter: Iter,
}

/// A [`Stream`] that pulls tokens from a boxed [`Iterator`].
///
/// The iterator is stored as a trait object, so streams built from different iterator types all share this one type.
/// The boxed iterator may borrow data for the lifetime `'a`.
pub type BoxStream<'a, I, S> = Stream<'a, I, S, Box<dyn Iterator<Item = (I, S)> + 'a>>;

impl<'a, I, S: Span, Iter: Iterator<Item = (I, S)>> Stream<'a, I, S, Iter> {
    /// Build a stream that draws its `(Token, Span)` pairs from `iter`. The `eoi` span is the one reported once the
    /// iterator has run out of tokens, so it should describe the end of the input.
    ///
    /// Spans are not required to correspond exactly to the position of each input within the stream, but they should
    /// not overlap and should appear in monotonically-increasing order.
    pub fn from_iter(eoi: S, iter: Iter) -> Self {
        // Nothing is pulled from `iter` here: the buffer starts out empty and the cursor sits at the first token.
        let buffer = Vec::new();
        Self {
            iter,
            buffer,
            offset: 0,
            eoi,
            phantom: PhantomData,
        }
    }

    /// Pull every remaining token out of the underlying iterator and return an iterator over clones of all buffered
    /// tokens. The parsing position of the stream is left untouched, so the stream can still be parsed afterwards.
    ///
    /// This is mostly useful for inspecting the input that a parser is going to see while debugging.
    pub fn fetch_tokens(&mut self) -> impl Iterator<Item = (I, S)> + '_
    where
        (I, S): Clone,
    {
        let Self { buffer, iter, .. } = self;
        // Drain whatever the source still has into the buffer, then hand out copies of the buffered tokens.
        buffer.extend(iter);
        buffer.iter().cloned()
    }
}

impl<'a, I: Clone, S: Span + 'a> BoxStream<'a, I, S> {
    /// Create a new `Stream` by flattening an iterator of nested tokens with the given function.
    ///
    /// Compilers quite often match up delimiters during the lexing stage (Rust does this!), in which case the lexer
    /// produces a series of nested token trees rather than a flat list of tokens. This function allows you to turn
    /// such token trees back into a linear token stream so that they can be parsed (Chumsky currently only supports
    /// parsing linear streams of inputs).
    ///
    /// The `flatten` function is called on each tree in turn. Returning [`Flat::Single`] emits one input into the
    /// stream; returning [`Flat::Many`] causes each of the yielded sub-trees to be flattened, in order, before
    /// flattening carries on with the trees that follow.
    ///
    /// For reference, [here](https://docs.rs/syn/0.11.1/syn/enum.TokenTree.html) is `syn`'s `TokenTree` type that it
    /// uses when parsing Rust syntax.
    ///
    /// # Examples
    ///
    /// ```
    /// # use chumsky::{Stream, BoxStream, Flat};
    /// type Span = std::ops::Range<usize>;
    ///
    /// fn span_at(at: usize) -> Span { at..at + 1 }
    ///
    /// #[derive(Clone)]
    /// enum Token {
    ///     Local(String),
    ///     Int(i64),
    ///     Bool(bool),
    ///     Add,
    ///     Sub,
    ///     OpenParen,
    ///     CloseParen,
    ///     OpenBrace,
    ///     CloseBrace,
    ///     // etc.
    /// }
    ///
    /// enum Delimiter {
    ///     Paren, // ( ... )
    ///     Brace, // { ... }
    /// }
    ///
    /// // The structure of this token tree is very similar to that which Rust uses.
    /// // See: https://docs.rs/syn/0.11.1/syn/enum.TokenTree.html
    /// enum TokenTree {
    ///     Token(Token),
    ///     Tree(Delimiter, Vec<(TokenTree, Span)>),
    /// }
    ///
    /// // A function that turns a series of nested token trees into a linear stream that can be used for parsing.
    /// fn flatten_tts(eoi: Span, token_trees: Vec<(TokenTree, Span)>) -> BoxStream<'static, Token, Span> {
    ///     use std::iter::once;
    ///     // Currently, this is quite an explicit process: it will likely become easier in future versions of Chumsky.
    ///     Stream::from_nested(
    ///         eoi,
    ///         token_trees.into_iter(),
    ///         |(tt, span)| match tt {
    ///             // For token trees that contain just a single token, no flattening needs to occur!
    ///             TokenTree::Token(token) => Flat::Single((token, span)),
    ///             // Flatten a parenthesised token tree into an iterator of the inner token trees, surrounded by parenthesis tokens
    ///             TokenTree::Tree(Delimiter::Paren, tree) => Flat::Many(once((TokenTree::Token(Token::OpenParen), span_at(span.start)))
    ///                 .chain(tree.into_iter())
    ///                 .chain(once((TokenTree::Token(Token::CloseParen), span_at(span.end - 1))))),
    ///             // Flatten a braced token tree into an iterator of the inner token trees, surrounded by brace tokens
    ///             TokenTree::Tree(Delimiter::Brace, tree) => Flat::Many(once((TokenTree::Token(Token::OpenBrace), span_at(span.start)))
    ///                 .chain(tree.into_iter())
    ///                 .chain(once((TokenTree::Token(Token::CloseBrace), span_at(span.end - 1))))),
    ///         }
    ///     )
    /// }
    /// ```
    pub fn from_nested<
        P: 'a,
        Iter: Iterator<Item = (P, S)>,
        Many: Iterator<Item = (P, S)>,
        F: FnMut((P, S)) -> Flat<(I, S), Many> + 'a,
    >(
        eoi: S,
        iter: Iter,
        mut flatten: F,
    ) -> Self {
        // Stack of not-yet-flattened trees, one queue per nesting level; the innermost level is on top.
        let mut stack: Vec<alloc::collections::VecDeque<(P, S)>> = vec![iter.collect()];
        let tokens = core::iter::from_fn(move || {
            while let Some(level) = stack.last_mut() {
                match level.pop_front() {
                    Some(tree) => match flatten(tree) {
                        // A leaf: this is the next token of the stream.
                        Flat::Single(input) => return Some(input),
                        // A nested tree: descend into its children before continuing with this level.
                        Flat::Many(children) => stack.push(children.collect()),
                    },
                    // This level has been used up, so resume the enclosing one.
                    None => {
                        stack.pop();
                    }
                }
            }
            // Every level has been drained: the input is exhausted.
            None
        });
        Self::from_iter(eoi, Box::new(tokens))
    }
}

impl<'a, I: Clone, S: Span> Stream<'a, I, S> {
    /// Index of the next token that `next` would yield.
    pub(crate) fn offset(&self) -> usize {
        self.offset
    }

    /// Capture the current position so that it can later be restored with `revert`.
    pub(crate) fn save(&self) -> usize {
        self.offset
    }

    /// Move the stream to `offset`, typically a position previously obtained from `save`.
    pub(crate) fn revert(&mut self, offset: usize) {
        self.offset = offset;
    }

    /// Make sure the token at `offset` has been buffered (if the input is long enough) and return a reference to it.
    ///
    /// Returns `None` when the underlying iterator ran out before reaching `offset`.
    fn pull_until(&mut self, offset: usize) -> Option<&(I, S)> {
        // Extra tokens requested beyond the one that was asked for, so that the iterator is polled in batches.
        const READ_AHEAD: usize = 1024;

        // Only touch the iterator when the requested token is not buffered yet. Refilling unconditionally would poll
        // the iterator (and grow the buffer) on every single lookup, even those that are already satisfied.
        if offset >= self.buffer.len() {
            let additional = offset - self.buffer.len() + READ_AHEAD;
            #[allow(deprecated)]
            (&mut &mut self.iter as &mut dyn StreamExtend<_>).extend(&mut self.buffer, additional);
        }
        self.buffer.get(offset)
    }

    /// Advance past the next token if, and only if, `f` returns `true` for it.
    ///
    /// Returns whether a token was skipped. At the end of input `f` is not called and `false` is returned.
    pub(crate) fn skip_if(&mut self, f: impl FnOnce(&I) -> bool) -> bool {
        // The predicate only needs to look at the token, so borrow it from the buffer instead of cloning it.
        let matched = self
            .pull_until(self.offset)
            .map_or(false, |(tok, _)| f(tok));
        if matched {
            self.offset += 1;
        }
        matched
    }

    /// Take the next token from the stream.
    ///
    /// Returns the offset of the token, its span, and the token itself. At the end of input the current offset, the
    /// end-of-input span, and `None` are returned and the stream does not advance.
    pub(crate) fn next(&mut self) -> (usize, S, Option<I>) {
        let at = self.offset;
        match self.pull_until(at).cloned() {
            Some((out, span)) => {
                self.offset = at + 1;
                (at, span, Some(out))
            }
            None => (at, self.eoi.clone(), None),
        }
    }

    /// Build a span reaching from the token at `start_offset` to the most recently consumed token.
    ///
    /// The span begins at the start of the token at `start_offset` and ends at the end of the token just before the
    /// current offset (or of the token at `start_offset` itself if nothing has been consumed since). Whenever one of
    /// those tokens lies beyond the end of input, the end-of-input span is used in its place.
    pub(crate) fn span_since(&mut self, start_offset: usize) -> S {
        // On failure the message shows the violated inequality: `start_offset > self.offset`.
        debug_assert!(
            start_offset <= self.offset,
            "{} > {}",
            start_offset,
            self.offset
        );
        let start = self
            .pull_until(start_offset)
            .as_ref()
            .map(|(_, s)| s.start())
            .unwrap_or_else(|| self.eoi.start());
        let end = self
            .pull_until(self.offset.saturating_sub(1).max(start_offset))
            .as_ref()
            .map(|(_, s)| s.end())
            .unwrap_or_else(|| self.eoi.end());
        S::new(self.eoi.context(), start..end)
    }

    /// Run `f` on the stream, rewinding to the position held before the call unless `f` asks to commit.
    ///
    /// `f` returns a `(commit, value)` pair; `value` is passed through to the caller either way.
    pub(crate) fn attempt<R, F: FnOnce(&mut Self) -> (bool, R)>(&mut self, f: F) -> R {
        let old_offset = self.offset;
        let (commit, out) = f(self);
        if !commit {
            self.offset = old_offset;
        }
        out
    }

    /// Run the parsing function `f`, keeping the input it consumed only if the result half of its `PResult` is `Ok`;
    /// otherwise the stream is rewound to where it was before the call.
    pub(crate) fn try_parse<O, E, F: FnOnce(&mut Self) -> PResult<I, O, E>>(
        &mut self,
        f: F,
    ) -> PResult<I, O, E> {
        self.attempt(move |stream| {
            let out = f(stream);
            (out.1.is_ok(), out)
        })
    }
}

impl<'a> From<&'a str>
    for Stream<'a, char, Range<usize>, Box<dyn Iterator<Item = (char, Range<usize>)> + 'a>>
{
    /// Build a stream of the characters of `s`, where the `i`th character is given the span `i..i + 1`.
    ///
    /// Please note that this impl currently counts in characters rather than byte offsets, which is likely to change
    /// in the future. If you wish to use byte offsets, you can do so with [`Stream::from_iter`].
    fn from(s: &'a str) -> Self {
        // The end-of-input span sits just past the final character.
        let eoi = s.chars().count();
        let tokens = s.chars().enumerate().map(|(idx, ch)| (ch, idx..idx + 1));
        Self::from_iter(eoi..eoi, Box::new(tokens))
    }
}

impl<'a> From<String>
    for Stream<'a, char, Range<usize>, Box<dyn Iterator<Item = (char, Range<usize>)>>>
{
    /// Build a stream of the characters of `s`, where the `i`th character is given the span `i..i + 1`.
    ///
    /// Please note that this impl currently counts in characters rather than byte offsets, which is likely to change
    /// in the future. If you wish to use byte offsets, you can do so with [`Stream::from_iter`].
    fn from(s: String) -> Self {
        // Decode the characters into an owned buffer so that the resulting iterator does not borrow from `s`.
        let chars: Vec<char> = s.chars().collect();
        let eoi = chars.len();
        let tokens = chars.into_iter().enumerate().map(|(idx, ch)| (ch, idx..idx + 1));
        Self::from_iter(eoi..eoi, Box::new(tokens))
    }
}

impl<'a, T: Clone> From<&'a [T]>
    for Stream<'a, T, Range<usize>, Box<dyn Iterator<Item = (T, Range<usize>)> + 'a>>
{
    /// Build a stream of clones of the elements of `s`, where the element at index `i` is given the span `i..i + 1`.
    fn from(s: &'a [T]) -> Self {
        // The end-of-input span sits just past the final element.
        let eoi = s.len();
        let tokens = s.iter().enumerate().map(|(idx, item)| (item.clone(), idx..idx + 1));
        Self::from_iter(eoi..eoi, Box::new(tokens))
    }
}

impl<'a, T: Clone + 'a> From<Vec<T>>
    for Stream<'a, T, Range<usize>, Box<dyn Iterator<Item = (T, Range<usize>)> + 'a>>
{
    /// Build a stream that yields the elements of `s` by value, where the element at index `i` is given the span
    /// `i..i + 1`.
    fn from(s: Vec<T>) -> Self {
        // Read the length before the vector is consumed: the end-of-input span sits just past the final element.
        let eoi = s.len();
        let tokens = s.into_iter().enumerate().map(|(idx, item)| (item, idx..idx + 1));
        Self::from_iter(eoi..eoi, Box::new(tokens))
    }
}

impl<'a, T: Clone + 'a, const N: usize> From<[T; N]>
    for Stream<'a, T, Range<usize>, Box<dyn Iterator<Item = (T, Range<usize>)> + 'a>>
{
    fn from(s: [T; N]) -> Self {
        Self::from_iter(
            N..N,
            Box::new(
                core::array::IntoIter::new(s)
                    .enumerate()
                    .map(|(i, x)| (x, i..i + 1)),
            ),
        )
    }
}

impl<'a, T: Clone, const N: usize> From<&'a [T; N]>
    for Stream<'a, T, Range<usize>, Box<dyn Iterator<Item = (T, Range<usize>)> + 'a>>
{
    /// Build a stream of clones of the elements of `s`, where the element at index `i` is given the span `i..i + 1`.
    /// The end-of-input span is `N..N`.
    fn from(s: &'a [T; N]) -> Self {
        let tokens = s.iter().enumerate().map(|(idx, item)| (item.clone(), idx..idx + 1));
        Self::from_iter(N..N, Box::new(tokens))
    }
}

// impl<'a, T: Clone, S: Clone + Span<Context = ()>> From<&'a [(T, S)]> for Stream<'a, T, S, Box<dyn Iterator<Item = (T, S)> + 'a>>
//     where S::Offset: Default
// {
//     fn from(s: &'a [(T, S)]) -> Self {
//         Self::from_iter(Default::default(), Box::new(s.iter().cloned()))
//     }
// }