1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
/// Partial percent-decoding.
///
/// Performs percent-decoding on a slice but can selectively skip decoding certain sequences.
///
/// The set of protected ASCII bytes is fixed at construction time (see [`Quoter::new`]). An escape
/// sequence that decodes to a protected byte is left in its percent-encoded form by
/// [`Quoter::requote`]; every other valid escape sequence is decoded.
///
/// # Examples
/// ```
/// # use actix_router::Quoter;
/// // + is set as a protected character and will not be decoded...
/// let q = Quoter::new(&[], b"+");
///
/// // ...but the other encoded characters (like the hyphen below) will.
/// assert_eq!(q.requote(b"/a%2Db%2Bc").unwrap(), b"/a-b%2Bc");
/// ```
pub struct Quoter {
    /// Simple bit-map of protected values in the 0-127 ASCII range.
    ///
    /// A set bit means an escape sequence decoding to that byte is left encoded.
    protected_table: AsciiBitmap,
}

impl Quoter {
    /// Constructs a new `Quoter` instance given a set of protected ASCII bytes.
    ///
    /// The first argument is ignored but is kept for backward compatibility.
    ///
    /// # Panics
    /// Panics if any of the `protected` bytes are not in the 0-127 ASCII range.
    pub fn new(_: &[u8], protected: &[u8]) -> Quoter {
        let mut protected_table = AsciiBitmap::default();

        // prepare protected table
        for &ch in protected {
            protected_table.set_bit(ch);
        }

        Quoter { protected_table }
    }

    /// Decodes the next escape sequence, if any, and advances `val`.
    #[inline(always)]
    fn decode_next<'a>(&self, val: &mut &'a [u8]) -> Option<(&'a [u8], u8)> {
        for i in 0..val.len() {
            if let (prev, [b'%', p1, p2, rem @ ..]) = val.split_at(i) {
                if let Some(ch) = hex_pair_to_char(*p1, *p2)
                    // ignore protected ascii bytes
                    .filter(|&ch| !(ch < 128 && self.protected_table.bit_at(ch)))
                {
                    *val = rem;
                    return Some((prev, ch));
                }
            }
        }

        None
    }

    /// Partially percent-decodes the given bytes.
    ///
    /// Escape sequences of the protected set are *not* decoded.
    ///
    /// Returns `None` when no modification to the original bytes was required.
    ///
    /// Invalid/incomplete percent-encoding sequences are passed unmodified.
    pub fn requote(&self, val: &[u8]) -> Option<Vec<u8>> {
        let mut remaining = val;

        // early return indicates that no percent-encoded sequences exist and we can skip allocation
        let (pre, decoded_char) = self.decode_next(&mut remaining)?;

        // decoded output will always be shorter than the input
        let mut decoded = Vec::<u8>::with_capacity(val.len());

        // push first segment and decoded char
        decoded.extend_from_slice(pre);
        decoded.push(decoded_char);

        // decode and push rest of segments and decoded chars
        while let Some((prev, ch)) = self.decode_next(&mut remaining) {
            // this ugly conditional achieves +50% perf in cases where this is a tight loop.
            if !prev.is_empty() {
                decoded.extend_from_slice(prev);
            }
            decoded.push(ch);
        }

        decoded.extend_from_slice(remaining);

        Some(decoded)
    }

    pub(crate) fn requote_str_lossy(&self, val: &str) -> Option<String> {
        self.requote(val.as_bytes())
            .map(|data| String::from_utf8_lossy(&data).into_owned())
    }
}

/// Converts a pair of ASCII hex digits into the byte they encode.
///
/// `d1` supplies the high nibble and `d2` the low nibble; upper- and lowercase digits are both
/// accepted. Returns `None` if either byte is not an ASCII hex digit.
///
/// - `0x33 ('3'), 0x30 ('0') => 0x30 ('0')`
/// - `0x34 ('4'), 0x31 ('1') => 0x41 ('A')`
/// - `0x36 ('6'), 0x31 ('1') => 0x61 ('a')`
#[inline(always)]
fn hex_pair_to_char(d1: u8, d2: u8) -> Option<u8> {
    // value of a single ASCII hex digit, if it is one
    let nibble = |digit: u8| -> Option<u8> {
        match digit {
            b'0'..=b'9' => Some(digit - b'0'),
            b'a'..=b'f' => Some(digit - b'a' + 10),
            b'A'..=b'F' => Some(digit - b'A' + 10),
            _ => None,
        }
    };

    let high = nibble(d1)?;
    let low = nibble(d2)?;

    // the high nibble fills the upper four bits
    Some((high << 4) | low)
}

/// Fixed-size bit set holding one flag per byte value in the 0-127 ASCII range.
#[derive(Debug, Default, Clone)]
struct AsciiBitmap {
    /// 16 bytes of 8 bits each; the flag for byte `ch` is bit `ch % 8` of `array[ch / 8]`.
    array: [u8; 16],
}

impl AsciiBitmap {
    /// Splits `ch` into the index of its storage byte and the mask selecting its bit.
    ///
    /// The index is not range-checked here; callers index `array` with it directly.
    fn locate(ch: u8) -> (usize, u8) {
        (usize::from(ch / 8), 1u8 << (ch % 8))
    }

    /// Turns on the flag for `ch`.
    ///
    /// # Panics
    /// Panics if `ch` is outside the 0-127 range covered by the bit-map.
    fn set_bit(&mut self, ch: u8) {
        let (slot, mask) = Self::locate(ch);
        self.array[slot] |= mask;
    }

    /// Returns `true` if the flag for `ch` is turned on.
    ///
    /// # Panics
    /// Panics if `ch` is outside the 0-127 range covered by the bit-map.
    fn bit_at(&self, ch: u8) -> bool {
        let (slot, mask) = Self::locate(ch);
        (self.array[slot] & mask) != 0
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Escapes of protected bytes stay encoded; every other valid escape is decoded.
    #[test]
    fn custom_quoter() {
        let q = Quoter::new(b"", b"+");
        assert_eq!(q.requote(b"/a%25c").unwrap(), b"/a%c");
        assert_eq!(q.requote(b"/a%2Bc"), None);

        // the first constructor argument is ignored, so only `/` is protected here
        let q = Quoter::new(b"%+", b"/");
        assert_eq!(q.requote(b"/a%25b%2Bc").unwrap(), b"/a%b+c");
        assert_eq!(q.requote(b"/a%2fb"), None);
        assert_eq!(q.requote(b"/a%2Fb"), None);
        assert_eq!(q.requote(b"/a%0Ab").unwrap(), b"/a\nb");
    }

    /// A protected escape followed by a decodable one ends up in the literal prefix.
    #[test]
    fn protected_before_unprotected() {
        let q = Quoter::new(b"", b"+");
        assert_eq!(q.requote(b"%2B%2D").unwrap(), b"%2B-");
        assert_eq!(q.requote(b"/a%2Db%2Bc").unwrap(), b"/a-b%2Bc");
    }

    /// Bytes above the ASCII range are decoded and raw non-ASCII input passes through.
    #[test]
    fn non_ascii() {
        let q = Quoter::new(b"%+", b"/");
        assert_eq!(q.requote(b"/a%FE\xffb").unwrap(), b"/a\xfe\xffb");
        assert_eq!(q.requote(b"/a\xfe\xffb"), None);
    }

    /// Hex digits are accepted in either case.
    #[test]
    fn lowercase_hex_digits() {
        let q = Quoter::new(b"", b"");
        assert_eq!(q.requote(b"/a%2db").unwrap(), b"/a-b");
        assert_eq!(q.requote(b"/a%fe").unwrap(), b"/a\xfe");
    }

    /// Malformed sequences are passed through unmodified.
    #[test]
    fn invalid_sequences() {
        let q = Quoter::new(b"%+", b"/");
        assert_eq!(q.requote(b"/a%2x%2X%%"), None);
        assert_eq!(q.requote(b"/a%20%2X%%").unwrap(), b"/a %2X%%");
    }

    /// Empty input and sequences cut off at the end of the input are left alone.
    #[test]
    fn empty_and_truncated_input() {
        let q = Quoter::new(b"", b"");
        assert_eq!(q.requote(b""), None);
        assert_eq!(q.requote(b"%"), None);
        assert_eq!(q.requote(b"/a%2"), None);
        assert_eq!(q.requote(b"%41").unwrap(), b"A");
        assert_eq!(q.requote(b"%41%4").unwrap(), b"A%4");
    }

    /// Input without any escape sequence reports that no modification was required.
    #[test]
    fn quoter_no_modification() {
        let q = Quoter::new(b"", b"");
        assert_eq!(q.requote(b"/abc/../efg"), None);
    }

    /// The string variant mirrors `requote` and replaces invalid UTF-8 lossily.
    #[test]
    fn str_lossy() {
        let q = Quoter::new(b"", b"");
        assert_eq!(q.requote_str_lossy("/abc"), None);
        assert_eq!(q.requote_str_lossy("/a%20b"), Some("/a b".to_owned()));
        assert_eq!(
            q.requote_str_lossy("/a%FFb"),
            Some("/a\u{FFFD}b".to_owned())
        );
    }

    /// Protected bytes outside the 0-127 range are rejected at construction time.
    #[test]
    #[should_panic]
    fn non_ascii_protected_byte_panics() {
        let _ = Quoter::new(b"", &[0x80]);
    }
}