| field | value | |
|---|---|---|
| author | Alex Crichton <alex@alexcrichton.com> | 2018-12-17 17:45:35 -0800 |
| committer | Alex Crichton <alex@alexcrichton.com> | 2018-12-17 17:45:35 -0800 |
| commit | c1a369f44762045e65989caa9491e153d1f358e6 | |
| tree | 5fef35e1bd647687cdecda830134d5079d64ea4d | /src/tokens.rs |
| parent | 1ef180d06ed4ec207c41b2595feaca84959e709e | |
| download | milf-rs-c1a369f44762045e65989caa9491e153d1f358e6.tar.gz, milf-rs-c1a369f44762045e65989caa9491e153d1f358e6.zip | |
Run `cargo fmt`
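The commit message indicates this is a pure formatting pass over the tree. As a minimal sketch (assuming a Rust toolchain with the rustfmt component installed), such a pass is typically run and verified with:

```sh
# Rewrite all crate sources in place using rustfmt's defaults
cargo fmt

# Report formatting differences without modifying files (useful in CI);
# exits non-zero if any file would be reformatted
cargo fmt -- --check
```

`cargo fmt` rewrites the sources in place; the `--check` form only reports files that would change.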
Diffstat (limited to 'src/tokens.rs')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/tokens.rs | 359 |

1 file changed, 201 insertions, 158 deletions
diff --git a/src/tokens.rs b/src/tokens.rs
index 382c1ec..064c804 100644
--- a/src/tokens.rs
+++ b/src/tokens.rs
@@ -38,7 +38,11 @@ pub enum Token<'a> {
     RightBracket,
 
     Keylike(&'a str),
-    String { src: &'a str, val: Cow<'a, str>, multiline: bool },
+    String {
+        src: &'a str,
+        val: Cow<'a, str>,
+        multiline: bool,
+    },
 }
 
 #[derive(Eq, PartialEq, Debug)]
@@ -53,7 +57,11 @@ pub enum Error {
     NewlineInTableKey(usize),
     MultilineStringKey(usize),
     EmptyTableKey(usize),
-    Wanted { at: usize, expected: &'static str, found: &'static str },
+    Wanted {
+        at: usize,
+        expected: &'static str,
+        found: &'static str,
+    },
 }
 
 #[derive(Clone)]
@@ -101,10 +109,16 @@ impl<'a> Tokenizer<'a> {
             Some((start, '}')) => (start, RightBrace),
             Some((start, '[')) => (start, LeftBracket),
             Some((start, ']')) => (start, RightBracket),
-            Some((start, '\'')) => return self.literal_string(start)
-                .map(|t| Some((self.step_span(start), t))),
-            Some((start, '"')) => return self.basic_string(start)
-                .map(|t| Some((self.step_span(start), t))),
+            Some((start, '\'')) => {
+                return self
+                    .literal_string(start)
+                    .map(|t| Some((self.step_span(start), t)))
+            }
+            Some((start, '"')) => {
+                return self
+                    .basic_string(start)
+                    .map(|t| Some((self.step_span(start), t)))
+            }
             Some((start, ch)) if is_keylike(ch) => (start, self.keylike(start)),
             Some((start, ch)) => return Err(Error::Unexpected(start, ch)),
@@ -156,13 +170,11 @@ impl<'a> Tokenizer<'a> {
                     })
                 }
             }
-            None => {
-                Err(Error::Wanted {
-                    at: self.input.len(),
-                    expected: expected.describe(),
-                    found: "eof",
-                })
-            }
+            None => Err(Error::Wanted {
+                at: self.input.len(),
+                expected: expected.describe(),
+                found: "eof",
+            }),
         }
     }
@@ -170,33 +182,36 @@ impl<'a> Tokenizer<'a> {
         let current = self.current();
         match self.next()? {
             Some((span, Token::Keylike(k))) => Ok((span, k.into())),
-            Some((span, Token::String { src, val, multiline })) => {
+            Some((
+                span,
+                Token::String {
+                    src,
+                    val,
+                    multiline,
+                },
+            )) => {
                 let offset = self.substr_offset(src);
                 if multiline {
-                    return Err(Error::MultilineStringKey(offset))
+                    return Err(Error::MultilineStringKey(offset));
                 }
                 if val == "" {
-                    return Err(Error::EmptyTableKey(offset))
+                    return Err(Error::EmptyTableKey(offset));
                 }
                 match src.find('\n') {
                     None => Ok((span, val)),
                     Some(i) => Err(Error::NewlineInTableKey(offset + i)),
                 }
             }
-            Some((_, other)) => {
-                Err(Error::Wanted {
-                    at: current,
-                    expected: "a table key",
-                    found: other.describe(),
-                })
-            }
-            None => {
-                Err(Error::Wanted {
-                    at: self.input.len(),
-                    expected: "a table key",
-                    found: "eof",
-                })
-            }
+            Some((_, other)) => Err(Error::Wanted {
+                at: current,
+                expected: "a table key",
+                found: other.describe(),
+            }),
+            None => Err(Error::Wanted {
+                at: self.input.len(),
+                expected: "a table key",
+                found: "eof",
+            }),
         }
     }
@@ -209,7 +224,7 @@ impl<'a> Tokenizer<'a> {
     pub fn eat_comment(&mut self) -> Result<bool, Error> {
         if !self.eatc('#') {
-            return Ok(false)
+            return Ok(false);
         }
         drop(self.comment_token(0));
         self.eat_newline_or_eof().map(|()| true)
@@ -218,23 +233,19 @@
     pub fn eat_newline_or_eof(&mut self) -> Result<(), Error> {
         let current = self.current();
         match self.next()? {
-            None |
-            Some((_, Token::Newline)) => Ok(()),
-            Some((_, other)) => {
-                Err(Error::Wanted {
-                    at: current,
-                    expected: "newline",
-                    found: other.describe(),
-                })
-            }
+            None | Some((_, Token::Newline)) => Ok(()),
+            Some((_, other)) => Err(Error::Wanted {
+                at: current,
+                expected: "newline",
+                found: other.describe(),
+            }),
         }
     }
 
     pub fn skip_to_newline(&mut self) {
         loop {
             match self.one() {
-                Some((_, '\n')) |
-                None => break,
+                Some((_, '\n')) | None => break,
                 _ => {}
             }
         }
@@ -251,7 +262,11 @@
     }
 
     pub fn current(&mut self) -> usize {
-        self.chars.clone().next().map(|i| i.0).unwrap_or(self.input.len())
+        self.chars
+            .clone()
+            .next()
+            .map(|i| i.0)
+            .unwrap_or(self.input.len())
     }
 
     pub fn input(&self) -> &'a str {
@@ -268,30 +283,35 @@
     fn comment_token(&mut self, start: usize) -> Token<'a> {
         while let Some((_, ch)) = self.chars.clone().next() {
             if ch != '\t' && (ch < '\u{20}' || ch > '\u{10ffff}') {
-                break
+                break;
             }
             self.one();
         }
         Comment(&self.input[start..self.current()])
     }
 
-    fn read_string(&mut self,
-                   delim: char,
-                   start: usize,
-                   new_ch: &mut FnMut(&mut Tokenizer, &mut MaybeString,
-                                      bool, usize, char)
-                                     -> Result<(), Error>)
-                   -> Result<Token<'a>, Error> {
+    fn read_string(
+        &mut self,
+        delim: char,
+        start: usize,
+        new_ch: &mut FnMut(
+            &mut Tokenizer,
+            &mut MaybeString,
+            bool,
+            usize,
+            char,
+        ) -> Result<(), Error>,
+    ) -> Result<Token<'a>, Error> {
         let mut multiline = false;
         if self.eatc(delim) {
             if self.eatc(delim) {
                 multiline = true;
             } else {
                 return Ok(String {
-                    src: &self.input[start..start+2],
+                    src: &self.input[start..start + 2],
                     val: Cow::Borrowed(""),
                     multiline: false,
-                })
+                });
             }
         }
         let mut val = MaybeString::NotEscaped(self.current());
@@ -309,9 +329,9 @@ impl<'a> Tokenizer<'a> {
                         } else {
                             val.push('\n');
                         }
-                        continue
+                        continue;
                     } else {
-                        return Err(Error::NewlineInString(i))
+                        return Err(Error::NewlineInString(i));
                    }
                 }
                 Some((i, ch)) if ch == delim => {
@@ -319,7 +339,7 @@
                         for _ in 0..2 {
                             if !self.eatc(delim) {
                                 val.push(delim);
-                                continue 'outer
+                                continue 'outer;
                             }
                         }
                     }
@@ -327,10 +347,10 @@ impl<'a> Tokenizer<'a> {
                         src: &self.input[start..self.current()],
                         val: val.into_cow(&self.input[..i]),
                         multiline: multiline,
-                    })
+                    });
                 }
                 Some((i, c)) => new_ch(self, &mut val, multiline, i, c)?,
-                None => return Err(Error::UnterminatedString(start))
+                None => return Err(Error::UnterminatedString(start)),
             }
         }
     }
@@ -347,61 +367,56 @@ impl<'a> Tokenizer<'a> {
     }
 
     fn basic_string(&mut self, start: usize) -> Result<Token<'a>, Error> {
-        self.read_string('"', start, &mut |me, val, multi, i, ch| {
-            match ch {
-                '\\' => {
-                    val.to_owned(&me.input[..i]);
-                    match me.chars.next() {
-                        Some((_, '"')) => val.push('"'),
-                        Some((_, '\\')) => val.push('\\'),
-                        Some((_, 'b')) => val.push('\u{8}'),
-                        Some((_, 'f')) => val.push('\u{c}'),
-                        Some((_, 'n')) => val.push('\n'),
-                        Some((_, 'r')) => val.push('\r'),
-                        Some((_, 't')) => val.push('\t'),
-                        Some((i, c @ 'u')) |
-                        Some((i, c @ 'U')) => {
-                            let len = if c == 'u' {4} else {8};
-                            val.push(me.hex(start, i, len)?);
-                        }
-                        Some((i, c @ ' ')) |
-                        Some((i, c @ '\t')) |
-                        Some((i, c @ '\n')) if multi => {
-                            if c != '\n' {
-                                while let Some((_, ch)) = me.chars.clone().next() {
-                                    match ch {
-                                        ' ' | '\t' => {
-                                            me.chars.next();
-                                            continue
-                                        },
-                                        '\n' => {
-                                            me.chars.next();
-                                            break
-                                        },
-                                        _ => return Err(Error::InvalidEscape(i, c)),
-                                    }
-                                }
-                            }
+        self.read_string('"', start, &mut |me, val, multi, i, ch| match ch {
+            '\\' => {
+                val.to_owned(&me.input[..i]);
+                match me.chars.next() {
+                    Some((_, '"')) => val.push('"'),
+                    Some((_, '\\')) => val.push('\\'),
+                    Some((_, 'b')) => val.push('\u{8}'),
+                    Some((_, 'f')) => val.push('\u{c}'),
+                    Some((_, 'n')) => val.push('\n'),
+                    Some((_, 'r')) => val.push('\r'),
+                    Some((_, 't')) => val.push('\t'),
+                    Some((i, c @ 'u')) | Some((i, c @ 'U')) => {
+                        let len = if c == 'u' { 4 } else { 8 };
+                        val.push(me.hex(start, i, len)?);
+                    }
+                    Some((i, c @ ' ')) | Some((i, c @ '\t')) | Some((i, c @ '\n')) if multi => {
+                        if c != '\n' {
                             while let Some((_, ch)) = me.chars.clone().next() {
                                 match ch {
-                                    ' ' | '\t' | '\n' => {
+                                    ' ' | '\t' => {
+                                        me.chars.next();
+                                        continue;
+                                    }
+                                    '\n' => {
                                         me.chars.next();
+                                        break;
                                     }
-                                    _ => break,
+                                    _ => return Err(Error::InvalidEscape(i, c)),
                                 }
                             }
                         }
-                        Some((i, c)) => return Err(Error::InvalidEscape(i, c)),
-                        None => return Err(Error::UnterminatedString(start)),
+                        while let Some((_, ch)) = me.chars.clone().next() {
+                            match ch {
+                                ' ' | '\t' | '\n' => {
+                                    me.chars.next();
+                                }
+                                _ => break,
+                            }
+                        }
                     }
-                    Ok(())
-                }
-                ch if '\u{20}' <= ch && ch <= '\u{10ffff}' && ch != '\u{7f}' => {
-                    val.push(ch);
-                    Ok(())
+                    Some((i, c)) => return Err(Error::InvalidEscape(i, c)),
+                    None => return Err(Error::UnterminatedString(start)),
                 }
-                _ => Err(Error::InvalidCharInString(i, ch))
+                Ok(())
+            }
+            ch if '\u{20}' <= ch && ch <= '\u{10ffff}' && ch != '\u{7f}' => {
+                val.push(ch);
+                Ok(())
            }
+            _ => Err(Error::InvalidCharInString(i, ch)),
         })
     }
@@ -424,7 +439,7 @@ impl<'a> Tokenizer<'a> {
     fn keylike(&mut self, start: usize) -> Token<'a> {
         while let Some((_, ch)) = self.peek_one() {
             if !is_keylike(ch) {
-                break
+                break;
             }
             self.one();
         }
@@ -441,8 +456,14 @@
     /// Calculate the span of a single character.
     fn step_span(&mut self, start: usize) -> Span {
-        let end = self.peek_one().map(|t| t.0).unwrap_or_else(|| self.input.len());
-        Span { start: start, end: end }
+        let end = self
+            .peek_one()
+            .map(|t| t.0)
+            .unwrap_or_else(|| self.input.len());
+        Span {
+            start: start,
+            end: end,
+        }
     }
 
     /// Peek one char without consuming it.
@@ -465,7 +486,7 @@ impl<'a> Iterator for CrlfFold<'a> {
                 let mut attempt = self.chars.clone();
                 if let Some((_, '\n')) = attempt.next() {
                     self.chars = attempt;
-                    return (i, '\n')
+                    return (i, '\n');
                 }
             }
             (i, c)
@@ -499,11 +520,11 @@
 }
 
 fn is_keylike(ch: char) -> bool {
-    ('A' <= ch && ch <= 'Z') ||
-        ('a' <= ch && ch <= 'z') ||
-        ('0' <= ch && ch <= '9') ||
-        ch == '-' ||
-        ch == '_'
+    ('A' <= ch && ch <= 'Z')
+        || ('a' <= ch && ch <= 'z')
+        || ('0' <= ch && ch <= '9')
+        || ch == '-'
+        || ch == '_'
 }
 
 impl<'a> Token<'a> {
@@ -520,7 +541,13 @@
             Token::LeftBrace => "a left brace",
             Token::RightBracket => "a right bracket",
             Token::LeftBracket => "a left bracket",
-            Token::String { multiline, .. } => if multiline { "a multiline string" } else { "a string" },
+            Token::String { multiline, .. } => {
+                if multiline {
+                    "a multiline string"
+                } else {
+                    "a string"
+                }
+            }
             Token::Colon => "a colon",
             Token::Plus => "a plus",
         }
@@ -529,8 +556,8 @@
 
 #[cfg(test)]
 mod tests {
+    use super::{Error, Token, Tokenizer};
     use std::borrow::Cow;
-    use super::{Tokenizer, Token, Error};
 
     fn err(input: &str, err: Error) {
         let mut t = Tokenizer::new(input);
@@ -544,11 +571,14 @@ mod tests {
         fn t(input: &str, val: &str, multiline: bool) {
             let mut t = Tokenizer::new(input);
             let (_, token) = t.next().unwrap().unwrap();
-            assert_eq!(token, Token::String {
-                src: input,
-                val: Cow::Borrowed(val),
-                multiline: multiline,
-            });
+            assert_eq!(
+                token,
+                Token::String {
+                    src: input,
+                    val: Cow::Borrowed(val),
+                    multiline: multiline,
+                }
+            );
             assert!(t.next().unwrap().is_none());
         }
@@ -567,11 +597,14 @@
         fn t(input: &str, val: &str, multiline: bool) {
             let mut t = Tokenizer::new(input);
             let (_, token) = t.next().unwrap().unwrap();
-            assert_eq!(token, Token::String {
-                src: input,
-                val: Cow::Borrowed(val),
-                multiline: multiline,
-            });
+            assert_eq!(
+                token,
+                Token::String {
+                    src: input,
+                    val: Cow::Borrowed(val),
+                    multiline: multiline,
+                }
+            );
             assert!(t.next().unwrap().is_none());
         }
@@ -585,7 +618,11 @@
         t(r#""\U000A0000""#, "\u{A0000}", false);
         t(r#""\\t""#, "\\t", false);
         t("\"\"\"\\\n\"\"\"", "", true);
-        t("\"\"\"\\\n     \t   \t  \\\r\n  \t \n  \t \r\n\"\"\"", "", true);
+        t(
+            "\"\"\"\\\n     \t   \t  \\\r\n  \t \n  \t \r\n\"\"\"",
+            "",
+            true,
+        );
         t(r#""\r""#, "\r", false);
         t(r#""\n""#, "\n", false);
        t(r#""\b""#, "\u{8}", false);
@@ -636,39 +673,45 @@
             assert_eq!(actual.len(), expected.len());
         }
 
-        t(" a ", &[
-            ((0, 1), Token::Whitespace(" "), " "),
-            ((1, 2), Token::Keylike("a"), "a"),
-            ((2, 3), Token::Whitespace(" "), " "),
-        ]);
-
-        t(" a\t [[]] \t [] {} , . =\n# foo \r\n#foo \n ", &[
-            ((0, 1), Token::Whitespace(" "), " "),
-            ((1, 2), Token::Keylike("a"), "a"),
-            ((2, 4), Token::Whitespace("\t "), "\t "),
-            ((4, 5), Token::LeftBracket, "["),
-            ((5, 6), Token::LeftBracket, "["),
-            ((6, 7), Token::RightBracket, "]"),
-            ((7, 8), Token::RightBracket, "]"),
-            ((8, 11), Token::Whitespace(" \t "), " \t "),
-            ((11, 12), Token::LeftBracket, "["),
-            ((12, 13), Token::RightBracket, "]"),
-            ((13, 14), Token::Whitespace(" "), " "),
-            ((14, 15), Token::LeftBrace, "{"),
-            ((15, 16), Token::RightBrace, "}"),
-            ((16, 17), Token::Whitespace(" "), " "),
-            ((17, 18), Token::Comma, ","),
-            ((18, 19), Token::Whitespace(" "), " "),
-            ((19, 20), Token::Period, "."),
-            ((20, 21), Token::Whitespace(" "), " "),
-            ((21, 22), Token::Equals, "="),
-            ((22, 23), Token::Newline, "\n"),
-            ((23, 29), Token::Comment("# foo "), "# foo "),
-            ((29, 31), Token::Newline, "\r\n"),
-            ((31, 36), Token::Comment("#foo "), "#foo "),
-            ((36, 37), Token::Newline, "\n"),
-            ((37, 38), Token::Whitespace(" "), " "),
-        ]);
+        t(
+            " a ",
+            &[
+                ((0, 1), Token::Whitespace(" "), " "),
+                ((1, 2), Token::Keylike("a"), "a"),
+                ((2, 3), Token::Whitespace(" "), " "),
+            ],
+        );
+
+        t(
+            " a\t [[]] \t [] {} , . =\n# foo \r\n#foo \n ",
+            &[
+                ((0, 1), Token::Whitespace(" "), " "),
+                ((1, 2), Token::Keylike("a"), "a"),
+                ((2, 4), Token::Whitespace("\t "), "\t "),
+                ((4, 5), Token::LeftBracket, "["),
+                ((5, 6), Token::LeftBracket, "["),
+                ((6, 7), Token::RightBracket, "]"),
+                ((7, 8), Token::RightBracket, "]"),
+                ((8, 11), Token::Whitespace(" \t "), " \t "),
+                ((11, 12), Token::LeftBracket, "["),
+                ((12, 13), Token::RightBracket, "]"),
+                ((13, 14), Token::Whitespace(" "), " "),
+                ((14, 15), Token::LeftBrace, "{"),
+                ((15, 16), Token::RightBrace, "}"),
+                ((16, 17), Token::Whitespace(" "), " "),
+                ((17, 18), Token::Comma, ","),
+                ((18, 19), Token::Whitespace(" "), " "),
+                ((19, 20), Token::Period, "."),
+                ((20, 21), Token::Whitespace(" "), " "),
+                ((21, 22), Token::Equals, "="),
+                ((22, 23), Token::Newline, "\n"),
+                ((23, 29), Token::Comment("# foo "), "# foo "),
+                ((29, 31), Token::Newline, "\r\n"),
+                ((31, 36), Token::Comment("#foo "), "#foo "),
+                ((36, 37), Token::Newline, "\n"),
+                ((37, 38), Token::Whitespace(" "), " "),
+            ],
+        );
     }
 
     #[test]