author      Claudio Bley <claudio.bley@gmail.com>    2018-05-08 23:02:24 +0200
committer   Claudio Bley <claudio.bley@gmail.com>    2018-05-09 22:18:14 +0200
commit      264d828654ff323595e28c8a1f08214481f40f9c (patch)
tree        88a818734b16170ebd90176bb0e83fada510a5f7
parent      44bec613697e884df64634536554c611d3135b3b (diff)
download    milf-rs-264d828654ff323595e28c8a1f08214481f40f9c.tar.gz
            milf-rs-264d828654ff323595e28c8a1f08214481f40f9c.zip
Treat unicode hex digits case-insensitively
In Rust >= 1.24.0 we could have used `char::is_ascii_hexdigit`, but to keep
compatibility with older versions, `char::is_digit(16)` is used.
Fixes #240.
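As a minimal sketch of the approach taken by this patch, standalone rather than the crate's actual `Tokenizer::hex` method (the function name and signature below are illustrative only): `char::is_digit(16)` accepts both lower- and uppercase hex digits, so the escape value can be collected into a buffer and converted with `u32::from_str_radix`.

// Illustrative sketch, not the crate's real `Tokenizer::hex`.
fn parse_hex_escape(digits: &str) -> Option<char> {
    // `char::is_digit(16)` matches '0'-'9', 'a'-'f' and 'A'-'F', unlike the
    // old hand-rolled ranges, which only matched uppercase 'A'-'F'.
    if !digits.chars().all(|ch| ch as u32 <= 0x7F && ch.is_digit(16)) {
        return None;
    }
    // Every character is a valid ASCII hex digit, so this cannot fail
    // except for an empty input.
    match u32::from_str_radix(digits, 16) {
        Ok(val) => std::char::from_u32(val),
        Err(_) => None,
    }
}

fn main() {
    // Lower- and uppercase hex digits now decode to the same scalar value.
    assert_eq!(parse_hex_escape("c0de"), parse_hex_escape("C0DE"));
    assert_eq!(parse_hex_escape("03b4"), Some('δ'));
    assert_eq!(parse_hex_escape("defg"), None); // 'g' is not a hex digit
}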
-rw-r--r--   src/tokens.rs                                | 11
-rw-r--r--   test-suite/tests/valid/unicode-escape.json   |  5
-rw-r--r--   test-suite/tests/valid/unicode-escape.toml   |  3
3 files changed, 11 insertions, 8 deletions
diff --git a/src/tokens.rs b/src/tokens.rs
index bcabd94..3f47f02 100644
--- a/src/tokens.rs
+++ b/src/tokens.rs
@@ -2,6 +2,7 @@ use std::borrow::Cow;
 use std::char;
 use std::str;
 use std::string;
+use std::string::String as StdString;
 
 use self::Token::*;
 
@@ -369,19 +370,15 @@ impl<'a> Tokenizer<'a> {
     }
 
     fn hex(&mut self, start: usize, i: usize, len: usize) -> Result<char, Error> {
-        let mut val = 0;
+        let mut buf = StdString::with_capacity(len);
         for _ in 0..len {
             match self.one() {
-                Some((_, ch)) if '0' <= ch && ch <= '9' => {
-                    val = val * 16 + (ch as u32 - '0' as u32);
-                }
-                Some((_, ch)) if 'A' <= ch && ch <= 'F' => {
-                    val = val * 16 + (ch as u32 - 'A' as u32) + 10;
-                }
+                Some((_, ch)) if ch as u32 <= 0x7F && ch.is_digit(16) => buf.push(ch),
                 Some((i, ch)) => return Err(Error::InvalidHexEscape(i, ch)),
                 None => return Err(Error::UnterminatedString(start)),
             }
         }
+        let val = u32::from_str_radix(&buf, 16).unwrap();
         match char::from_u32(val) {
             Some(ch) => Ok(ch),
             None => Err(Error::InvalidEscapeValue(i, val)),
diff --git a/test-suite/tests/valid/unicode-escape.json b/test-suite/tests/valid/unicode-escape.json
index 32948c6..06fae70 100644
--- a/test-suite/tests/valid/unicode-escape.json
+++ b/test-suite/tests/valid/unicode-escape.json
@@ -1,5 +1,8 @@
 {
     "answer1": {"type": "string", "value": "\u000B"},
     "answer4": {"type": "string", "value": "\u03B4α"},
-    "answer8": {"type": "string", "value": "\u03B4β"}
+    "answer8": {"type": "string", "value": "\u03B4β"},
+    "answer9": {"type": "string", "value": "\uc0de"},
+    "answer10": {"type": "string", "value": "\u03B4α"},
+    "answer11": {"type": "string", "value": "\uABC1"}
 }
diff --git a/test-suite/tests/valid/unicode-escape.toml b/test-suite/tests/valid/unicode-escape.toml
index c0d5a25..6654252 100644
--- a/test-suite/tests/valid/unicode-escape.toml
+++ b/test-suite/tests/valid/unicode-escape.toml
@@ -1,3 +1,6 @@
 answer1 = "\u000B"
 answer4 = "\u03B4α"
 answer8 = "\U000003B4β"
+answer9 = "\uc0de"
+answer10 = "\u03b4α"
+answer11 = "\U0000abc1"
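For context, a quick check of the new test values. The crate name `milf` and the upstream toml-rs-style `Value: FromStr` API are assumptions here, not confirmed by this patch; with the fix, lowercase hex digits in `\u`/`\U` escapes decode the same as uppercase ones.

// Hypothetical usage check; `milf` and the `Value` API are assumed.
extern crate milf;

fn main() {
    let doc = "answer9 = \"\\uc0de\"\nanswer11 = \"\\U0000abc1\"\n";
    let value: milf::Value = doc.parse().expect("lowercase hex escapes should now parse");
    assert_eq!(value["answer9"].as_str(), Some("\u{c0de}"));
    assert_eq!(value["answer11"].as_str(), Some("\u{abc1}"));
}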