From f66d8bcf33530c858a502bfa170f2383a8cbc204 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Sun, 29 Jan 2017 16:53:20 -0800 Subject: Rewrite crate with serde support from ground up This commit completely rewrites this crate from the ground up, supporting serde at the lowest levels as I believe serde support was intended to do. This is a major change from the previous versions of this crate, with a summary of changes being: * Serialization directly to TOML is now supported without going through a `Value` first. * Deserialization directly from TOML is now supported without going through a `Value`. Note that due to the TOML format some values are still buffered in intermediate memory, but overall this should be at a minimum now. * The API of `Value` was overhauled to match the API of `serde_json::Value`. The changes here were to: * Add `is_*` accessors. * Add `get` and `get_mut` for one-field lookups. * Implement panicking lookups through `Index`. The old `index` methods are now gone in favor of `get` and `Index` implementations. * A `Datetime` type has been added to represent a TOML datetime in a first-class fashion. Currently this type provides no accessors other than a `Display` implementation, but the idea is that this will grow support over time for decomposing the date. * Support for the `rustc-serialize` crate has been dropped; that support will stay on the 0.2 and 0.1 release trains. * This crate no longer supports the detection of unused fields; for that, you can use the `serde_ignored` crate on crates.io. --- src/de.rs | 1195 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1195 insertions(+) create mode 100644 src/de.rs (limited to 'src/de.rs') diff --git a/src/de.rs b/src/de.rs new file mode 100644 index 0000000..7cb0410 --- /dev/null +++ b/src/de.rs @@ -0,0 +1,1195 @@ +//! Deserializing TOML into Rust structures. +//! +//! This module contains all the Serde support for deserializing TOML documents +//! into Rust structures. 
Note that some top-level functions here are also +//! provided at the top of the crate. + +use std::borrow::Cow; +use std::error; +use std::fmt; +use std::str; +use std::vec; + +use serde::de; + +use tokens::{Tokenizer, Token, Error as TokenError}; +use datetime::{SERDE_STRUCT_FIELD_NAME, SERDE_STRUCT_NAME}; + +/// Deserializes a byte slice into a type. +/// +/// This function will attempt to interpret `bytes` as UTF-8 data and then +/// deserialize `T` from the TOML document provided. +pub fn from_slice(bytes: &[u8]) -> Result + where T: de::Deserialize, +{ + match str::from_utf8(bytes) { + Ok(s) => from_str(s), + Err(e) => Err(Error::custom(e.to_string())), + } +} + +/// Deserializes a string into a type. +/// +/// This function will attempt to interpret `s` as a TOML document and +/// deserialize `T` from the document. +pub fn from_str(s: &str) -> Result + where T: de::Deserialize, +{ + let mut d = Deserializer::new(s); + let ret = T::deserialize(&mut d)?; + d.end()?; + return Ok(ret) +} + +/// Errors that can occur when deserializing a type. +#[derive(Debug, Clone)] +pub struct Error { + inner: Box, +} + +#[derive(Debug, Clone)] +struct ErrorInner { + kind: ErrorKind, + line: Option, + col: usize, + message: String, + key: Vec, +} + +/// Errors that can occur when deserializing a type. +#[derive(Debug, Clone)] +enum ErrorKind { + /// EOF was reached when looking for a value + UnexpectedEof, + + /// An invalid character not allowed in a string was found + InvalidCharInString(char), + + /// An invalid character was found as an escape + InvalidEscape(char), + + /// An invalid character was found in a hex escape + InvalidHexEscape(char), + + /// An invalid escape value was specified in a hex escape in a string. + /// + /// Valid values are in the plane of unicode codepoints. + InvalidEscapeValue(u32), + + /// A newline in a string was encountered when one was not allowed. 
+ NewlineInString, + + /// An unexpected character was encountered, typically when looking for a + /// value. + Unexpected(char), + + /// An unterminated string was found: EOF was reached before the string + /// was terminated. + UnterminatedString, + + /// A newline was found in a table key. + NewlineInTableKey, + + /// A number failed to parse + NumberInvalid, + + /// A date or datetime was invalid + DateInvalid, + + /// Wanted one sort of token, but found another. + Wanted { + /// Expected token type + expected: &'static str, + /// Actually found token type + found: &'static str, + }, + + /// An array was decoded but the types inside of it were mixed, which is + /// disallowed by TOML. + MixedArrayType, + + /// A duplicate table definition was found. + DuplicateTable(String), + + /// A previously defined table was redefined as an array. + RedefineAsArray, + + /// An empty table key was found. + EmptyTableKey, + + /// A custom error which could be generated when deserializing a particular + /// type. + Custom, + + #[doc(hidden)] + __Nonexhaustive, +} + +/// Deserialization implementation for TOML. +pub struct Deserializer<'a> { + require_newline_after_table: bool, + input: &'a str, + tokens: Tokenizer<'a>, +} + +impl<'a, 'b> de::Deserializer for &'b mut Deserializer<'a> { + type Error = Error; + + fn deserialize(self, visitor: V) -> Result + where V: de::Visitor, + { + let mut tables = Vec::new(); + let mut cur_table = Table { + at: 0, + header: Vec::new(), + values: None, + array: false, + }; + while let Some(line) = self.line()? { + match line { + Line::Table { at, mut header, array } => { + if cur_table.header.len() > 0 || cur_table.values.is_some() { + tables.push(cur_table); + } + cur_table = Table { + at: at, + header: Vec::new(), + values: Some(Vec::new()), + array: array, + }; + loop { + let part = header.next().map_err(|e| { + self.token_error(e) + }); + match part? 
{ + Some(part) => cur_table.header.push(part), + None => break, + } + } + } + Line::KeyValue(key, value) => { + if cur_table.values.is_none() { + cur_table.values = Some(Vec::new()); + } + cur_table.values.as_mut().unwrap().push((key, value)); + } + } + } + if cur_table.header.len() > 0 || cur_table.values.is_some() { + tables.push(cur_table); + } + + visitor.visit_map(MapVisitor { + values: Vec::new().into_iter(), + next_value: None, + depth: 0, + cur: 0, + cur_parent: 0, + max: tables.len(), + tables: &mut tables, + array: false, + de: self, + }) + } + + forward_to_deserialize! { + bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq + seq_fixed_size bytes byte_buf map struct unit enum newtype_struct + struct_field ignored_any unit_struct tuple_struct tuple option + } +} + +struct Table<'a> { + at: usize, + header: Vec>, + values: Option, Value<'a>)>>, + array: bool, +} + +#[doc(hidden)] +pub struct MapVisitor<'a: 'b, 'b> { + values: vec::IntoIter<(Cow<'a, str>, Value<'a>)>, + next_value: Option<(Cow<'a, str>, Value<'a>)>, + depth: usize, + cur: usize, + cur_parent: usize, + max: usize, + tables: &'b mut [Table<'a>], + array: bool, + de: &'b mut Deserializer<'a>, +} + +impl<'a, 'b> de::MapVisitor for MapVisitor<'a, 'b> { + type Error = Error; + + fn visit_key_seed(&mut self, seed: K) -> Result, Error> + where K: de::DeserializeSeed, + { + if self.cur_parent == self.max || self.cur == self.max { + return Ok(None) + } + + loop { + assert!(self.next_value.is_none()); + if let Some((key, value)) = self.values.next() { + let ret = seed.deserialize(StrDeserializer::new(key.clone()))?; + self.next_value = Some((key, value)); + return Ok(Some(ret)) + } + + let next_table = { + let prefix = &self.tables[self.cur_parent].header[..self.depth]; + self.tables[self.cur..self.max].iter().enumerate().find(|&(_, t)| { + if t.values.is_none() { + return false + } + match t.header.get(..self.depth) { + Some(header) => header == prefix, + None => false, + } + }).map(|(i, 
_)| i + self.cur) + }; + + let pos = match next_table { + Some(pos) => pos, + None => return Ok(None), + }; + self.cur = pos; + + // Test to see if we're duplicating our parent's table, and if so + // then this is an error in the toml format + if self.cur_parent != pos && + self.tables[self.cur_parent].header == self.tables[pos].header { + let at = self.tables[pos].at; + let name = self.tables[pos].header.join("."); + return Err(self.de.error(at, ErrorKind::DuplicateTable(name))) + } + + let table = &mut self.tables[pos]; + + // If we're not yet at the appropriate depth for this table then we + // just visit the next portion of its header and then continue + // decoding. + if self.depth != table.header.len() { + let key = &table.header[self.depth]; + let key = seed.deserialize(StrDeserializer::new(key[..].into()))?; + return Ok(Some(key)) + } + + // Rule out cases like: + // + // [[foo.bar]] + // [[foo]] + if table.array { + let kind = ErrorKind::RedefineAsArray; + return Err(self.de.error(table.at, kind)) + } + + self.values = table.values.take().unwrap().into_iter(); + } + } + + fn visit_value_seed(&mut self, seed: V) -> Result + where V: de::DeserializeSeed, + { + if let Some((k, v)) = self.next_value.take() { + match seed.deserialize(ValueDeserializer::new(v)) { + Ok(v) => return Ok(v), + Err(mut e) => { + e.add_key_context(&k); + return Err(e) + } + } + } + + let array = self.tables[self.cur].array && + self.depth == self.tables[self.cur].header.len() - 1; + self.cur += 1; + let res = seed.deserialize(MapVisitor { + values: Vec::new().into_iter(), + next_value: None, + depth: self.depth + if array {0} else {1}, + cur_parent: self.cur - 1, + cur: 0, + max: self.max, + array: array, + tables: &mut *self.tables, + de: &mut *self.de, + }); + res.map_err(|mut e| { + e.add_key_context(&self.tables[self.cur - 1].header[self.depth]); + e + }) + } +} + +impl<'a, 'b> de::SeqVisitor for MapVisitor<'a, 'b> { + type Error = Error; + + fn visit_seed(&mut self, seed: K) -> 
Result, Error> + where K: de::DeserializeSeed, + { + assert!(self.next_value.is_none()); + assert!(self.values.next().is_none()); + + if self.cur_parent == self.max { + return Ok(None) + } + + let next = self.tables[..self.max] + .iter() + .enumerate() + .skip(self.cur_parent + 1) + .find(|&(_, table)| { + table.array && table.header == self.tables[self.cur_parent].header + }).map(|p| p.0) + .unwrap_or(self.max); + + let ret = seed.deserialize(MapVisitor { + values: self.tables[self.cur_parent].values.take().unwrap().into_iter(), + next_value: None, + depth: self.depth + 1, + cur_parent: self.cur_parent, + max: next, + cur: 0, + array: false, + tables: &mut self.tables, + de: &mut self.de, + })?; + self.cur_parent = next; + return Ok(Some(ret)) + } +} + +impl<'a, 'b> de::Deserializer for MapVisitor<'a, 'b> { + type Error = Error; + + fn deserialize(self, visitor: V) -> Result + where V: de::Visitor, + { + if self.array { + visitor.visit_seq(self) + } else { + visitor.visit_map(self) + } + } + + // `None` is interpreted as a missing field so be sure to implement `Some` + // as a present field. + fn deserialize_option(self, visitor: V) -> Result + where V: de::Visitor + { + visitor.visit_some(self) + } + + forward_to_deserialize! { + bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq + seq_fixed_size bytes byte_buf map struct unit newtype_struct + struct_field ignored_any unit_struct tuple_struct tuple enum + } +} + +struct StrDeserializer<'a> { + key: Cow<'a, str>, +} + +impl<'a> StrDeserializer<'a> { + fn new(key: Cow<'a, str>) -> StrDeserializer<'a> { + StrDeserializer { + key: key, + } + } +} + +impl<'a> de::Deserializer for StrDeserializer<'a> { + type Error = Error; + + fn deserialize(self, visitor: V) -> Result + where V: de::Visitor, + { + match self.key { + Cow::Borrowed(s) => visitor.visit_str(s), + Cow::Owned(s) => visitor.visit_string(s), + } + } + + forward_to_deserialize! 
{ + bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq + seq_fixed_size bytes byte_buf map struct option unit newtype_struct + struct_field ignored_any unit_struct tuple_struct tuple enum + } +} + +struct ValueDeserializer<'a> { + value: Value<'a>, +} + +impl<'a> ValueDeserializer<'a> { + fn new(value: Value<'a>) -> ValueDeserializer<'a> { + ValueDeserializer { + value: value, + } + } +} + +impl<'a> de::Deserializer for ValueDeserializer<'a> { + type Error = Error; + + fn deserialize(self, visitor: V) -> Result + where V: de::Visitor, + { + match self.value { + Value::Integer(i) => visitor.visit_i64(i), + Value::Boolean(b) => visitor.visit_bool(b), + Value::Float(f) => visitor.visit_f64(f), + Value::String(Cow::Borrowed(s)) => visitor.visit_str(s), + Value::String(Cow::Owned(s)) => visitor.visit_string(s), + Value::Datetime(s) => visitor.visit_map(DatetimeDeserializer { + date: s, + visited: false, + }), + Value::Array(values) => { + let mut s = de::value::SeqDeserializer::new(values.into_iter()); + let ret = visitor.visit_seq(&mut s)?; + s.end()?; + Ok(ret) + } + Value::InlineTable(values) => { + visitor.visit_map(InlineTableDeserializer { + values: values.into_iter(), + next_value: None, + }) + } + } + } + + fn deserialize_struct(self, + name: &'static str, + fields: &'static [&'static str], + visitor: V) -> Result + where V: de::Visitor, + { + if name == SERDE_STRUCT_NAME && fields == &[SERDE_STRUCT_FIELD_NAME] { + if let Value::Datetime(ref s) = self.value { + return visitor.visit_map(DatetimeDeserializer { + date: s, + visited: false, + }) + } + } + + self.deserialize(visitor) + } + + // `None` is interpreted as a missing field so be sure to implement `Some` + // as a present field. + fn deserialize_option(self, visitor: V) -> Result + where V: de::Visitor + { + visitor.visit_some(self) + } + + forward_to_deserialize! 
{ + bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq + seq_fixed_size bytes byte_buf map unit newtype_struct + struct_field ignored_any unit_struct tuple_struct tuple enum + } +} + +impl<'a> de::value::ValueDeserializer for Value<'a> { + type Deserializer = ValueDeserializer<'a>; + + fn into_deserializer(self) -> Self::Deserializer { + ValueDeserializer::new(self) + } +} + +struct DatetimeDeserializer<'a> { + visited: bool, + date: &'a str, +} + +impl<'a> de::MapVisitor for DatetimeDeserializer<'a> { + type Error = Error; + + fn visit_key_seed(&mut self, seed: K) -> Result, Error> + where K: de::DeserializeSeed, + { + if self.visited { + return Ok(None) + } + self.visited = true; + seed.deserialize(DatetimeFieldDeserializer).map(Some) + } + + fn visit_value_seed(&mut self, seed: V) -> Result + where V: de::DeserializeSeed, + { + seed.deserialize(StrDeserializer::new(self.date.into())) + } +} + +struct DatetimeFieldDeserializer; + +impl de::Deserializer for DatetimeFieldDeserializer { + type Error = Error; + + fn deserialize(self, visitor: V) -> Result + where V: de::Visitor, + { + visitor.visit_str(SERDE_STRUCT_FIELD_NAME) + } + + forward_to_deserialize! 
{ + bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string seq + seq_fixed_size bytes byte_buf map struct option unit newtype_struct + struct_field ignored_any unit_struct tuple_struct tuple enum + } +} + +struct InlineTableDeserializer<'a> { + values: vec::IntoIter<(Cow<'a, str>, Value<'a>)>, + next_value: Option>, +} + +impl<'a> de::MapVisitor for InlineTableDeserializer<'a> { + type Error = Error; + + fn visit_key_seed(&mut self, seed: K) -> Result, Error> + where K: de::DeserializeSeed, + { + let (key, value) = match self.values.next() { + Some(pair) => pair, + None => return Ok(None), + }; + self.next_value = Some(value); + seed.deserialize(StrDeserializer::new(key)).map(Some) + } + + fn visit_value_seed(&mut self, seed: V) -> Result + where V: de::DeserializeSeed, + { + let value = self.next_value.take().unwrap(); + seed.deserialize(ValueDeserializer::new(value)) + } +} + +impl<'a> Deserializer<'a> { + /// Creates a new deserializer which will be deserializing the string + /// provided. + pub fn new(input: &'a str) -> Deserializer<'a> { + Deserializer { + tokens: Tokenizer::new(input), + input: input, + require_newline_after_table: false, + } + } + + /// The `Deserializer::end` method should be called after a value has been + /// fully deserialized. It is meant to validate that the input stream is at + /// the end or only has trailing whitespace/comments; at present it performs + /// no validation and always returns `Ok(())`. + pub fn end(&mut self) -> Result<(), Error> { + Ok(()) + } + + /// Historical versions of toml-rs accidentally accepted a table definition + /// header that was not followed by a newline, but the TOML spec requires a + /// newline after a table definition header. + /// + /// Setting this option to `false` emulates that historical behavior for + /// backwards compatibility. NOTE(review): `new` starts it at `false`; confirm. 
+ pub fn set_require_newline_after_table(&mut self, require: bool) { + self.require_newline_after_table = require; + } + + fn line(&mut self) -> Result>, Error> { + loop { + self.eat_whitespace()?; + if self.eat_comment()? { + continue + } + if self.eat(Token::Newline)? { + continue + } + break + } + + match self.peek()? { + Some(Token::LeftBracket) => self.table_header().map(Some), + Some(_) => self.key_value().map(Some), + None => Ok(None), + } + } + + fn table_header(&mut self) -> Result, Error> { + let start = self.tokens.current(); + self.expect(Token::LeftBracket)?; + let array = self.eat(Token::LeftBracket)?; + let ret = Header::new(self.tokens.clone(), array); + self.tokens.skip_to_newline(); + Ok(Line::Table { at: start, header: ret, array: array }) + } + + fn key_value(&mut self) -> Result, Error> { + let key = self.table_key()?; + self.eat_whitespace()?; + self.expect(Token::Equals)?; + self.eat_whitespace()?; + + let value = self.value()?; + self.eat_whitespace()?; + if !self.eat_comment()? { + self.eat_newline_or_eof()?; + } + + Ok(Line::KeyValue(key, value)) + } + + fn value(&mut self) -> Result, Error> { + let at = self.tokens.current(); + let value = match self.next()? { + Some(Token::String { val, .. 
}) => Value::String(val), + Some(Token::Keylike("true")) => Value::Boolean(true), + Some(Token::Keylike("false")) => Value::Boolean(false), + Some(Token::Keylike(key)) => self.number_or_date(key)?, + Some(Token::Plus) => self.number_leading_plus()?, + Some(Token::LeftBrace) => self.inline_table().map(Value::InlineTable)?, + Some(Token::LeftBracket) => self.array().map(Value::Array)?, + Some(token) => { + return Err(self.error(at, ErrorKind::Wanted { + expected: "a value", + found: token.describe(), + })) + } + None => return Err(self.eof()), + }; + Ok(value) + } + + fn number_or_date(&mut self, s: &'a str) -> Result, Error> { + if s.contains("T") || (s.len() > 1 && s[1..].contains("-")) && + !s.contains("e-") { + self.datetime(s, false).map(Value::Datetime) + } else if self.eat(Token::Colon)? { + self.datetime(s, true).map(Value::Datetime) + } else { + self.number(s) + } + } + + fn number(&mut self, s: &'a str) -> Result, Error> { + if s.contains("e") || s.contains("E") { + self.float(s, None).map(Value::Float) + } else if self.eat(Token::Period)? { + let at = self.tokens.current(); + match self.next()? { + Some(Token::Keylike(after)) => { + self.float(s, Some(after)).map(Value::Float) + } + _ => Err(self.error(at, ErrorKind::NumberInvalid)), + } + } else { + self.integer(s).map(Value::Integer) + } + } + + fn number_leading_plus(&mut self) -> Result, Error> { + let start = self.tokens.current(); + match self.next()? 
{ + Some(Token::Keylike(s)) => self.number(s), + _ => Err(self.error(start, ErrorKind::NumberInvalid)), + } + } + + fn integer(&self, s: &'a str) -> Result { + let (prefix, suffix) = self.parse_integer(s, true, false)?; + let start = self.tokens.substr_offset(s); + if suffix != "" { + return Err(self.error(start, ErrorKind::NumberInvalid)) + } + prefix.replace("_", "").trim_left_matches("+").parse().map_err(|_e| { + self.error(start, ErrorKind::NumberInvalid) + }) + } + + fn parse_integer(&self, + s: &'a str, + allow_sign: bool, + allow_leading_zeros: bool) + -> Result<(&'a str, &'a str), Error> { + let start = self.tokens.substr_offset(s); + + let mut first = true; + let mut first_zero = false; + let mut underscore = false; + let mut end = s.len(); + for (i, c) in s.char_indices() { + let at = i + start; + if i == 0 && (c == '+' || c == '-') && allow_sign { + continue + } + + match c { + '0' if first => first_zero = true, + '0' ... '9' if !first && first_zero && !allow_leading_zeros => { + return Err(self.error(at, ErrorKind::NumberInvalid)) + } + '0' ... 
'9' => underscore = false, + '_' if first => { + return Err(self.error(at, ErrorKind::NumberInvalid)) + } + '_' if !underscore => underscore = true, + _ => { + end = i; + break + } + + } + first = false; + } + if first || underscore { + return Err(self.error(start, ErrorKind::NumberInvalid)) + } + Ok((&s[..end], &s[end..])) + } + + fn float(&mut self, s: &'a str, after_decimal: Option<&'a str>) + -> Result { + let (integral, mut suffix) = self.parse_integer(s, true, false)?; + let start = self.tokens.substr_offset(integral); + + let mut fraction = None; + if let Some(after) = after_decimal { + if suffix != "" { + return Err(self.error(start, ErrorKind::NumberInvalid)) + } + let (a, b) = self.parse_integer(&after, false, true)?; + fraction = Some(a); + suffix = b; + } + + let mut exponent = None; + if suffix.starts_with("e") || suffix.starts_with("E") { + let (a, b) = if suffix.len() == 1 { + self.eat(Token::Plus)?; + match self.next()? { + Some(Token::Keylike(s)) => { + self.parse_integer(s, false, false)? + } + _ => return Err(self.error(start, ErrorKind::NumberInvalid)), + } + } else { + self.parse_integer(&suffix[1..], true, false)? + }; + if b != "" { + return Err(self.error(start, ErrorKind::NumberInvalid)) + } + exponent = Some(a); + } + + let mut number = integral.trim_left_matches("+") + .chars() + .filter(|c| *c != '_') + .collect::(); + if let Some(fraction) = fraction { + number.push_str("."); + number.extend(fraction.chars().filter(|c| *c != '_')); + } + if let Some(exponent) = exponent { + number.push_str("E"); + number.extend(exponent.chars().filter(|c| *c != '_')); + } + number.parse().map_err(|_e| { + self.error(start, ErrorKind::NumberInvalid) + }) + } + + fn datetime(&mut self, date: &'a str, colon_eaten: bool) + -> Result<&'a str, Error> { + let start = self.tokens.substr_offset(date); + if colon_eaten || self.eat(Token::Colon)? { + // minutes + match self.next()? 
{ + Some(Token::Keylike(_)) => {} + _ => return Err(self.error(start, ErrorKind::DateInvalid)), + } + // Seconds + self.expect(Token::Colon)?; + match self.next()? { + Some(Token::Keylike(_)) => {} + _ => return Err(self.error(start, ErrorKind::DateInvalid)), + } + // Fractional seconds + if self.eat(Token::Period)? { + match self.next()? { + Some(Token::Keylike(_)) => {} + _ => return Err(self.error(start, ErrorKind::DateInvalid)), + } + } + + // offset + if self.eat(Token::Plus)? { + match self.next()? { + Some(Token::Keylike(_)) => {} + _ => return Err(self.error(start, ErrorKind::DateInvalid)), + } + } + if self.eat(Token::Colon)? { + match self.next()? { + Some(Token::Keylike(_)) => {} + _ => return Err(self.error(start, ErrorKind::DateInvalid)), + } + } + } + let end = self.tokens.current(); + Ok(&self.tokens.input()[start..end]) + } + + // TODO(#140): shouldn't buffer up this entire table in memory, it'd be + // great to defer parsing everything until later. + fn inline_table(&mut self) -> Result, Value<'a>)>, Error> { + let mut ret = Vec::new(); + self.eat_whitespace()?; + if self.eat(Token::RightBrace)? { + return Ok(ret) + } + loop { + let key = self.table_key()?; + self.eat_whitespace()?; + self.expect(Token::Equals)?; + self.eat_whitespace()?; + ret.push((key, self.value()?)); + + self.eat_whitespace()?; + if self.eat(Token::RightBrace)? { + return Ok(ret) + } + self.expect(Token::Comma)?; + self.eat_whitespace()?; + } + } + + // TODO(#140): shouldn't buffer up this entire array in memory, it'd be + // great to defer parsing everything until later. + fn array(&mut self) -> Result>, Error> { + let mut ret = Vec::new(); + + let intermediate = |me: &mut Deserializer| { + loop { + me.eat_whitespace()?; + if !me.eat(Token::Newline)? && !me.eat_comment()? { + break + } + } + Ok(()) + }; + + loop { + intermediate(self)?; + if self.eat(Token::RightBracket)? 
{ + return Ok(ret) + } + let at = self.tokens.current(); + let value = self.value()?; + if let Some(last) = ret.last() { + if !value.same_type(last) { + return Err(self.error(at, ErrorKind::MixedArrayType)) + } + } + ret.push(value); + intermediate(self)?; + if !self.eat(Token::Comma)? { + break + } + } + intermediate(self)?; + self.expect(Token::RightBracket)?; + Ok(ret) + } + + fn table_key(&mut self) -> Result, Error> { + self.tokens.table_key().map_err(|e| self.token_error(e)) + } + + fn eat_whitespace(&mut self) -> Result<(), Error> { + self.tokens.eat_whitespace().map_err(|e| self.token_error(e)) + } + + fn eat_comment(&mut self) -> Result { + self.tokens.eat_comment().map_err(|e| self.token_error(e)) + } + + fn eat_newline_or_eof(&mut self) -> Result<(), Error> { + self.tokens.eat_newline_or_eof().map_err(|e| self.token_error(e)) + } + + fn eat(&mut self, expected: Token<'a>) -> Result { + self.tokens.eat(expected).map_err(|e| self.token_error(e)) + } + + fn expect(&mut self, expected: Token<'a>) -> Result<(), Error> { + self.tokens.expect(expected).map_err(|e| self.token_error(e)) + } + + fn next(&mut self) -> Result>, Error> { + self.tokens.next().map_err(|e| self.token_error(e)) + } + + fn peek(&mut self) -> Result>, Error> { + self.tokens.peek().map_err(|e| self.token_error(e)) + } + + fn eof(&self) -> Error { + self.error(self.input.len(), ErrorKind::UnexpectedEof) + } + + fn token_error(&self, error: TokenError) -> Error { + match error { + TokenError::InvalidCharInString(at, ch) => { + self.error(at, ErrorKind::InvalidCharInString(ch)) + } + TokenError::InvalidEscape(at, ch) => { + self.error(at, ErrorKind::InvalidEscape(ch)) + } + TokenError::InvalidEscapeValue(at, v) => { + self.error(at, ErrorKind::InvalidEscapeValue(v)) + } + TokenError::InvalidHexEscape(at, ch) => { + self.error(at, ErrorKind::InvalidHexEscape(ch)) + } + TokenError::NewlineInString(at) => { + self.error(at, ErrorKind::NewlineInString) + } + TokenError::Unexpected(at, ch) => { + 
self.error(at, ErrorKind::Unexpected(ch)) + } + TokenError::UnterminatedString(at) => { + self.error(at, ErrorKind::UnterminatedString) + } + TokenError::NewlineInTableKey(at) => { + self.error(at, ErrorKind::NewlineInTableKey) + } + TokenError::Wanted { at, expected, found } => { + self.error(at, ErrorKind::Wanted { expected: expected, found: found }) + } + TokenError::EmptyTableKey(at) => { + self.error(at, ErrorKind::EmptyTableKey) + } + } + } + + fn error(&self, at: usize, kind: ErrorKind) -> Error { + let mut err = Error::from_kind(kind); + let (line, col) = self.to_linecol(at); + err.inner.line = Some(line); + err.inner.col = col; + return err + } + + /// Converts a byte offset from an error message to a (line, column) pair + /// + /// All indexes are 0-based. + fn to_linecol(&self, offset: usize) -> (usize, usize) { + let mut cur = 0; + for (i, line) in self.input.lines().enumerate() { + if cur + line.len() + 1 > offset { + return (i, offset - cur) + } + cur += line.len() + 1; + } + (self.input.lines().count(), 0) + } +} + +impl Error { + fn from_kind(kind: ErrorKind) -> Error { + Error { + inner: Box::new(ErrorInner { + kind: kind, + line: None, + col: 0, + message: String::new(), + key: Vec::new(), + }), + } + } + + fn custom(s: String) -> Error { + Error { + inner: Box::new(ErrorInner { + kind: ErrorKind::Custom, + line: None, + col: 0, + message: s, + key: Vec::new(), + }), + } + } + + /// Do not call this method, it may be removed at any time, it's just an + /// internal implementation detail. + #[doc(hidden)] + pub fn add_key_context(&mut self, key: &str) { + self.inner.key.insert(0, key.to_string()); + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self.inner.kind { + ErrorKind::UnexpectedEof => "unexpected eof encountered".fmt(f)?, + ErrorKind::InvalidCharInString(c) => { + write!(f, "invalid character in string: `{}`", + c.escape_default().collect::())? 
+ } + ErrorKind::InvalidEscape(c) => { + write!(f, "invalid escape character in string: `{}`", + c.escape_default().collect::())? + } + ErrorKind::InvalidHexEscape(c) => { + write!(f, "invalid hex escape character in string: `{}`", + c.escape_default().collect::())? + } + ErrorKind::InvalidEscapeValue(c) => { + write!(f, "invalid escape value: `{}`", c)? + } + ErrorKind::NewlineInString => "newline in string found".fmt(f)?, + ErrorKind::Unexpected(ch) => { + write!(f, "unexpected character found: `{}`", + ch.escape_default().collect::())? + } + ErrorKind::UnterminatedString => "unterminated string".fmt(f)?, + ErrorKind::NewlineInTableKey => "found newline in table key".fmt(f)?, + ErrorKind::Wanted { expected, found } => { + write!(f, "expected {}, found {}", expected, found)? + } + ErrorKind::NumberInvalid => "invalid number".fmt(f)?, + ErrorKind::DateInvalid => "invalid date".fmt(f)?, + ErrorKind::MixedArrayType => "mixed types in an array".fmt(f)?, + ErrorKind::DuplicateTable(ref s) => { + write!(f, "redefinition of table `{}`", s)?; + } + ErrorKind::RedefineAsArray => "table redefined as array".fmt(f)?, + ErrorKind::EmptyTableKey => "empty table key found".fmt(f)?, + ErrorKind::Custom => self.inner.message.fmt(f)?, + ErrorKind::__Nonexhaustive => panic!(), + } + + if self.inner.key.len() > 0 { + write!(f, " for key `")?; + for (i, k) in self.inner.key.iter().enumerate() { + if i > 0 { + write!(f, ".")?; + } + write!(f, "{}", k)?; + } + write!(f, "`")?; + } + + if let Some(line) = self.inner.line { + write!(f, " at line {}", line + 1)?; + } + + Ok(()) + } +} + +impl error::Error for Error { + fn description(&self) -> &str { + match self.inner.kind { + ErrorKind::UnexpectedEof => "unexpected eof encountered", + ErrorKind::InvalidCharInString(_) => "invalid char in string", + ErrorKind::InvalidEscape(_) => "invalid escape in string", + ErrorKind::InvalidHexEscape(_) => "invalid hex escape in string", + ErrorKind::InvalidEscapeValue(_) => "invalid escape value in 
string", + ErrorKind::NewlineInString => "newline in string found", + ErrorKind::Unexpected(_) => "unexpected or invalid character", + ErrorKind::UnterminatedString => "unterminated string", + ErrorKind::NewlineInTableKey => "found newline in table key", + ErrorKind::Wanted { .. } => "expected a token but found another", + ErrorKind::NumberInvalid => "invalid number", + ErrorKind::DateInvalid => "invalid date", + ErrorKind::MixedArrayType => "mixed types in an array", + ErrorKind::DuplicateTable(_) => "duplicate table", + ErrorKind::RedefineAsArray => "table redefined as array", + ErrorKind::EmptyTableKey => "empty table key found", + ErrorKind::Custom => "a custom error", + ErrorKind::__Nonexhaustive => panic!(), + } + } +} + +impl de::Error for Error { + fn custom(msg: T) -> Error { + Error::custom(msg.to_string()) + } +} + +enum Line<'a> { + Table { at: usize, header: Header<'a>, array: bool }, + KeyValue(Cow<'a, str>, Value<'a>), +} + +struct Header<'a> { + first: bool, + array: bool, + tokens: Tokenizer<'a>, +} + +impl<'a> Header<'a> { + fn new(tokens: Tokenizer<'a>, array: bool) -> Header<'a> { + Header { + first: true, + array: array, + tokens: tokens, + } + } + + fn next(&mut self) -> Result>, TokenError> { + self.tokens.eat_whitespace()?; + + if self.first || self.tokens.eat(Token::Period)? { + self.first = false; + self.tokens.eat_whitespace()?; + self.tokens.table_key().map(Some) + } else { + self.tokens.expect(Token::RightBracket)?; + if self.array { + self.tokens.expect(Token::RightBracket)?; + } + + self.tokens.eat_whitespace()?; + if !self.tokens.eat_comment()? 
{ + self.tokens.eat_newline_or_eof()?; + } + Ok(None) + } + } +} + +enum Value<'a> { + Integer(i64), + Float(f64), + Boolean(bool), + String(Cow<'a, str>), + Datetime(&'a str), + Array(Vec>), + InlineTable(Vec<(Cow<'a, str>, Value<'a>)>), +} + +impl<'a> Value<'a> { + fn same_type(&self, other: &Value<'a>) -> bool { + match (self, other) { + (&Value::String(..), &Value::String(..)) | + (&Value::Integer(..), &Value::Integer(..)) | + (&Value::Float(..), &Value::Float(..)) | + (&Value::Boolean(..), &Value::Boolean(..)) | + (&Value::Datetime(..), &Value::Datetime(..)) | + (&Value::Array(..), &Value::Array(..)) | + (&Value::InlineTable(..), &Value::InlineTable(..)) => true, + + _ => false, + } + } +} -- cgit v1.2.3