From 9548288de5dca4d765eba0755955108009294951 Mon Sep 17 00:00:00 2001 From: Garrett Berg Date: Thu, 27 Jul 2017 16:37:30 -0600 Subject: fix bugs with pretty --- src/ser.rs | 52 +++++++++++++++++++++++----------------------------- tests/pretty.rs | 19 +++++++++++++++++++ tests/valid.rs | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 92 insertions(+), 34 deletions(-) diff --git a/src/ser.rs b/src/ser.rs index 3c9b354..febe429 100644 --- a/src/ser.rs +++ b/src/ser.rs @@ -450,14 +450,18 @@ impl<'a> Serializer<'a> { if ok { drop(write!(self.dst, "{}", key)); } else { - self.emit_str(key)?; + self.emit_str(key, true)?; } Ok(()) } - fn emit_str(&mut self, value: &str) -> Result<(), Error> { - let do_pretty = if self.settings.pretty_string { - value.contains('\n') + fn emit_str(&mut self, value: &str, is_key: bool) -> Result<(), Error> { + let do_pretty = if !is_key && self.settings.pretty_string { + // do pretty only if the block contains at least one newline + value.contains('\n') + // but not if it contains any of these, as they are not + // representable + && !value.contains("'''") } else { false }; @@ -466,31 +470,21 @@ impl<'a> Serializer<'a> { } else { drop(write!(self.dst, "\"")); } - for ch in value.chars() { - match ch { - '\u{8}' => drop(write!(self.dst, "\\b")), - '\u{9}' => drop(write!(self.dst, "\\t")), - '\u{a}' => { - if do_pretty { - drop(write!(self.dst, "\n")); - } else { - drop(write!(self.dst, "\\n")); - } - }, - '\u{c}' => drop(write!(self.dst, "\\f")), - '\u{d}' => drop(write!(self.dst, "\\r")), - '\u{22}' => { - if do_pretty { - drop(write!(self.dst, "\"")) - } else { - drop(write!(self.dst, "\\\"")) - } - }, - '\u{5c}' => drop(write!(self.dst, "\\\\")), - c if c < '\u{1f}' => { - drop(write!(self.dst, "\\u{:04X}", ch as u32)) + if do_pretty { + drop(write!(self.dst, "{}", value)); + } else { + for ch in value.chars() { + match ch { + '\u{8}' => drop(write!(self.dst, "\\b")), + '\u{9}' => drop(write!(self.dst, "\\t")), + '\u{a}' => drop(write!(self.dst, "\\n")), + '\u{c}' => drop(write!(self.dst, "\\f")), + '\u{d}' => drop(write!(self.dst, "\\r")), + '\u{22}' => drop(write!(self.dst, "\\\"")), + '\u{5c}' => drop(write!(self.dst, "\\\\")), + c if c < '\u{1f}' => drop(write!(self.dst, "\\u{:04X}", ch as u32)), + ch => drop(write!(self.dst, "{}", ch)), } - ch => drop(write!(self.dst, "{}", ch)), } } if do_pretty { @@ -651,7 +645,7 @@ impl<'a, 'b> ser::Serializer for &'b mut Serializer<'a> { fn serialize_str(mut self, value: &str) -> Result<(), Self::Error> { self.emit_key("string")?; - self.emit_str(value)?; + self.emit_str(value, false)?; if let State::Table { .. } = self.state { self.dst.push_str("\n"); } diff --git a/tests/pretty.rs b/tests/pretty.rs index ae9a839..f7d1612 100644 --- a/tests/pretty.rs +++ b/tests/pretty.rs @@ -166,3 +166,22 @@ fn pretty_no_string() { } assert_eq!(toml, &result); } + +const PRETTY_TRICKY: &'static str = r"[example] +text = ''' +this is the first line +This has a ''\' in it for no reason +this is the third line +''' +"; + +#[test] +fn pretty_tricky() { + let toml = PRETTY_TRICKY; + let value: toml::Value = toml::from_str(toml).unwrap(); + let mut result = String::with_capacity(128); + value.serialize(&mut toml::Serializer::pretty(&mut result)).unwrap(); + println!("EXPECTED:\n{}", toml); + println!("\nRESULT:\n{}", result); + assert_eq!(toml, &result); +} diff --git a/tests/valid.rs b/tests/valid.rs index 6a04866..b186800 100644 --- a/tests/valid.rs +++ b/tests/valid.rs @@ -1,7 +1,9 @@ extern crate toml; +extern crate serde; extern crate serde_json; -use toml::Value as Toml; +use toml::{Value as Toml, to_string_pretty}; +use serde::ser::Serialize; use serde_json::Value as Json; fn to_json(toml: toml::Value) -> Json { @@ -40,10 +42,52 @@ fn to_json(toml: toml::Value) -> Json { } } -fn run(toml: &str, json: &str) { - println!("parsing:\n{}", toml); - let toml: Toml = toml.parse().unwrap(); - let json: Json = json.parse().unwrap(); +fn run_pretty(toml: Toml) { + // Assert toml == json + println!("### pretty round trip parse."); + + // standard pretty + let toml_raw = to_string_pretty(&toml).expect("to string"); + let toml2 = toml_raw.parse().expect("from string"); + assert_eq!(toml, toml2); + + // pretty with indent 2 + let mut result = String::with_capacity(128); + { + let mut serializer = toml::Serializer::pretty(&mut result); + serializer.pretty_array_indent(2); + toml.serialize(&mut serializer).expect("to string"); + } + assert_eq!(toml, result.parse().expect("from str")); + result.clear(); + { + let mut serializer = toml::Serializer::new(&mut result); + serializer.pretty_array_trailing_comma(false); + toml.serialize(&mut serializer).expect("to string"); + } + assert_eq!(toml, result.parse().expect("from str")); + result.clear(); + { + let mut serializer = toml::Serializer::pretty(&mut result); + serializer.pretty_string(false); + toml.serialize(&mut serializer).expect("to string"); + assert_eq!(toml, toml2); + } + assert_eq!(toml, result.parse().expect("from str")); + result.clear(); + { + let mut serializer = toml::Serializer::pretty(&mut result); + serializer.pretty_array(false); + toml.serialize(&mut serializer).expect("to string"); + assert_eq!(toml, toml2); + } + assert_eq!(toml, result.parse().expect("from str")); +} + +fn run(toml_raw: &str, json_raw: &str) { + println!("parsing:\n{}", toml_raw); + let toml: Toml = toml_raw.parse().unwrap(); + let json: Json = json_raw.parse().unwrap(); // Assert toml == json let toml_json = to_json(toml.clone()); @@ -56,6 +100,7 @@ fn run(toml: &str, json: &str) { println!("round trip parse: {}", toml); let toml2 = toml.to_string().parse().unwrap(); assert_eq!(toml, toml2); + run_pretty(toml); } macro_rules! test( ($name:ident, $toml:expr, $json:expr) => ( -- cgit v1.2.3 From a29c1eeef04eed525d7fe48b8e06ae31c0a60575 Mon Sep 17 00:00:00 2001 From: Garrett Berg Date: Thu, 27 Jul 2017 22:44:58 -0600 Subject: make single lines also pretty --- src/ser.rs | 146 +++++++++++++++++++++++++++++++++++++++----------------- tests/pretty.rs | 21 +++++--- 2 files changed, 117 insertions(+), 50 deletions(-) diff --git a/src/ser.rs b/src/ser.rs index febe429..516b25f 100644 --- a/src/ser.rs +++ b/src/ser.rs @@ -201,6 +201,7 @@ enum State<'a> { parent: &'a State<'a>, first: &'a Cell, type_: &'a Cell>, + len: Option, }, End, } @@ -210,6 +211,7 @@ pub struct SerializeSeq<'a: 'b, 'b> { ser: &'b mut Serializer<'a>, first: Cell, type_: Cell>, + len: Option, } #[doc(hidden)] @@ -377,12 +379,12 @@ impl<'a> Serializer<'a> { fn _emit_key(&mut self, state: &State) -> Result<(), Error> { match *state { State::End => Ok(()), - State::Array { parent, first, type_ } => { + State::Array { parent, first, type_, len } => { assert!(type_.get().is_some()); if first.get() { self._emit_key(parent)?; } - self.emit_array(first) + self.emit_array(first, len) } State::Table { parent, first, table_emitted, key } => { if table_emitted.get() { @@ -399,9 +401,16 @@ impl<'a> Serializer<'a> { } } - fn emit_array(&mut self, first: &Cell) -> Result<(), Error> { - match self.settings.array { - Some(ref a) => { + fn emit_array(&mut self, first: &Cell, len: Option) -> Result<(), Error> { + match (len, &self.settings.array) { + (Some(0...1), _) | (_, &None) => { + if first.get() { + self.dst.push_str("[") + } else { + self.dst.push_str(", ") + } + }, + (_, &Some(ref a)) => { if first.get() { self.dst.push_str("[\n") } else { @@ -411,13 +420,6 @@ impl<'a> Serializer<'a> { self.dst.push_str(" "); } }, - None => { - if first.get() { - self.dst.push_str("[") - } else { - self.dst.push_str(", ") - } - }, } Ok(()) } @@ -456,41 +458,95 @@ impl<'a> Serializer<'a> { } fn emit_str(&mut self, value: &str, is_key: bool) -> Result<(), Error> { - let do_pretty = if !is_key && self.settings.pretty_string { - // do pretty only if the block contains at least one newline - value.contains('\n') - // but not if it contains any of these, as they are not - // representable - && !value.contains("'''") + /// Use ''' or ' + #[derive(PartialEq)] + enum Multi { + False, + True, + Single, + } + fn do_pretty(value: &str) -> Option<(Multi, String)> { + // For doing pretty prints we store in a new String + // because there are too many cases where pretty cannot + // work. We need to determine: + // - if we are a "multi-line" pretty (if there are \n) + // - if ['''] appears if multi or ['] if single + // - if there are any invalid control characters + // + // Doing it any other way would require multiple passes + // to determine if a pretty string works or not. + let mut out = String::with_capacity(value.len() * 2); + let mut multi = Multi::False; + // found consecutive single quotes + let mut max_found_singles = 0; + let mut found_singles = 0; + + for ch in value.chars() { + if ch == '\'' { + found_singles += 1; + if found_singles >= 3 { + return None; + } + } else { + if found_singles > max_found_singles { + max_found_singles = found_singles; + } + found_singles = 0 + } + match ch { + '\t' => {}, + '\n' => multi = Multi::True, + // note that the following are invalid: \b \f \r + c if c < '\u{1f}' => return None, // Invalid control character + _ => {} + } + out.push(ch); + } + if found_singles > max_found_singles { + max_found_singles = found_singles; + } + debug_assert!(max_found_singles < 3); + if multi == Multi::False && max_found_singles >= 1 { + // no newlines, but must use ''' because it has ' in it + multi = Multi::Single; + } + Some((multi, out)) + } + + let pretty = if !is_key && self.settings.pretty_string { + do_pretty(value) } else { - false + None }; - if do_pretty { - drop(write!(self.dst, "'''\n")); + if let Some((multi, s)) = pretty { + // A pretty string + match multi { + Multi::True => self.dst.push_str("'''\n"), + Multi::Single => self.dst.push_str("'''"), + Multi::False => self.dst.push('\''), + } + self.dst.push_str(&s); + match multi { + Multi::False => self.dst.push('\''), + _ => self.dst.push_str("'''"), + } } else { + // Not a pretty string drop(write!(self.dst, "\"")); - } - if do_pretty { - drop(write!(self.dst, "{}", value)); - } else { for ch in value.chars() { match ch { - '\u{8}' => drop(write!(self.dst, "\\b")), - '\u{9}' => drop(write!(self.dst, "\\t")), - '\u{a}' => drop(write!(self.dst, "\\n")), - '\u{c}' => drop(write!(self.dst, "\\f")), - '\u{d}' => drop(write!(self.dst, "\\r")), - '\u{22}' => drop(write!(self.dst, "\\\"")), - '\u{5c}' => drop(write!(self.dst, "\\\\")), + '\u{8}' => self.dst.push_str("\\b"), + '\u{9}' => self.dst.push_str("\\t"), + '\u{a}' => self.dst.push_str("\\n"), + '\u{c}' => self.dst.push_str("\\f"), + '\u{d}' => self.dst.push_str("\\r"), + '\u{22}' => self.dst.push_str("\\\""), + '\u{5c}' => self.dst.push_str("\\\\"), c if c < '\u{1f}' => drop(write!(self.dst, "\\u{:04X}", ch as u32)), - ch => drop(write!(self.dst, "{}", ch)), + ch => self.dst.push(ch), } } - } - if do_pretty { - drop(write!(self.dst, "'''")); - } else { - drop(write!(self.dst, "\"")); + self.dst.push_str("\""); } Ok(()) } @@ -703,13 +759,14 @@ impl<'a, 'b> ser::Serializer for &'b mut Serializer<'a> { Err(Error::UnsupportedType) } - fn serialize_seq(mut self, _len: Option) + fn serialize_seq(mut self, len: Option) -> Result { self.array_type("array")?; Ok(SerializeSeq { ser: self, first: Cell::new(true), type_: Cell::new(None), + len: len, }) } @@ -782,6 +839,7 @@ impl<'a, 'b> ser::SerializeSeq for SerializeSeq<'a, 'b> { parent: &self.ser.state, first: &self.first, type_: &self.type_, + len: self.len, }, settings: self.ser.settings.clone(), })?; @@ -793,16 +851,16 @@ impl<'a, 'b> ser::SerializeSeq for SerializeSeq<'a, 'b> { match self.type_.get() { Some("table") => return Ok(()), Some(_) => { - match self.ser.settings.array { - Some(ref a) => { + match (self.len, &self.ser.settings.array) { + (Some(0...1), _) | (_, &None) => { + self.ser.dst.push_str("]"); + }, + (_, &Some(ref a)) => { if a.trailing_comma { self.ser.dst.push_str(","); } self.ser.dst.push_str("\n]"); }, - None => { - self.ser.dst.push_str("]"); - } } } None => { diff --git a/tests/pretty.rs b/tests/pretty.rs index f7d1612..308772d 100644 --- a/tests/pretty.rs +++ b/tests/pretty.rs @@ -41,11 +41,12 @@ fn disable_pretty() { const PRETTY_STD: &'static str = "\ [example] array = [ - \"item 1\", - \"item 2\", + 'item 1', + 'item 2', ] empty = [] -oneline = \"this has no newlines.\" +one = ['one'] +oneline = 'this has no newlines.' text = ''' this is the first line this is the second line @@ -67,15 +68,21 @@ fn pretty_std() { const PRETTY_INDENT_2: &'static str = "\ [example] array = [ - \"item 1\", - \"item 2\", + 'item 1', + 'item 2', ] empty = [] -oneline = \"this has no newlines.\" +one = ['one'] +oneline = 'this has no newlines.' text = ''' this is the first line this is the second line ''' +three = [ + 'one', + 'two', + 'three', +] "; #[test] @@ -88,6 +95,7 @@ fn pretty_indent_2() { serializer.pretty_array_indent(2); value.serialize(&mut serializer).unwrap(); } + println!(">> Result:\n{}", result); assert_eq!(toml, &result); } @@ -168,6 +176,7 @@ fn pretty_no_string() { } const PRETTY_TRICKY: &'static str = r"[example] +single = '''this is a single line but has '' for no reason''' text = ''' this is the first line This has a ''\' in it for no reason -- cgit v1.2.3 From ca81a4a291930aa25637cf814b2f760f4e9d487d Mon Sep 17 00:00:00 2001 From: Garrett Berg Date: Fri, 28 Jul 2017 09:12:21 -0600 Subject: add """ for non-literals with newlines, clean up logic and add tests --- src/ser.rs | 146 +++++++++++++++++++++++++++++++++++--------------------- tests/pretty.rs | 32 ++++++++++--- 2 files changed, 116 insertions(+), 62 deletions(-) diff --git a/src/ser.rs b/src/ser.rs index 516b25f..721fbf1 100644 --- a/src/ser.rs +++ b/src/ser.rs @@ -458,14 +458,21 @@ impl<'a> Serializer<'a> { } fn emit_str(&mut self, value: &str, is_key: bool) -> Result<(), Error> { - /// Use ''' or ' #[derive(PartialEq)] - enum Multi { - False, - True, - Single, + enum Type { + NewlineTripple, + OnelineTripple, + OnelineSingle, } - fn do_pretty(value: &str) -> Option<(Multi, String)> { + + enum Repr { + /// represent as a literal string (using '') + Literal(String, Type), + /// represent the std way (using "") + Std(Type), + } + + fn do_pretty(value: &str) -> Repr { // For doing pretty prints we store in a new String // because there are too many cases where pretty cannot // work. We need to determine: @@ -476,77 +483,106 @@ impl<'a> Serializer<'a> { // Doing it any other way would require multiple passes // to determine if a pretty string works or not. let mut out = String::with_capacity(value.len() * 2); - let mut multi = Multi::False; + let mut ty = Type::OnelineSingle; // found consecutive single quotes let mut max_found_singles = 0; let mut found_singles = 0; + let mut can_be_pretty = true; for ch in value.chars() { - if ch == '\'' { - found_singles += 1; - if found_singles >= 3 { - return None; + if can_be_pretty { + if ch == '\'' { + found_singles += 1; + if found_singles >= 3 { + can_be_pretty = false; + } + } else { + if found_singles > max_found_singles { + max_found_singles = found_singles; + } + found_singles = 0 + } + match ch { + '\t' => {}, + '\n' => ty = Type::NewlineTripple, + // note that the following are invalid: \b \f \r + c if c < '\u{1f}' => can_be_pretty = false, // Invalid control character + _ => {} } + out.push(ch); } else { - if found_singles > max_found_singles { - max_found_singles = found_singles; + // the string cannot be represented as pretty, + // still check if it should be multiline + if ch == '\n' { + ty = Type::NewlineTripple; } - found_singles = 0 } - match ch { - '\t' => {}, - '\n' => multi = Multi::True, - // note that the following are invalid: \b \f \r - c if c < '\u{1f}' => return None, // Invalid control character - _ => {} - } - out.push(ch); + } + if !can_be_pretty { + debug_assert!(ty != Type::OnelineTripple); + return Repr::Std(ty); } if found_singles > max_found_singles { max_found_singles = found_singles; } debug_assert!(max_found_singles < 3); - if multi == Multi::False && max_found_singles >= 1 { + if ty == Type::OnelineSingle && max_found_singles >= 1 { // no newlines, but must use ''' because it has ' in it - multi = Multi::Single; + ty = Type::OnelineTripple; } - Some((multi, out)) + Repr::Literal(out, ty) } - let pretty = if !is_key && self.settings.pretty_string { + let repr = if !is_key && self.settings.pretty_string { do_pretty(value) } else { - None + Repr::Std(Type::OnelineSingle) }; - if let Some((multi, s)) = pretty { - // A pretty string - match multi { - Multi::True => self.dst.push_str("'''\n"), - Multi::Single => self.dst.push_str("'''"), - Multi::False => self.dst.push('\''), - } - self.dst.push_str(&s); - match multi { - Multi::False => self.dst.push('\''), - _ => self.dst.push_str("'''"), - } - } else { - // Not a pretty string - drop(write!(self.dst, "\"")); - for ch in value.chars() { - match ch { - '\u{8}' => self.dst.push_str("\\b"), - '\u{9}' => self.dst.push_str("\\t"), - '\u{a}' => self.dst.push_str("\\n"), - '\u{c}' => self.dst.push_str("\\f"), - '\u{d}' => self.dst.push_str("\\r"), - '\u{22}' => self.dst.push_str("\\\""), - '\u{5c}' => self.dst.push_str("\\\\"), - c if c < '\u{1f}' => drop(write!(self.dst, "\\u{:04X}", ch as u32)), - ch => self.dst.push(ch), + match repr { + Repr::Literal(literal, ty) => { + // A pretty string + match ty { + Type::NewlineTripple => self.dst.push_str("'''\n"), + Type::OnelineTripple => self.dst.push_str("'''"), + Type::OnelineSingle => self.dst.push('\''), } - } - self.dst.push_str("\""); + self.dst.push_str(&literal); + match ty { + Type::OnelineSingle => self.dst.push('\''), + _ => self.dst.push_str("'''"), + } + }, + Repr::Std(ty) => { + match ty { + Type::NewlineTripple => self.dst.push_str("\"\"\"\n"), + Type::OnelineSingle => self.dst.push('"'), + _ => unreachable!(), + } + for ch in value.chars() { + match ch { + '\u{8}' => self.dst.push_str("\\b"), + '\u{9}' => self.dst.push_str("\\t"), + '\u{a}' => { + match ty { + Type::NewlineTripple => self.dst.push('\n'), + Type::OnelineSingle => self.dst.push_str("\\n"), + _ => unreachable!(), + } + }, + '\u{c}' => self.dst.push_str("\\f"), + '\u{d}' => self.dst.push_str("\\r"), + '\u{22}' => self.dst.push_str("\\\""), + '\u{5c}' => self.dst.push_str("\\\\"), + c if c < '\u{1f}' => drop(write!(self.dst, "\\u{:04X}", ch as u32)), + ch => self.dst.push(ch), + } + } + match ty { + Type::NewlineTripple => self.dst.push_str("\"\"\""), + Type::OnelineSingle => self.dst.push('"'), + _ => unreachable!(), + } + }, } Ok(()) } diff --git a/tests/pretty.rs b/tests/pretty.rs index 308772d..cceb815 100644 --- a/tests/pretty.rs +++ b/tests/pretty.rs @@ -175,14 +175,32 @@ fn pretty_no_string() { assert_eq!(toml, &result); } -const PRETTY_TRICKY: &'static str = r"[example] -single = '''this is a single line but has '' for no reason''' -text = ''' -this is the first line -This has a ''\' in it for no reason -this is the third line +const PRETTY_TRICKY: &'static str = r##"[example] +f = "\f" +glass = ''' +Nothing too unusual, except that I can eat glass in: +- Greek: Μπορώ να φάω σπασμένα γυαλιά χωρίς να πάθω τίποτα. +- Polish: Mogę jeść szkło, i mi nie szkodzi. +- Hindi: मैं काँच खा सकता हूँ, मुझे उस से कोई पीडा नहीं होती. +- Japanese: 私はガラスを食べられます。それは私を傷つけません。 ''' -"; +r = "\r" +r_newline = """ +\r +""" +single = '''this is a single line but has '' cuz it's tricky''' +single_tricky = "single line with ''' in it" +tabs = ''' +this is pretty standard + except for some tabs right here +''' +text = """ +this is the first line. +This has a ''' in it and \"\"\" cuz it's tricky yo +Also ' and \" because why not +this is the third line +""" +"##; #[test] fn pretty_tricky() { -- cgit v1.2.3