Implement triple-quoted multiline strings

Details are in the spec/news/readme changes in the patch.
Havoc Pennington 2012-06-18 23:01:43 -04:00
parent 6b0e8c4047
commit e1d5bc7506
11 changed files with 145 additions and 14 deletions

HOCON.md (the spec)

@@ -230,6 +230,22 @@ Some of the "forbidden characters" are forbidden because they
 already have meaning in JSON or HOCON, others are essentially
 reserved keywords to allow future extensions to this spec.
+
+### Multi-line strings
+
+Multi-line strings are similar to Python or Scala, using triple
+quotes. If the three-character sequence `"""` appears, then all
+Unicode characters until a closing `"""` sequence are used
+unmodified to create a string value. Newlines and whitespace
+receive no special treatment. Unlike Scala, and unlike JSON quoted
+strings, Unicode escapes are not interpreted in triple-quoted
+strings.
+
+In Python, `"""foo""""` is a syntax error (a triple-quoted string
+followed by a dangling unbalanced quote). In Scala, it is a
+four-character string `foo"`. HOCON works like Scala; any sequence
+of at least three quotes ends the multi-line string, and any
+"extra" quotes are part of the string.
 
 ### Value concatenation
 
 The value of an object field or array element may consist of
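
To make the "extra quotes" rule concrete, here is a minimal sketch using the library's public API (not part of the patch; it assumes the usual `ConfigFactory.parseString` entry point):

```java
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;

public class SpecExample {
    public static void main(String[] args) {
        // """foo"""" -- four closing quotes: the last three terminate the
        // string and the extra one belongs to the value, as in Scala
        Config conf = ConfigFactory.parseString("a : \"\"\"foo\"\"\"\"");
        System.out.println(conf.getString("a")); // prints: foo"
    }
}
```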

NEWS.md (17 additions)

@@ -1,3 +1,20 @@
+# X.Y.Z: SOMETIME, 2012
+
+ - triple-quoted strings as in Python or Scala
+ - obscure backward incompatibilities:
+    - `""""` previously parsed as two empty strings concatenated
+      into a single empty string; now it parses as an unterminated
+      triple-quoted string.
+    - a triple-quoted string like `"""\n"""` previously parsed as
+      an empty string, a string with one newline character, and
+      another empty string, all concatenated into a single
+      string. Now it parses as a string with two characters
+      (backslash and lowercase "n").
+    - in short, you could previously have two adjacent quoted
+      strings where one was an empty string, and now you can't.
+      As far as I know, the empty string was always worthless in
+      this case and can just be removed.
+
 # 0.4.1: May 22, 2012
 
  - publish as OSGi bundle
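
The two incompatibilities above can be seen through the public API; a minimal sketch (not part of the patch; `ConfigFactory.parseString` and `ConfigException` assumed as the standard entry points):

```java
import com.typesafe.config.Config;
import com.typesafe.config.ConfigException;
import com.typesafe.config.ConfigFactory;

public class NewsExample {
    public static void main(String[] args) {
        // """\n""" -- backslash-n is no longer an escape inside triple quotes
        Config conf = ConfigFactory.parseString("a : \"\"\"\\n\"\"\"");
        System.out.println(conf.getString("a").length()); // 2 (backslash and 'n')

        // """" -- previously two empty strings; now it opens a triple-quoted
        // string that never closes, so parsing fails
        try {
            ConfigFactory.parseString("b : \"\"\"\"");
        } catch (ConfigException e) {
            System.out.println("unterminated triple-quoted string");
        }
    }
}
```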

README.md

@@ -240,6 +240,7 @@ tree that you could have written (less conveniently) in JSON.
 - substitutions normally cause an error if unresolved, but
   there is a syntax `${?a.b}` to permit them to be missing.
 - `+=` syntax to append elements to arrays, `path += "/bin"`
+- multi-line strings with triple quotes as in Python or Scala
 
 ### Examples of HOCON

Tokenizer.java

@@ -416,6 +416,31 @@ final class Tokenizer {
         }
     }
 
+    private void appendTripleQuotedString(StringBuilder sb) throws ProblemException {
+        // we are after the opening triple quote and need to consume the
+        // close triple
+        int consecutiveQuotes = 0;
+        for (;;) {
+            int c = nextCharRaw();
+
+            if (c == '"') {
+                consecutiveQuotes += 1;
+            } else if (consecutiveQuotes >= 3) {
+                // the last three quotes end the string and the others are
+                // kept.
+                sb.setLength(sb.length() - 3);
+                putBack(c);
+                break;
+            } else {
+                consecutiveQuotes = 0;
+                if (c == -1)
+                    throw problem("End of input but triple-quoted string was still open");
+            }
+
+            sb.appendCodePoint(c);
+        }
+    }
+
     private Token pullQuotedString() throws ProblemException {
         // the open quote has already been consumed
         StringBuilder sb = new StringBuilder();
@@ -436,6 +461,17 @@ final class Tokenizer {
                 sb.appendCodePoint(c);
             }
         } while (c != '"');
 
+        // maybe switch to triple-quoted string, sort of hacky...
+        if (sb.length() == 0) {
+            int third = nextCharRaw();
+            if (third == '"') {
+                appendTripleQuotedString(sb);
+            } else {
+                putBack(third);
+            }
+        }
+
         return Tokens.newString(lineOrigin, sb.toString());
     }
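
The quote-counting loop is easiest to follow in isolation. Below is a standalone sketch of the same algorithm (hypothetical helper, not part of the patch) run against a plain String instead of the tokenizer's character stream:

```java
public class TripleQuoteSketch {
    // Returns the content of a triple-quoted string; `pos` points just past
    // the opening """ in `input`, mirroring appendTripleQuotedString above.
    static String readTripleQuoted(String input, int pos) {
        StringBuilder sb = new StringBuilder();
        int consecutiveQuotes = 0;
        for (int i = pos; ; i++) {
            int c = (i < input.length()) ? input.charAt(i) : -1; // -1 = end of input
            if (c == '"') {
                consecutiveQuotes += 1;
            } else if (consecutiveQuotes >= 3) {
                // the last three quotes close the string; any extra quotes
                // already appended stay in the value
                sb.setLength(sb.length() - 3);
                return sb.toString();
            } else {
                consecutiveQuotes = 0;
                if (c == -1)
                    throw new RuntimeException("triple-quoted string was still open");
            }
            sb.append((char) c); // quotes are appended too, trimmed at the end
        }
    }

    public static void main(String[] args) {
        System.out.println(readTripleQuoted("foo\"\"\"\" rest", 0)); // foo"
        System.out.println(readTripleQuoted("\"\"\"\"\" rest", 0));  // ""
    }
}
```

The design needs no lookahead: quotes are appended optimistically, and once a non-quote (or end of input) shows the run of quotes is over, exactly three are trimmed as the terminator.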

New test resource (equivalents test: original JSON)

@@ -0,0 +1,16 @@
+{
+    "a" : "hello",
+    "b" : "hello\nworld",
+    "b1" : "hello\\nworld",
+    "c" : " \n hello \n world \n ",
+    "d" : " \"hello\" ",
+    "e" : "\"hello\"",
+    "f1" : "hello\"",
+    "f2" : "hello\"\"",
+    "f3" : "hello\"\"\"",
+    "f4" : "hello\"\"\"\"",
+    "g1" : "\"hello",
+    "g2" : "\"\"hello",
+    "h" : "\"\"",
+    "i" : "foo\"\"bar"
+}

New test resource (equivalents test: triple-quoted .conf variant)

@@ -0,0 +1,20 @@
+{
+    a : """hello""",
+    b : """hello
+world""",
+    b1 : """hello\nworld""",
+    c : """ 
+ hello 
+ world 
+ """,
+    d : """ "hello" """,
+    e : """"hello"""",
+    f1 : """hello"""",
+    f2 : """hello""""",
+    f3 : """hello"""""",
+    f4 : """hello""""""",
+    g1 : """"hello""",
+    g2 : """""hello""",
+    h : """""""",
+    i : """foo""bar"""
+}

Test resource (.conf with substitutions)

@@ -1,8 +1,7 @@
 {
     "" : { "" : { "" : 42 } },
     "42_a" : ${""."".""},
-    "42_b" : ${""""."""".""""},
-    "42_c" : ${ """".""""."""" },
+    "42_b" : ${ "".""."" },
     "a" : { "b" : { "c" : 57 } },
     "57_a" : ${a.b.c},
     "57_b" : ${"a"."b"."c"},

ConfParserTest.scala

@@ -129,9 +129,6 @@ class ConfParserTest extends TestUtils {
assertEquals(path("a", "", "b"), parsePath("a.\"\".b")) assertEquals(path("a", "", "b"), parsePath("a.\"\".b"))
assertEquals(path("a", ""), parsePath("a.\"\"")) assertEquals(path("a", ""), parsePath("a.\"\""))
assertEquals(path("", "b"), parsePath("\"\".b")) assertEquals(path("", "b"), parsePath("\"\".b"))
assertEquals(path(""), parsePath("\"\"\"\""))
assertEquals(path("a", ""), parsePath("a.\"\"\"\""))
assertEquals(path("", "b"), parsePath("\"\"\"\".b"))
assertEquals(path("", "", ""), parsePath(""" "".""."" """)) assertEquals(path("", "", ""), parsePath(""" "".""."" """))
assertEquals(path("a-c"), parsePath("a-c")) assertEquals(path("a-c"), parsePath("a-c"))
assertEquals(path("a_c"), parsePath("a_c")) assertEquals(path("a_c"), parsePath("a_c"))

ConfigTest.scala

@@ -826,7 +826,6 @@ class ConfigTest extends TestUtils {
         assertEquals(42, conf.getInt("42_a"))
         assertEquals(42, conf.getInt("42_b"))
-        assertEquals(42, conf.getInt("42_c"))
         assertEquals(57, conf.getInt("57_a"))
         assertEquals(57, conf.getInt("57_b"))
         assertEquals(103, conf.getInt("103_a"))

EquivalentsTest.scala

@@ -134,8 +134,8 @@ class EquivalentsTest extends TestUtils {
         // This is a little "checksum" to be sure we really tested what we were expecting.
         // it breaks every time you add a file, so you have to update it.
-        assertEquals(4, dirCount)
+        assertEquals(5, dirCount)
         // this is the number of files not named original.*
-        assertEquals(14, fileCount)
+        assertEquals(15, fileCount)
     }
 }

TokenizerTest.scala

@@ -34,37 +34,37 @@ class TokenizerTest extends TestUtils {
         // is actually extra work).
         val expected = List(Tokens.START, Tokens.COMMA, Tokens.COLON, Tokens.EQUALS, Tokens.CLOSE_CURLY,
             Tokens.OPEN_CURLY, Tokens.CLOSE_SQUARE, Tokens.OPEN_SQUARE, Tokens.PLUS_EQUALS, tokenString("foo"),
-            tokenTrue, tokenDouble(3.14), tokenFalse,
+            tokenString("bar"), tokenTrue, tokenDouble(3.14), tokenFalse,
             tokenLong(42), tokenNull, tokenSubstitution(tokenUnquoted("a.b")),
             tokenOptionalSubstitution(tokenUnquoted("x.y")),
             tokenKeySubstitution("c.d"), tokenLine(1), Tokens.END)
-        assertEquals(expected, tokenizeAsList(""",:=}{][+="foo"true3.14false42null${a.b}${?x.y}${"c.d"}""" + "\n"))
+        assertEquals(expected, tokenizeAsList(""",:=}{][+="foo"""" + "\"\"\"bar\"\"\"" + """true3.14false42null${a.b}${?x.y}${"c.d"}""" + "\n"))
     }
 
     @Test
     def tokenizeAllTypesWithSingleSpaces() {
         val expected = List(Tokens.START, Tokens.COMMA, Tokens.COLON, Tokens.EQUALS, Tokens.CLOSE_CURLY,
             Tokens.OPEN_CURLY, Tokens.CLOSE_SQUARE, Tokens.OPEN_SQUARE, Tokens.PLUS_EQUALS, tokenString("foo"),
-            tokenUnquoted(" "), tokenLong(42), tokenUnquoted(" "), tokenTrue, tokenUnquoted(" "),
+            tokenUnquoted(" "), tokenString("bar"), tokenUnquoted(" "), tokenLong(42), tokenUnquoted(" "), tokenTrue, tokenUnquoted(" "),
             tokenDouble(3.14), tokenUnquoted(" "), tokenFalse, tokenUnquoted(" "), tokenNull,
             tokenUnquoted(" "), tokenSubstitution(tokenUnquoted("a.b")), tokenUnquoted(" "),
             tokenOptionalSubstitution(tokenUnquoted("x.y")), tokenUnquoted(" "),
             tokenKeySubstitution("c.d"),
             tokenLine(1), Tokens.END)
-        assertEquals(expected, tokenizeAsList(""" , : = } { ] [ += "foo" 42 true 3.14 false null ${a.b} ${?x.y} ${"c.d"} """ + "\n "))
+        assertEquals(expected, tokenizeAsList(""" , : = } { ] [ += "foo" """ + "\"\"\"bar\"\"\"" + """ 42 true 3.14 false null ${a.b} ${?x.y} ${"c.d"} """ + "\n "))
     }
 
     @Test
     def tokenizeAllTypesWithMultipleSpaces() {
         val expected = List(Tokens.START, Tokens.COMMA, Tokens.COLON, Tokens.EQUALS, Tokens.CLOSE_CURLY,
             Tokens.OPEN_CURLY, Tokens.CLOSE_SQUARE, Tokens.OPEN_SQUARE, Tokens.PLUS_EQUALS, tokenString("foo"),
-            tokenUnquoted(" "), tokenLong(42), tokenUnquoted(" "), tokenTrue, tokenUnquoted(" "),
+            tokenUnquoted(" "), tokenString("bar"), tokenUnquoted(" "), tokenLong(42), tokenUnquoted(" "), tokenTrue, tokenUnquoted(" "),
             tokenDouble(3.14), tokenUnquoted(" "), tokenFalse, tokenUnquoted(" "), tokenNull,
             tokenUnquoted(" "), tokenSubstitution(tokenUnquoted("a.b")), tokenUnquoted(" "),
             tokenOptionalSubstitution(tokenUnquoted("x.y")), tokenUnquoted(" "),
             tokenKeySubstitution("c.d"),
             tokenLine(1), Tokens.END)
-        assertEquals(expected, tokenizeAsList(""" , : = } { ] [ += "foo" 42 true 3.14 false null ${a.b} ${?x.y} ${"c.d"} """ + "\n "))
+        assertEquals(expected, tokenizeAsList(""" , : = } { ] [ += "foo" """ + "\"\"\"bar\"\"\"" + """ 42 true 3.14 false null ${a.b} ${?x.y} ${"c.d"} """ + "\n "))
     }
@Test @Test
@@ -179,6 +179,36 @@ class TokenizerTest extends TestUtils {
         }
     }
 
+    @Test
+    def tokenizerEmptyTripleQuoted(): Unit = {
+        assertEquals(List(Tokens.START, tokenString(""), Tokens.END),
+            tokenizeAsList("\"\"\"\"\"\""))
+    }
+
+    @Test
+    def tokenizerTrivialTripleQuoted(): Unit = {
+        assertEquals(List(Tokens.START, tokenString("bar"), Tokens.END),
+            tokenizeAsList("\"\"\"bar\"\"\""))
+    }
+
+    @Test
+    def tokenizerNoEscapesInTripleQuoted(): Unit = {
+        assertEquals(List(Tokens.START, tokenString("\\n"), Tokens.END),
+            tokenizeAsList("\"\"\"\\n\"\"\""))
+    }
+
+    @Test
+    def tokenizerTrailingQuotesInTripleQuoted(): Unit = {
+        assertEquals(List(Tokens.START, tokenString("\"\"\""), Tokens.END),
+            tokenizeAsList("\"\"\"\"\"\"\"\"\""))
+    }
+
+    @Test
+    def tokenizerNewlineInTripleQuoted(): Unit = {
+        assertEquals(List(Tokens.START, tokenString("foo\nbar"), Tokens.END),
+            tokenizeAsList("\"\"\"foo\nbar\"\"\""))
+    }
+
     @Test
     def tokenizerParseNumbers(): Unit = {
         abstract class NumberTest(val s: String, val result: Token)