Implement triple-quoted multiline strings

Details are in the spec/news/readme changes in the patch.
This commit is contained in:
Havoc Pennington 2012-06-18 23:01:43 -04:00
parent 6b0e8c4047
commit e1d5bc7506
11 changed files with 145 additions and 14 deletions

View File

@ -230,6 +230,22 @@ Some of the "forbidden characters" are forbidden because they
already have meaning in JSON or HOCON, others are essentially
reserved keywords to allow future extensions to this spec.
### Multi-line strings
Multi-line strings are similar to Python or Scala, using triple
quotes. If the three-character sequence `"""` appears, then all
Unicode characters until a closing `"""` sequence are used
unmodified to create a string value. Newlines and whitespace
receive no special treatment. Unlike Scala, and unlike JSON quoted
strings, Unicode escapes are not interpreted in triple-quoted
strings.
In Python, `"""foo""""` is a syntax error (a triple-quoted string
followed by a dangling unbalanced quote). In Scala, it is a
four-character string `foo"`. HOCON works like Scala; any sequence
of at least three quotes ends the multi-line string, and any
"extra" quotes are part of the string.
### Value concatenation
The value of an object field or array element may consist of

17
NEWS.md
View File

@ -1,3 +1,20 @@
# X.Y.Z: SOMETIME, 2012
- triple-quoted strings as in Python or Scala
- obscure backward incompatibilities:
- `""""` previously parsed as two empty strings concatenated
into a single empty string, now it parses as an unterminated
triple-quoted string.
- a triple-quoted string like `"""\n"""` previously parsed as
an empty string, a string with one newline character, and
another empty string, all concatenated into a single
string. Now it parses as a string with two characters
(backslash and lowercase "n").
- in short, you could previously have two adjacent quoted strings
where one was an empty string, and now you can't. As far as
I know, the empty string was always worthless in this case
and can simply be removed.
# 0.4.1: May 22, 2012
- publish as OSGi bundle

View File

@ -240,6 +240,7 @@ tree that you could have written (less conveniently) in JSON.
- substitutions normally cause an error if unresolved, but
there is a syntax `${?a.b}` to permit them to be missing.
- `+=` syntax to append elements to arrays, `path += "/bin"`
- multi-line strings with triple quotes as in Python or Scala
### Examples of HOCON

View File

@ -416,6 +416,31 @@ final class Tokenizer {
}
}
private void appendTripleQuotedString(StringBuilder sb) throws ProblemException {
    // The opening """ has already been consumed. Read raw characters
    // (no escape processing), tracking runs of consecutive quotes,
    // until a run of three or more quotes terminates the string.
    int quoteRun = 0;
    while (true) {
        int c = nextCharRaw();
        if (c == '"') {
            quoteRun += 1;
        } else if (quoteRun >= 3) {
            // A run of >= 3 quotes just ended: the final three quotes
            // close the string, and any "extra" quotes before them
            // remain part of the string (Scala-style, per the spec).
            sb.setLength(sb.length() - 3);
            putBack(c);
            break;
        } else {
            quoteRun = 0;
            if (c == -1)
                throw problem("End of input but triple-quoted string was still open");
        }
        // Quote characters are appended along with everything else;
        // the closing three are trimmed off above when the run ends.
        sb.appendCodePoint(c);
    }
}
private Token pullQuotedString() throws ProblemException {
// the open quote has already been consumed
StringBuilder sb = new StringBuilder();
// NOTE(review): a diff hunk is elided here -- the escape-processing
// do/while loop body between these markers is not visible in this
// patch view, so only its tail appears below.
@ -436,6 +461,17 @@ final class Tokenizer {
sb.appendCodePoint(c);
}
} while (c != '"');
// maybe switch to triple-quoted string, sort of hacky...
// An empty builder here means we consumed exactly "" (open quote then
// an immediate close quote). Peek one more character: a third quote
// means the input was actually the opener of a triple-quoted string,
// so delegate to appendTripleQuotedString; otherwise push the peeked
// character back and the value really is the empty string.
if (sb.length() == 0) {
int third = nextCharRaw();
if (third == '"') {
appendTripleQuotedString(sb);
} else {
putBack(third);
}
}
return Tokens.newString(lineOrigin, sb.toString());
}

View File

@ -0,0 +1,16 @@
{
"a" : "hello",
"b" : "hello\nworld",
"b1" : "hello\\nworld",
"c" : " \n hello \n world \n ",
"d" : " \"hello\" ",
"e" : "\"hello\"",
"f1" : "hello\"",
"f2" : "hello\"\"",
"f3" : "hello\"\"\"",
"f4" : "hello\"\"\"\"",
"g1" : "\"hello",
"g2" : "\"\"hello",
"h" : "\"\"",
"i" : "foo\"\"bar"
}

View File

@ -0,0 +1,20 @@
{
a : """hello""",
b : """hello
world""",
b1 : """hello\nworld""",
c : """
hello
world
""",
d : """ "hello" """,
e : """"hello"""",
f1 : """hello"""",
f2 : """hello""""",
f3 : """hello"""""",
f4 : """hello""""""",
g1 : """"hello""",
g2 : """""hello""",
h : """""""",
i : """foo""bar"""
}

View File

@ -1,8 +1,7 @@
{
"" : { "" : { "" : 42 } },
"42_a" : ${""."".""},
"42_b" : ${""""."""".""""},
"42_c" : ${ """".""""."""" },
"42_b" : ${ "".""."" },
"a" : { "b" : { "c" : 57 } },
"57_a" : ${a.b.c},
"57_b" : ${"a"."b"."c"},

View File

@ -129,9 +129,6 @@ class ConfParserTest extends TestUtils {
assertEquals(path("a", "", "b"), parsePath("a.\"\".b"))
assertEquals(path("a", ""), parsePath("a.\"\""))
assertEquals(path("", "b"), parsePath("\"\".b"))
assertEquals(path(""), parsePath("\"\"\"\""))
assertEquals(path("a", ""), parsePath("a.\"\"\"\""))
assertEquals(path("", "b"), parsePath("\"\"\"\".b"))
assertEquals(path("", "", ""), parsePath(""" "".""."" """))
assertEquals(path("a-c"), parsePath("a-c"))
assertEquals(path("a_c"), parsePath("a_c"))

View File

@ -826,7 +826,6 @@ class ConfigTest extends TestUtils {
assertEquals(42, conf.getInt("42_a"))
assertEquals(42, conf.getInt("42_b"))
assertEquals(42, conf.getInt("42_c"))
assertEquals(57, conf.getInt("57_a"))
assertEquals(57, conf.getInt("57_b"))
assertEquals(103, conf.getInt("103_a"))

View File

@ -134,8 +134,8 @@ class EquivalentsTest extends TestUtils {
// This is a little "checksum" to be sure we really tested what we were expecting.
// it breaks every time you add a file, so you have to update it.
assertEquals(4, dirCount)
assertEquals(5, dirCount)
// this is the number of files not named original.*
assertEquals(14, fileCount)
assertEquals(15, fileCount)
}
}

View File

@ -34,37 +34,37 @@ class TokenizerTest extends TestUtils {
// is actually extra work).
val expected = List(Tokens.START, Tokens.COMMA, Tokens.COLON, Tokens.EQUALS, Tokens.CLOSE_CURLY,
Tokens.OPEN_CURLY, Tokens.CLOSE_SQUARE, Tokens.OPEN_SQUARE, Tokens.PLUS_EQUALS, tokenString("foo"),
tokenTrue, tokenDouble(3.14), tokenFalse,
tokenString("bar"), tokenTrue, tokenDouble(3.14), tokenFalse,
tokenLong(42), tokenNull, tokenSubstitution(tokenUnquoted("a.b")),
tokenOptionalSubstitution(tokenUnquoted("x.y")),
tokenKeySubstitution("c.d"), tokenLine(1), Tokens.END)
assertEquals(expected, tokenizeAsList(""",:=}{][+="foo"true3.14false42null${a.b}${?x.y}${"c.d"}""" + "\n"))
assertEquals(expected, tokenizeAsList(""",:=}{][+="foo"""" + "\"\"\"bar\"\"\"" + """true3.14false42null${a.b}${?x.y}${"c.d"}""" + "\n"))
}
@Test
def tokenizeAllTypesWithSingleSpaces() {
val expected = List(Tokens.START, Tokens.COMMA, Tokens.COLON, Tokens.EQUALS, Tokens.CLOSE_CURLY,
Tokens.OPEN_CURLY, Tokens.CLOSE_SQUARE, Tokens.OPEN_SQUARE, Tokens.PLUS_EQUALS, tokenString("foo"),
tokenUnquoted(" "), tokenLong(42), tokenUnquoted(" "), tokenTrue, tokenUnquoted(" "),
tokenUnquoted(" "), tokenString("bar"), tokenUnquoted(" "), tokenLong(42), tokenUnquoted(" "), tokenTrue, tokenUnquoted(" "),
tokenDouble(3.14), tokenUnquoted(" "), tokenFalse, tokenUnquoted(" "), tokenNull,
tokenUnquoted(" "), tokenSubstitution(tokenUnquoted("a.b")), tokenUnquoted(" "),
tokenOptionalSubstitution(tokenUnquoted("x.y")), tokenUnquoted(" "),
tokenKeySubstitution("c.d"),
tokenLine(1), Tokens.END)
assertEquals(expected, tokenizeAsList(""" , : = } { ] [ += "foo" 42 true 3.14 false null ${a.b} ${?x.y} ${"c.d"} """ + "\n "))
assertEquals(expected, tokenizeAsList(""" , : = } { ] [ += "foo" """ + "\"\"\"bar\"\"\"" + """ 42 true 3.14 false null ${a.b} ${?x.y} ${"c.d"} """ + "\n "))
}
@Test
def tokenizeAllTypesWithMultipleSpaces() {
val expected = List(Tokens.START, Tokens.COMMA, Tokens.COLON, Tokens.EQUALS, Tokens.CLOSE_CURLY,
Tokens.OPEN_CURLY, Tokens.CLOSE_SQUARE, Tokens.OPEN_SQUARE, Tokens.PLUS_EQUALS, tokenString("foo"),
tokenUnquoted(" "), tokenLong(42), tokenUnquoted(" "), tokenTrue, tokenUnquoted(" "),
tokenUnquoted(" "), tokenString("bar"), tokenUnquoted(" "), tokenLong(42), tokenUnquoted(" "), tokenTrue, tokenUnquoted(" "),
tokenDouble(3.14), tokenUnquoted(" "), tokenFalse, tokenUnquoted(" "), tokenNull,
tokenUnquoted(" "), tokenSubstitution(tokenUnquoted("a.b")), tokenUnquoted(" "),
tokenOptionalSubstitution(tokenUnquoted("x.y")), tokenUnquoted(" "),
tokenKeySubstitution("c.d"),
tokenLine(1), Tokens.END)
assertEquals(expected, tokenizeAsList(""" , : = } { ] [ += "foo" 42 true 3.14 false null ${a.b} ${?x.y} ${"c.d"} """ + "\n "))
assertEquals(expected, tokenizeAsList(""" , : = } { ] [ += "foo" """ + "\"\"\"bar\"\"\"" + """ 42 true 3.14 false null ${a.b} ${?x.y} ${"c.d"} """ + "\n "))
}
@Test
@ -179,6 +179,36 @@ class TokenizerTest extends TestUtils {
}
}
@Test
def tokenizerEmptyTripleQuoted(): Unit = {
    // six quotes in a row form an empty triple-quoted string
    val expected = List(Tokens.START, tokenString(""), Tokens.END)
    assertEquals(expected, tokenizeAsList("\"\"\"\"\"\""))
}
@Test
def tokenizerTrivialTripleQuoted(): Unit = {
    // """bar""" tokenizes to the plain string "bar"
    val tokens = tokenizeAsList("\"\"\"bar\"\"\"")
    assertEquals(List(Tokens.START, tokenString("bar"), Tokens.END), tokens)
}
@Test
def tokenizerNoEscapesInTripleQuoted(): Unit = {
    // backslash-n stays two literal characters: no escape
    // interpretation inside triple quotes
    val tokens = tokenizeAsList("\"\"\"\\n\"\"\"")
    assertEquals(List(Tokens.START, tokenString("\\n"), Tokens.END), tokens)
}
@Test
def tokenizerTrailingQuotesInTripleQuoted(): Unit = {
    // nine quotes: the last three close the string, the middle
    // three belong to its value
    val expected = List(Tokens.START, tokenString("\"\"\""), Tokens.END)
    assertEquals(expected, tokenizeAsList("\"\"\"\"\"\"\"\"\""))
}
@Test
def tokenizerNewlineInTripleQuoted(): Unit = {
    // raw newlines pass through unmodified inside triple quotes
    val expected = List(Tokens.START, tokenString("foo\nbar"), Tokens.END)
    assertEquals(expected, tokenizeAsList("\"\"\"foo\nbar\"\"\""))
}
@Test
def tokenizerParseNumbers(): Unit = {
abstract class NumberTest(val s: String, val result: Token)