prohibit control characters in quoted strings per JSON spec

This commit is contained in:
Havoc Pennington 2011-11-17 00:44:19 -05:00
parent c3fe5509a7
commit 817bbb73eb
4 changed files with 22 additions and 1 deletions

View File

@ -219,6 +219,12 @@ value. Again, these characters are not special _inside_ an
unquoted string; they only trigger number parsing if they appear
initially.
Note that quoted JSON strings may not contain unescaped control
characters (control characters include some whitespace characters,
such as newline); write them with a backslash escape instead. This
rule is from the JSON spec. However, unquoted strings have no
restriction on control characters, other than the ones listed as
"forbidden characters" above.
### Value concatenation
The value of an object field or an array element may consist of

View File

@ -358,6 +358,18 @@ final class Tokenizer {
}
}
/**
 * Builds the parse error reported when an unescaped control character is
 * found inside a quoted string.
 *
 * Newline and tab get a readable name in the message; any other value is
 * shown as a hexadecimal code.
 *
 * @param c the offending character value
 * @return the exception produced by {@code parseError} for this character
 */
private ConfigException controlCharacterError(int c) {
    final String description;
    switch (c) {
    case '\n':
        description = "newline";
        break;
    case '\t':
        description = "tab";
        break;
    default:
        // no friendly name available, fall back to the numeric value
        description = String.format("control character 0x%x", c);
        break;
    }
    String message = "JSON does not allow unescaped " + description
            + " in quoted strings, use a backslash escape";
    return parseError(message);
}
private Token pullQuotedString() {
// the open quote has already been consumed
StringBuilder sb = new StringBuilder();
@ -371,6 +383,8 @@ final class Tokenizer {
pullEscapeSequence(sb);
} else if (c == '"') {
// end the loop, done!
} else if (Character.isISOControl(c)) {
throw controlCharacterError(c);
} else {
sb.appendCodePoint(c);
}

View File

@ -153,6 +153,7 @@ abstract trait TestUtils {
"[ + ]",
"[ # ]",
"[ \\ ]",
ParseTest(true, "[ \"foo\nbar\" ]"), // unescaped newline in quoted string, lift doesn't care
"[ # comment ]",
"${ #comment }",
"[ // comment ]",

View File

@ -137,7 +137,7 @@ class TokenizerTest extends TestUtils {
assertEquals('6', "\\u0046"(5))
val tests = List[UnescapeTest]((""" "" """, ""),
(" \"\0\" ", "\0"), // nul byte
(" \"\\u0000\" ", "\0"), // nul byte
(""" "\"\\\/\b\f\n\r\t" """, "\"\\/\b\f\n\r\t"),
("\"\\u0046\"", "F"),
("\"\\u0046\\u0046\"", "FF"))