mirror of
https://github.com/lightbend/config.git
synced 2025-02-20 00:00:48 +08:00
support "/" in unquoted strings (and therefore keys)
This commit is contained in:
parent
20b75542e4
commit
3610dc8d76
15
HOCON.md
15
HOCON.md
@ -27,8 +27,9 @@ Implementation-wise, the format should have these properties:
|
||||
heuristic. It should be clear what's invalid and invalid files
|
||||
should generate errors.
|
||||
- require minimal look-ahead; should be able to tokenize the file
|
||||
by looking at only the current character and the next
|
||||
character.
|
||||
by looking at only the next three characters. (Right now, the
|
||||
only reason to look at three is to find "//" comments;
|
||||
otherwise you can parse looking at two.)
|
||||
|
||||
HOCON is significantly harder to specify and to parse than
|
||||
JSON. Think of it as moving the work from the person maintaining
|
||||
@ -182,8 +183,10 @@ A sequence of characters outside of a quoted string is a string
|
||||
value if:
|
||||
|
||||
- it does not contain "forbidden characters" '$', '"', '{', '}',
|
||||
'[', ']', ':', '=', ',', '+', '#', '/', '\' (backslash), or
|
||||
'[', ']', ':', '=', ',', '+', '#', '\' (backslash), or
|
||||
whitespace.
|
||||
- it does not contain the two-character string "//" (which
|
||||
starts a comment)
|
||||
- its initial characters do not parse as `true`, `false`, `null`,
|
||||
or a number.
|
||||
|
||||
@ -199,9 +202,9 @@ the unquoted string `bar` but `bar10.0` is the unquoted string
|
||||
`bar10.0`.
|
||||
|
||||
In general, once an unquoted string begins, it continues until a
|
||||
forbidden character is encountered. Embedded (non-initial)
|
||||
booleans, nulls, and numbers are not recognized as such, they are
|
||||
part of the string.
|
||||
forbidden character or the two-character string "//" is
|
||||
encountered. Embedded (non-initial) booleans, nulls, and numbers
|
||||
are not recognized as such; they are part of the string.
|
||||
|
||||
An unquoted string may not _begin_ with the digits 0-9 or with a
|
||||
hyphen (`-`, 0x002D) because those are valid characters to begin a
|
||||
|
@ -84,7 +84,7 @@ final class Tokenizer {
|
||||
|
||||
final private ConfigOrigin origin;
|
||||
final private Reader input;
|
||||
private int oneCharBuffer;
|
||||
final private LinkedList<Integer> buffer;
|
||||
private int lineNumber;
|
||||
final private Queue<Token> tokens;
|
||||
final private WhitespaceSaver whitespaceSaver;
|
||||
@ -94,7 +94,7 @@ final class Tokenizer {
|
||||
this.origin = origin;
|
||||
this.input = input;
|
||||
this.allowComments = allowComments;
|
||||
oneCharBuffer = -1;
|
||||
this.buffer = new LinkedList<Integer>();
|
||||
lineNumber = 0;
|
||||
tokens = new LinkedList<Token>();
|
||||
tokens.add(Tokens.START);
|
||||
@ -102,27 +102,29 @@ final class Tokenizer {
|
||||
}
|
||||
|
||||
|
||||
private int nextChar() {
|
||||
if (oneCharBuffer >= 0) {
|
||||
int c = oneCharBuffer;
|
||||
oneCharBuffer = -1;
|
||||
return c;
|
||||
} else {
|
||||
// this should ONLY be called from nextCharSkippingComments
|
||||
// or when inside a quoted string, everything else should
|
||||
// use nextCharSkippingComments().
|
||||
private int nextCharRaw() {
|
||||
if (buffer.isEmpty()) {
|
||||
try {
|
||||
return input.read();
|
||||
} catch (IOException e) {
|
||||
throw new ConfigException.IO(origin, "read error: "
|
||||
+ e.getMessage(), e);
|
||||
}
|
||||
} else {
|
||||
int c = buffer.pop();
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
private void putBack(int c) {
|
||||
if (oneCharBuffer >= 0) {
|
||||
if (buffer.size() > 2) {
|
||||
throw new ConfigException.BugOrBroken(
|
||||
"bug: attempt to putBack() twice in a row");
|
||||
"bug: putBack() three times, undesirable look-ahead");
|
||||
}
|
||||
oneCharBuffer = c;
|
||||
buffer.push(c);
|
||||
}
|
||||
|
||||
static boolean isWhitespace(int c) {
|
||||
@ -135,29 +137,26 @@ final class Tokenizer {
|
||||
|
||||
private int slurpComment() {
|
||||
for (;;) {
|
||||
int c = nextChar();
|
||||
int c = nextCharRaw();
|
||||
if (c == -1 || c == '\n') {
|
||||
return c;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// get next char, skipping non-newline whitespace
|
||||
private int nextCharAfterWhitespace(WhitespaceSaver saver) {
|
||||
// get next char, skipping comments
|
||||
private int nextCharSkippingComments() {
|
||||
for (;;) {
|
||||
int c = nextChar();
|
||||
int c = nextCharRaw();
|
||||
|
||||
if (c == -1) {
|
||||
return -1;
|
||||
} else {
|
||||
if (isWhitespaceNotNewline(c)) {
|
||||
saver.add(c);
|
||||
continue;
|
||||
} else if (allowComments) {
|
||||
if (allowComments) {
|
||||
if (c == '#') {
|
||||
return slurpComment();
|
||||
} else if (c == '/') {
|
||||
int maybeSecondSlash = nextChar();
|
||||
int maybeSecondSlash = nextCharRaw();
|
||||
if (maybeSecondSlash == '/') {
|
||||
return slurpComment();
|
||||
} else {
|
||||
@ -174,6 +173,24 @@ final class Tokenizer {
|
||||
}
|
||||
}
|
||||
|
||||
// get next char, skipping non-newline whitespace
|
||||
private int nextCharAfterWhitespace(WhitespaceSaver saver) {
|
||||
for (;;) {
|
||||
int c = nextCharSkippingComments();
|
||||
|
||||
if (c == -1) {
|
||||
return -1;
|
||||
} else {
|
||||
if (isWhitespaceNotNewline(c)) {
|
||||
saver.add(c);
|
||||
continue;
|
||||
} else {
|
||||
return c;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private ConfigException parseError(String message) {
|
||||
return parseError(message, null);
|
||||
}
|
||||
@ -208,7 +225,7 @@ final class Tokenizer {
|
||||
// chars JSON allows to be part of a number
|
||||
static final String numberChars = "0123456789eE+-.";
|
||||
// chars that stop an unquoted string
|
||||
static final String notInUnquotedText = "$\"{}[]:=,\\+#/";
|
||||
static final String notInUnquotedText = "$\"{}[]:=,\\+#";
|
||||
|
||||
// The rules here are intended to maximize convenience while
|
||||
// avoiding confusion with real valid JSON. Basically anything
|
||||
@ -217,7 +234,7 @@ final class Tokenizer {
|
||||
private Token pullUnquotedText() {
|
||||
ConfigOrigin origin = lineOrigin();
|
||||
StringBuilder sb = new StringBuilder();
|
||||
int c = nextChar();
|
||||
int c = nextCharSkippingComments();
|
||||
while (true) {
|
||||
if (c == -1) {
|
||||
break;
|
||||
@ -244,7 +261,7 @@ final class Tokenizer {
|
||||
return Tokens.newBoolean(origin, false);
|
||||
}
|
||||
|
||||
c = nextChar();
|
||||
c = nextCharSkippingComments();
|
||||
}
|
||||
|
||||
// put back the char that ended the unquoted text
|
||||
@ -258,12 +275,12 @@ final class Tokenizer {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.appendCodePoint(firstChar);
|
||||
boolean containedDecimalOrE = false;
|
||||
int c = nextChar();
|
||||
int c = nextCharSkippingComments();
|
||||
while (c != -1 && numberChars.indexOf(c) >= 0) {
|
||||
if (c == '.' || c == 'e' || c == 'E')
|
||||
containedDecimalOrE = true;
|
||||
sb.appendCodePoint(c);
|
||||
c = nextChar();
|
||||
c = nextCharSkippingComments();
|
||||
}
|
||||
// the last character we looked at wasn't part of the number, put it
|
||||
// back
|
||||
@ -285,7 +302,7 @@ final class Tokenizer {
|
||||
}
|
||||
|
||||
private void pullEscapeSequence(StringBuilder sb) {
|
||||
int escaped = nextChar();
|
||||
int escaped = nextCharRaw();
|
||||
if (escaped == -1)
|
||||
throw parseError("End of input but backslash in string had nothing after it");
|
||||
|
||||
@ -318,7 +335,7 @@ final class Tokenizer {
|
||||
// kind of absurdly slow, but screw it for now
|
||||
char[] a = new char[4];
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
int c = nextChar();
|
||||
int c = nextCharSkippingComments();
|
||||
if (c == -1)
|
||||
throw parseError("End of input but expecting 4 hex digits for \\uXXXX escape");
|
||||
a[i] = (char) c;
|
||||
@ -346,7 +363,7 @@ final class Tokenizer {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
int c = '\0'; // value doesn't get used
|
||||
do {
|
||||
c = nextChar();
|
||||
c = nextCharRaw();
|
||||
if (c == -1)
|
||||
throw parseError("End of input but string quote was still open");
|
||||
|
||||
@ -364,7 +381,7 @@ final class Tokenizer {
|
||||
private Token pullSubstitution() {
|
||||
// the initial '$' has already been consumed
|
||||
ConfigOrigin origin = lineOrigin();
|
||||
int c = nextChar();
|
||||
int c = nextCharSkippingComments();
|
||||
if (c != '{') {
|
||||
throw parseError("'$' not followed by {");
|
||||
}
|
||||
|
@ -269,4 +269,11 @@ class ConfParserTest extends TestUtils {
|
||||
parseObject("""{ "a" : "y" "b" : "z" }""")
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
def keysWithSlash() {
|
||||
val obj = parseObject("""/a/b/c=42, x/y/z : 32""")
|
||||
assertEquals(42, obj.getInt("/a/b/c"))
|
||||
assertEquals(32, obj.getInt("x/y/z"))
|
||||
}
|
||||
}
|
||||
|
@ -151,11 +151,12 @@ abstract trait TestUtils {
|
||||
"""[ { "a" : 2, "b" : ${${a}} } ]""", // nested substitution
|
||||
"[ = ]", // = is not a valid token in unquoted text
|
||||
"[ + ]",
|
||||
"[ / ]",
|
||||
"[ # ]",
|
||||
"[ \\ ]",
|
||||
"[ # comment ]",
|
||||
"${ #comment }",
|
||||
"[ // comment ]",
|
||||
"${ // comment }",
|
||||
"{ include \"bar\" : 10 }", // include with a value after it
|
||||
"{ include foo }", // include with unquoted string
|
||||
"{ include : { \"a\" : 1 } }") // include used as unquoted key
|
||||
@ -227,6 +228,7 @@ abstract trait TestUtils {
|
||||
"[ trux ]",
|
||||
"[ truex ]",
|
||||
"[ 10x ]", // number token with trailing junk
|
||||
"[ / ]", // unquoted string "slash"
|
||||
"{ include \"foo\" }", // valid include
|
||||
"{ include\n\"foo\" }", // include with just a newline separating from string
|
||||
"{ include\"foo\" }", // include with no whitespace after it
|
||||
@ -262,6 +264,7 @@ abstract trait TestUtils {
|
||||
, 11]""",
|
||||
"""[ 10 // comment
|
||||
, 11]""",
|
||||
"""{ /a/b/c : 10 }""", // key has a slash in it
|
||||
ParseTest(false, true, "[${ foo.bar}]"), // substitution with leading spaces
|
||||
ParseTest(false, true, "[${foo.bar }]"), // substitution with trailing spaces
|
||||
ParseTest(false, true, "[${ \"foo.bar\"}]"), // substitution with leading spaces and quoted
|
||||
|
@ -2,11 +2,16 @@ package com.typesafe.config.impl
|
||||
|
||||
import org.junit.Assert.assertEquals
|
||||
import org.junit.Test
|
||||
|
||||
import com.typesafe.config.ConfigException
|
||||
|
||||
class TokenizerTest extends TestUtils {
|
||||
|
||||
// FIXME most of this file should be using this method
|
||||
private def tokenizerTest(expected: List[Token], s: String) {
|
||||
assertEquals(List(Tokens.START) ++ expected ++ List(Tokens.END),
|
||||
tokenizeAsList(s))
|
||||
}
|
||||
|
||||
@Test
|
||||
def tokenizeEmptyString() {
|
||||
assertEquals(List(Tokens.START, Tokens.END),
|
||||
@ -91,6 +96,14 @@ class TokenizerTest extends TestUtils {
|
||||
assertEquals(expected, tokenizeAsList("""true foo"""))
|
||||
}
|
||||
|
||||
@Test
|
||||
def tokenizeUnquotedTextContainingSlash() {
|
||||
tokenizerTest(List(tokenUnquoted("a/b/c/")), "a/b/c/")
|
||||
tokenizerTest(List(tokenUnquoted("/")), "/")
|
||||
tokenizerTest(List(tokenUnquoted("/"), tokenUnquoted(" "), tokenUnquoted("/")), "/ /")
|
||||
tokenizerTest(Nil, "//")
|
||||
}
|
||||
|
||||
@Test
|
||||
def tokenizeUnquotedTextTrimsSpaces() {
|
||||
val expected = List(Tokens.START, tokenUnquoted("foo"), Tokens.newLine(0), Tokens.END)
|
||||
@ -182,4 +195,16 @@ class TokenizerTest extends TestUtils {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
def commentsIgnoredInVariousContext() {
|
||||
tokenizerTest(List(tokenString("//bar")), "\"//bar\"")
|
||||
tokenizerTest(List(tokenString("#bar")), "\"#bar\"")
|
||||
tokenizerTest(List(tokenUnquoted("bar")), "bar//comment")
|
||||
tokenizerTest(List(tokenUnquoted("bar")), "bar#comment")
|
||||
tokenizerTest(List(tokenInt(10)), "10//comment")
|
||||
tokenizerTest(List(tokenInt(10)), "10#comment")
|
||||
tokenizerTest(List(tokenDouble(3.14)), "3.14//comment")
|
||||
tokenizerTest(List(tokenDouble(3.14)), "3.14#comment")
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user