subs in quotes

This commit is contained in:
Ryan O'Neill 2015-08-06 14:14:52 -07:00
parent 680ee748dd
commit fd8d159919
3 changed files with 89 additions and 19 deletions

View File

@ -38,7 +38,7 @@ final class Tokenizer {
return "tab"; return "tab";
else if (codepoint == -1) else if (codepoint == -1)
return "end of file"; return "end of file";
else if (ConfigImplUtil.isC0Control(codepoint)) else if (Character.isISOControl(codepoint))
return String.format("control character 0x%x", codepoint); return String.format("control character 0x%x", codepoint);
else else
return String.format("%c", codepoint); return String.format("%c", codepoint);
@ -172,6 +172,12 @@ final class Tokenizer {
buffer.push(c); buffer.push(c);
} }
/**
 * Looks at the next raw character without keeping it consumed: one
 * character is read with nextCharRaw(), handed straight back through
 * putBack(), and then returned to the caller.
 */
private int peekNextCharRaw() {
    final int upcoming = nextCharRaw();
    putBack(upcoming);
    return upcoming;
}
static boolean isWhitespace(int c) { static boolean isWhitespace(int c) {
return ConfigImplUtil.isWhitespace(c); return ConfigImplUtil.isWhitespace(c);
} }
@ -418,6 +424,9 @@ final class Tokenizer {
case 't': case 't':
sb.append('\t'); sb.append('\t');
break; break;
case '$':
sb.append("\\$");
break;
case 'u': { case 'u': {
// kind of absurdly slow, but screw it for now // kind of absurdly slow, but screw it for now
char[] a = new char[4]; char[] a = new char[4];
@ -477,7 +486,9 @@ final class Tokenizer {
} }
} }
private Token pullQuotedString() throws ProblemException { private List<Token> pullQuotedString() throws ProblemException {
List<Token> tokens = new ArrayList<Token>();
// the open quote has already been consumed // the open quote has already been consumed
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
@ -488,6 +499,23 @@ final class Tokenizer {
StringBuilder sbOrig = new StringBuilder(); StringBuilder sbOrig = new StringBuilder();
sbOrig.appendCodePoint('"'); sbOrig.appendCodePoint('"');
// First, check for triple quotes
if (peekNextCharRaw() == '"') { // Double quotes
int second = nextCharRaw();
if (peekNextCharRaw() == '"') { // Triple quotes! Append and return token
int third = nextCharRaw();
sbOrig.appendCodePoint(second);
sbOrig.appendCodePoint(third);
appendTripleQuotedString(sb, sbOrig);
tokens.add(Tokens.newString(lineOrigin, sb.toString(), sbOrig.toString()));
return tokens;
} else { // Empty string, handled by normal string termination case below
putBack(second);
}
}
// Single quoted string with possible substitutions
while (true) { while (true) {
int c = nextCharRaw(); int c = nextCharRaw();
if (c == -1) if (c == -1)
@ -497,8 +525,19 @@ final class Tokenizer {
pullEscapeSequence(sb, sbOrig); pullEscapeSequence(sb, sbOrig);
} else if (c == '"') { } else if (c == '"') {
sbOrig.appendCodePoint(c); sbOrig.appendCodePoint(c);
tokens.add(Tokens.newString(lineOrigin, sb.toString(), sbOrig.toString()));
break; break;
} else if (ConfigImplUtil.isC0Control(c)) { } else if (c == '$' && peekNextCharRaw() == '{') { // Substitution
// Tokenize what we have so far
tokens.add(Tokens.newString(lineOrigin, sb.toString(), sbOrig.toString()));
// Add substitution
tokens.add(pullSubstitution());
// Reset and continue
sb = new StringBuilder();
sbOrig = new StringBuilder();
} else if (Character.isISOControl(c)) {
throw problem(asString(c), "JSON does not allow unescaped " + asString(c) throw problem(asString(c), "JSON does not allow unescaped " + asString(c)
+ " in quoted strings, use a backslash escape"); + " in quoted strings, use a backslash escape");
} else { } else {
@ -507,18 +546,7 @@ final class Tokenizer {
} }
} }
// maybe switch to triple-quoted string, sort of hacky... return tokens;
if (sb.length() == 0) {
int third = nextCharRaw();
if (third == '"') {
sbOrig.appendCodePoint(third);
appendTripleQuotedString(sb, sbOrig);
} else {
putBack(third);
}
}
return Tokens.newString(lineOrigin, sb.toString(), sbOrig.toString());
} }
private Token pullPlusEquals() throws ProblemException { private Token pullPlusEquals() throws ProblemException {
@ -575,7 +603,17 @@ final class Tokenizer {
return Tokens.newSubstitution(origin, optional, expression); return Tokens.newSubstitution(origin, optional, expression);
} }
// Occasionally pullNextToken will encounter a situation where a single read
// produces multiple tokens. When that happens it populates this queue and
// pops from it until empty before attempting to parse a new token.
// Substitutions within quoted strings are an example of this.
//
// This is deliberately per-instance state: a static queue would be shared by
// every tokenizer in the JVM, so tokens queued while tokenizing one input
// could be handed out while tokenizing another, and concurrent tokenizers
// would race on the unsynchronized LinkedList.
private final Queue<Token> nextTokensQueue = new LinkedList<Token>();
private Token pullNextToken(WhitespaceSaver saver) throws ProblemException { private Token pullNextToken(WhitespaceSaver saver) throws ProblemException {
if (!nextTokensQueue.isEmpty()) {
return nextTokensQueue.remove();
}
int c = nextCharAfterWhitespace(saver); int c = nextCharAfterWhitespace(saver);
if (c == -1) { if (c == -1) {
return Tokens.END; return Tokens.END;
@ -592,7 +630,11 @@ final class Tokenizer {
} else { } else {
switch (c) { switch (c) {
case '"': case '"':
t = pullQuotedString(); List<Token> all = pullQuotedString();
t = all.remove(0);
for (Token n: all) {
nextTokensQueue.add(n);
}
break; break;
case '$': case '$':
t = pullSubstitution(); t = pullSubstitution();
@ -692,4 +734,4 @@ final class Tokenizer {
"Does not make sense to remove items from token stream"); "Does not make sense to remove items from token stream");
} }
} }
} }

View File

@ -152,6 +152,31 @@ class TokenizerTest extends TestUtils {
tokenizerTest(expected, source) tokenizerTest(expected, source)
} }
@Test
def tokenizeSubstitutionsInQuoted(): Unit = {
  // A substitution in the middle of a quoted string is expected to split it
  // into string / substitution / string tokens, followed by the line token.
  val source = "\"foo${bar}baz\"\n"
  val expected = List(tokenString("foo"), tokenSubstitution(tokenUnquoted("bar")),
    tokenString("baz"),
    tokenLine(1))
  tokenizerTest(expected, source)
}
@Test
def tokenizeSubstitutionsInQuotedAtBeg(): Unit = {
  // A substitution right after the opening quote is expected to be preceded
  // by an empty string token.
  val source = "\"${bar}baz\"\n"
  val expected = List(tokenString(""), tokenSubstitution(tokenUnquoted("bar")),
    tokenString("baz"),
    tokenLine(1))
  tokenizerTest(expected, source)
}
@Test
def tokenizeSubstitutionsInQuotedAtEnd(): Unit = {
  // A substitution right before the closing quote is expected to be followed
  // by an empty string token. The source has no trailing newline, so no line
  // token is expected.
  val source = "\"foo${bar}\""
  val expected = List(tokenString("foo"), tokenSubstitution(tokenUnquoted("bar")), tokenString(""))
  tokenizerTest(expected, source)
}
@Test @Test
def tokenizerUnescapeStrings(): Unit = { def tokenizerUnescapeStrings(): Unit = {
case class UnescapeTest(escaped: String, result: ConfigString) case class UnescapeTest(escaped: String, result: ConfigString)

View File

@ -57,8 +57,6 @@ class UtilTest extends TestUtils {
assertTrue(ConfigImplUtil.equalsHandlingNull("", "")) assertTrue(ConfigImplUtil.equalsHandlingNull("", ""))
} }
val lotsOfStrings = (invalidJson ++ validConf).map(_.test)
private def roundtripJson(s: String) { private def roundtripJson(s: String) {
val rendered = ConfigImplUtil.renderJsonString(s) val rendered = ConfigImplUtil.renderJsonString(s)
val parsed = parseConfig("{ foo: " + rendered + "}").getString("foo") val parsed = parseConfig("{ foo: " + rendered + "}").getString("foo")
@ -77,6 +75,11 @@ class UtilTest extends TestUtils {
s == parsed) s == parsed)
} }
// These strings are reused in many different ways. The rendering tests never
// resolve the configs, so every string containing a "${" substitution marker
// is left out here.
val lotsOfStrings = (invalidJson ++ validConf)
  .map(_.test)
  .filterNot(_.contains("${"))
@Test @Test
def renderJsonString() { def renderJsonString() {
for (s <- lotsOfStrings) { for (s <- lotsOfStrings) {