From 574f810a0fbfdaaf3bd635e21f87af22f2e75a34 Mon Sep 17 00:00:00 2001
From: Preben Ingvaldsen
Date: Wed, 25 Feb 2015 16:51:43 -0800
Subject: [PATCH 1/3] Add lossless tokens (except comments)

Keep the original text of all tokens except comments when tokenizing
a config. Add a render method to the Tokenizer class which generates
the original text from which the tokens were parsed.
---
 .../java/com/typesafe/config/impl/Parser.java |  21 ++-
 .../java/com/typesafe/config/impl/Token.java  |  14 +-
 .../com/typesafe/config/impl/TokenType.java   |   1 +
 .../com/typesafe/config/impl/Tokenizer.java   |  62 ++++++--
 .../java/com/typesafe/config/impl/Tokens.java |  80 +++++++---
 .../com/typesafe/config/impl/TestUtils.scala  |   7 +-
 .../typesafe/config/impl/TokenizerTest.scala  | 142 ++++++++++++------
 7 files changed, 234 insertions(+), 93 deletions(-)

diff --git a/config/src/main/java/com/typesafe/config/impl/Parser.java b/config/src/main/java/com/typesafe/config/impl/Parser.java
index ed64064e..bff033cb 100644
--- a/config/src/main/java/com/typesafe/config/impl/Parser.java
+++ b/config/src/main/java/com/typesafe/config/impl/Parser.java
@@ -203,7 +203,7 @@ final class Parser {
                 }

                 previous = next;
-                next = tokens.next();
+                next = nextTokenFromIterator();
             }

             // put our concluding token in the queue with all the comments
@@ -219,7 +219,7 @@ final class Parser {

         private TokenWithComments popTokenWithoutTrailingComment() {
             if (buffer.isEmpty()) {
-                Token t = tokens.next();
+                Token t = nextTokenFromIterator();
                 if (Tokens.isComment(t)) {
                     consolidateCommentBlock(t);
                     return buffer.pop();
@@ -243,7 +243,7 @@ final class Parser {
             if (!attractsTrailingComments(withPrecedingComments.token)) {
                 return withPrecedingComments;
             } else if (buffer.isEmpty()) {
-                Token after = tokens.next();
+                Token after = nextTokenFromIterator();
                 if (Tokens.isComment(after)) {
                     return withPrecedingComments.add(after);
                 } else {
@@ -319,6 +319,16 @@ final class Parser {
             return t;
         }

+        // Grabs the next Token off of the TokenIterator, ignoring
+        // IgnoredWhitespace tokens
+        private Token nextTokenFromIterator() {
+            Token t;
+            do {
+                t = tokens.next();
+            } while (Tokens.isIgnoredWhitespace(t));
+            return t;
+        }
+
         private AbstractConfigValue addAnyCommentsAfterAnyComma(AbstractConfigValue v) {
             TokenWithComments t = nextToken(); // do NOT skip newlines, we only
                                                // want same-line comments
@@ -1063,6 +1073,11 @@ final class Parser {

         while (expression.hasNext()) {
             Token t = expression.next();
+
+            // Ignore all IgnoredWhitespace tokens
+            if (Tokens.isIgnoredWhitespace(t))
+                continue;
+
             if (Tokens.isValueWithType(t, ConfigValueType.STRING)) {
                 AbstractConfigValue v = Tokens.getValue(t);
                 // this is a quoted string; so any periods
diff --git a/config/src/main/java/com/typesafe/config/impl/Token.java b/config/src/main/java/com/typesafe/config/impl/Token.java
index 5f16d26e..1c199850 100644
--- a/config/src/main/java/com/typesafe/config/impl/Token.java
+++ b/config/src/main/java/com/typesafe/config/impl/Token.java
@@ -10,26 +10,34 @@ class Token {
     final private TokenType tokenType;
     final private String debugString;
     final private ConfigOrigin origin;
+    final private String tokenText;

     Token(TokenType tokenType, ConfigOrigin origin) {
         this(tokenType, origin, null);
     }

-    Token(TokenType tokenType, ConfigOrigin origin, String debugString) {
+    Token(TokenType tokenType, ConfigOrigin origin, String tokenText) {
+        this(tokenType, origin, tokenText, null);
+    }
+
+    Token(TokenType tokenType, ConfigOrigin origin, String tokenText, String debugString) {
        this.tokenType = tokenType;
        this.origin = origin;
         this.debugString = debugString;
+        this.tokenText = tokenText;
     }

     // this is used for singleton tokens like COMMA or OPEN_CURLY
-    static Token newWithoutOrigin(TokenType tokenType, String debugString) {
-        return new Token(tokenType, null, debugString);
+    static Token newWithoutOrigin(TokenType tokenType, String debugString, String tokenText) {
+        return new Token(tokenType, null, tokenText, debugString);
     }

     final TokenType tokenType() {
         return tokenType;
     }

+    final String tokenText() { return tokenText; }
+
     // this is final because we don't always use the origin() accessor,
     // and we don't because it throws if origin is null
     final ConfigOrigin origin() {
diff --git a/config/src/main/java/com/typesafe/config/impl/TokenType.java b/config/src/main/java/com/typesafe/config/impl/TokenType.java
index 7202a9b3..c095c65d 100644
--- a/config/src/main/java/com/typesafe/config/impl/TokenType.java
+++ b/config/src/main/java/com/typesafe/config/impl/TokenType.java
@@ -16,6 +16,7 @@ enum TokenType {
     VALUE,
     NEWLINE,
     UNQUOTED_TEXT,
+    IGNORED_WHITESPACE,
     SUBSTITUTION,
     PROBLEM,
     COMMENT,
diff --git a/config/src/main/java/com/typesafe/config/impl/Tokenizer.java b/config/src/main/java/com/typesafe/config/impl/Tokenizer.java
index d8701f93..fe4e08bd 100644
--- a/config/src/main/java/com/typesafe/config/impl/Tokenizer.java
+++ b/config/src/main/java/com/typesafe/config/impl/Tokenizer.java
@@ -52,6 +52,14 @@ final class Tokenizer {
         return new TokenIterator(origin, input, flavor != ConfigSyntax.JSON);
     }

+    static String render(Iterator<Token> tokens) {
+        String renderedText = "";
+        while (tokens.hasNext()) {
+            renderedText += tokens.next().tokenText();
+        }
+        return renderedText;
+    }
+
     private static class TokenIterator implements Iterator<Token> {

         private static class WhitespaceSaver {
@@ -66,25 +74,30 @@ final class Tokenizer {
             }

             void add(int c) {
-                if (lastTokenWasSimpleValue)
-                    whitespace.appendCodePoint(c);
+                whitespace.appendCodePoint(c);
             }

             Token check(Token t, ConfigOrigin baseOrigin, int lineNumber) {
                 if (isSimpleValue(t)) {
                     return nextIsASimpleValue(baseOrigin, lineNumber);
                 } else {
-                    nextIsNotASimpleValue();
-                    return null;
+                    return nextIsNotASimpleValue(baseOrigin, lineNumber);
                 }
             }

             // called if the next token is not a simple value;
             // discards any whitespace we were saving between
             // simple values.
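             // Illustration (a sketch, not text from the patch): whitespace
             // between non-simple-value tokens is now returned as an
             // IGNORED_WHITESPACE token rather than dropped, so "a : b"
             // tokenizes roughly as
             //   START, 'a', WHITESPACE(" "), ':', WHITESPACE(" "), 'b', END
             // and Tokenizer.render() can rebuild "a : b" by concatenating
             // each token's tokenText().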
- private void nextIsNotASimpleValue() { + private Token nextIsNotASimpleValue(ConfigOrigin baseOrigin, int lineNumber) { lastTokenWasSimpleValue = false; - whitespace.setLength(0); + + if (whitespace.length() > 0) { + Token t = Tokens.newIgnoredWhitespace(lineOrigin(baseOrigin, lineNumber), + whitespace.toString()); + whitespace.setLength(0); + return t; + } + return null; } // called if the next token IS a simple value, @@ -107,7 +120,12 @@ final class Tokenizer { } } else { lastTokenWasSimpleValue = true; - whitespace.setLength(0); + if (whitespace.length() > 0) { + Token t = Tokens.newIgnoredWhitespace(lineOrigin(baseOrigin, lineNumber), + whitespace.toString()); + whitespace.setLength(0); + return t; + } return null; } } @@ -367,11 +385,16 @@ final class Tokenizer { } } - private void pullEscapeSequence(StringBuilder sb) throws ProblemException { + private void pullEscapeSequence(StringBuilder sb, StringBuilder sbOrig) throws ProblemException { int escaped = nextCharRaw(); if (escaped == -1) throw problem("End of input but backslash in string had nothing after it"); + // This is needed so we return the unescaped escape characters back out when rendering + // the token + sbOrig.appendCodePoint('\\'); + sbOrig.appendCodePoint(escaped); + switch (escaped) { case '"': sb.append('"'); @@ -407,6 +430,7 @@ final class Tokenizer { a[i] = (char) c; } String digits = new String(a); + sbOrig.append(a); try { sb.appendCodePoint(Integer.parseInt(digits, 16)); } catch (NumberFormatException e) { @@ -424,7 +448,7 @@ final class Tokenizer { } } - private void appendTripleQuotedString(StringBuilder sb) throws ProblemException { + private void appendTripleQuotedString(StringBuilder sb, StringBuilder sbOrig) throws ProblemException { // we are after the opening triple quote and need to consume the // close triple int consecutiveQuotes = 0; @@ -451,26 +475,37 @@ final class Tokenizer { } sb.appendCodePoint(c); + sbOrig.appendCodePoint(c); } } private Token pullQuotedString() throws ProblemException { // the open quote has already been consumed StringBuilder sb = new StringBuilder(); + + // We need a second string builder to keep track of escape characters. + // We want to return them exactly as they appeared in the original text, + // which means we will need a new StringBuilder to escape escape characters + // so we can also keep the actual value of the string. This is gross. 
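+        // Illustration (a sketch, not text from the patch): for the input
+        // "\u0046", sb ends up holding the unescaped value "F" while sbOrig
+        // holds the raw source text, quotes and backslash included, so
+        // tokenText() reproduces the input exactly while the token's value
+        // stays the unescaped string.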
+ StringBuilder sbOrig = new StringBuilder(); + sbOrig.appendCodePoint('"'); + while (true) { int c = nextCharRaw(); if (c == -1) throw problem("End of input but string quote was still open"); if (c == '\\') { - pullEscapeSequence(sb); + pullEscapeSequence(sb, sbOrig); } else if (c == '"') { + sbOrig.appendCodePoint(c); break; } else if (Character.isISOControl(c)) { throw problem(asString(c), "JSON does not allow unescaped " + asString(c) + " in quoted strings, use a backslash escape"); } else { sb.appendCodePoint(c); + sbOrig.appendCodePoint(c); } } @@ -478,13 +513,14 @@ final class Tokenizer { if (sb.length() == 0) { int third = nextCharRaw(); if (third == '"') { - appendTripleQuotedString(sb); + sbOrig.appendCodePoint(third); + appendTripleQuotedString(sb, sbOrig); } else { putBack(third); } - } - return Tokens.newString(lineOrigin, sb.toString()); + } + return Tokens.newString(lineOrigin, sb.toString(), sbOrig.toString()); } private Token pullPlusEquals() throws ProblemException { diff --git a/config/src/main/java/com/typesafe/config/impl/Tokens.java b/config/src/main/java/com/typesafe/config/impl/Tokens.java index 8ad1f1a2..f328c1af 100644 --- a/config/src/main/java/com/typesafe/config/impl/Tokens.java +++ b/config/src/main/java/com/typesafe/config/impl/Tokens.java @@ -16,7 +16,11 @@ final class Tokens { final private AbstractConfigValue value; Value(AbstractConfigValue value) { - super(TokenType.VALUE, value.origin()); + this(value, null); + } + + Value(AbstractConfigValue value, String origText) { + super(TokenType.VALUE, value.origin(), origText); this.value = value; } @@ -50,7 +54,7 @@ final class Tokens { static private class Line extends Token { Line(ConfigOrigin origin) { - super(TokenType.NEWLINE, origin); + super(TokenType.NEWLINE, origin, "\n"); } @Override @@ -79,7 +83,7 @@ final class Tokens { final private String value; UnquotedText(ConfigOrigin origin, String s) { - super(TokenType.UNQUOTED_TEXT, origin); + super(TokenType.UNQUOTED_TEXT, origin, s); this.value = s; } @@ -109,6 +113,20 @@ final class Tokens { } } + static private class IgnoredWhitespace extends Token { + final private String value; + + IgnoredWhitespace(ConfigOrigin origin, String s) { + super(TokenType.IGNORED_WHITESPACE, origin, s); + this.value = s; + } + + String value() { return value; } + + @Override + public String toString() { return "'" + value + "' (WHITESPACE)"; } + } + static private class Problem extends Token { final private String what; final private String message; @@ -222,7 +240,8 @@ final class Tokens { final private List value; Substitution(ConfigOrigin origin, boolean optional, List expression) { - super(TokenType.SUBSTITUTION, origin); + super(TokenType.SUBSTITUTION, origin, + "${" + (optional? "?" 
: "") + Tokenizer.render(expression.iterator()) + "}"); this.optional = optional; this.value = expression; } @@ -344,6 +363,10 @@ final class Tokens { } } + static boolean isIgnoredWhitespace(Token token) { + return token instanceof IgnoredWhitespace; + } + static boolean isSubstitution(Token token) { return token instanceof Substitution; } @@ -366,16 +389,16 @@ final class Tokens { } } - final static Token START = Token.newWithoutOrigin(TokenType.START, "start of file"); - final static Token END = Token.newWithoutOrigin(TokenType.END, "end of file"); - final static Token COMMA = Token.newWithoutOrigin(TokenType.COMMA, "','"); - final static Token EQUALS = Token.newWithoutOrigin(TokenType.EQUALS, "'='"); - final static Token COLON = Token.newWithoutOrigin(TokenType.COLON, "':'"); - final static Token OPEN_CURLY = Token.newWithoutOrigin(TokenType.OPEN_CURLY, "'{'"); - final static Token CLOSE_CURLY = Token.newWithoutOrigin(TokenType.CLOSE_CURLY, "'}'"); - final static Token OPEN_SQUARE = Token.newWithoutOrigin(TokenType.OPEN_SQUARE, "'['"); - final static Token CLOSE_SQUARE = Token.newWithoutOrigin(TokenType.CLOSE_SQUARE, "']'"); - final static Token PLUS_EQUALS = Token.newWithoutOrigin(TokenType.PLUS_EQUALS, "'+='"); + final static Token START = Token.newWithoutOrigin(TokenType.START, "start of file", ""); + final static Token END = Token.newWithoutOrigin(TokenType.END, "end of file", ""); + final static Token COMMA = Token.newWithoutOrigin(TokenType.COMMA, "','", ","); + final static Token EQUALS = Token.newWithoutOrigin(TokenType.EQUALS, "'='", "="); + final static Token COLON = Token.newWithoutOrigin(TokenType.COLON, "':'", ":"); + final static Token OPEN_CURLY = Token.newWithoutOrigin(TokenType.OPEN_CURLY, "'{'", "{"); + final static Token CLOSE_CURLY = Token.newWithoutOrigin(TokenType.CLOSE_CURLY, "'}'", "}"); + final static Token OPEN_SQUARE = Token.newWithoutOrigin(TokenType.OPEN_SQUARE, "'['", "["); + final static Token CLOSE_SQUARE = Token.newWithoutOrigin(TokenType.CLOSE_SQUARE, "']'", "]"); + final static Token PLUS_EQUALS = Token.newWithoutOrigin(TokenType.PLUS_EQUALS, "'+='", "+="); static Token newLine(ConfigOrigin origin) { return new Line(origin); @@ -394,6 +417,10 @@ final class Tokens { return new UnquotedText(origin, s); } + static Token newIgnoredWhitespace(ConfigOrigin origin, String s) { + return new IgnoredWhitespace(origin, s); + } + static Token newSubstitution(ConfigOrigin origin, boolean optional, List expression) { return new Substitution(origin, optional, expression); } @@ -401,32 +428,35 @@ final class Tokens { static Token newValue(AbstractConfigValue value) { return new Value(value); } - - static Token newString(ConfigOrigin origin, String value) { - return newValue(new ConfigString.Quoted(origin, value)); + static Token newValue(AbstractConfigValue value, String origText) { + return new Value(value, origText); } - static Token newInt(ConfigOrigin origin, int value, String originalText) { + static Token newString(ConfigOrigin origin, String value, String origText) { + return newValue(new ConfigString.Quoted(origin, value), origText); + } + + static Token newInt(ConfigOrigin origin, int value, String origText) { return newValue(ConfigNumber.newNumber(origin, value, - originalText)); + origText), origText); } static Token newDouble(ConfigOrigin origin, double value, - String originalText) { + String origText) { return newValue(ConfigNumber.newNumber(origin, value, - originalText)); + origText), origText); } - static Token newLong(ConfigOrigin origin, long 
value, String originalText) { + static Token newLong(ConfigOrigin origin, long value, String origText) { return newValue(ConfigNumber.newNumber(origin, value, - originalText)); + origText), origText); } static Token newNull(ConfigOrigin origin) { - return newValue(new ConfigNull(origin)); + return newValue(new ConfigNull(origin), "null"); } static Token newBoolean(ConfigOrigin origin, boolean value) { - return newValue(new ConfigBoolean(origin, value)); + return newValue(new ConfigBoolean(origin, value), "" + value); } } diff --git a/config/src/test/scala/com/typesafe/config/impl/TestUtils.scala b/config/src/test/scala/com/typesafe/config/impl/TestUtils.scala index c380462d..2c5902ae 100644 --- a/config/src/test/scala/com/typesafe/config/impl/TestUtils.scala +++ b/config/src/test/scala/com/typesafe/config/impl/TestUtils.scala @@ -611,12 +611,13 @@ abstract trait TestUtils { def tokenFalse = Tokens.newBoolean(fakeOrigin(), false) def tokenNull = Tokens.newNull(fakeOrigin()) def tokenUnquoted(s: String) = Tokens.newUnquotedText(fakeOrigin(), s) - def tokenString(s: String) = Tokens.newString(fakeOrigin(), s) + def tokenString(s: String) = Tokens.newString(fakeOrigin(), s, s) def tokenDouble(d: Double) = Tokens.newDouble(fakeOrigin(), d, null) def tokenInt(i: Int) = Tokens.newInt(fakeOrigin(), i, null) def tokenLong(l: Long) = Tokens.newLong(fakeOrigin(), l, null) def tokenLine(line: Int) = Tokens.newLine(fakeOrigin.withLineNumber(line)) def tokenComment(text: String) = Tokens.newComment(fakeOrigin(), text) + def tokenWhitespace(text: String) = Tokens.newIgnoredWhitespace(fakeOrigin(), text) private def tokenMaybeOptionalSubstitution(optional: Boolean, expression: Token*) = { val l = new java.util.ArrayList[Token] @@ -657,6 +658,10 @@ abstract trait TestUtils { tokenize(s).asScala.toList } + def tokenizeAsString(s: String) = { + Tokenizer.render(tokenize(s)) + } + // this is importantly NOT using Path.newPath, which relies on // the parser; in the test suite we are often testing the parser, // so we don't want to use the parser to build the expected result. diff --git a/config/src/test/scala/com/typesafe/config/impl/TokenizerTest.scala b/config/src/test/scala/com/typesafe/config/impl/TokenizerTest.scala index e628d816..1e24032a 100644 --- a/config/src/test/scala/com/typesafe/config/impl/TokenizerTest.scala +++ b/config/src/test/scala/com/typesafe/config/impl/TokenizerTest.scala @@ -14,18 +14,23 @@ class TokenizerTest extends TestUtils { private def tokenizerTest(expected: List[Token], s: String) { assertEquals(List(Tokens.START) ++ expected ++ List(Tokens.END), tokenizeAsList(s)) + assertEquals(s, tokenizeAsString(s)) } @Test def tokenizeEmptyString() { + val source = "" assertEquals(List(Tokens.START, Tokens.END), - tokenizeAsList("")) + tokenizeAsList(source)) + assertEquals(source, tokenizeAsString(source)) } @Test def tokenizeNewlines() { + val source = "\n\n" assertEquals(List(Tokens.START, tokenLine(1), tokenLine(2), Tokens.END), - tokenizeAsList("\n\n")) + tokenizeAsList(source)) + assertEquals(source, tokenizeAsString(source)) } @Test @@ -33,75 +38,95 @@ class TokenizerTest extends TestUtils { // all token types with no spaces (not sure JSON spec wants this to work, // but spec is unclear to me when spaces are required, and banning them // is actually extra work). 
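+        // Sketch of the round-trip half of the assertions below:
+        //   assertEquals(source, tokenizeAsString(source))
+        // where tokenizeAsString(s) is Tokenizer.render(tokenize(s)); with no
+        // whitespace between tokens, losslessness reduces to each token
+        // preserving its own text.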
+ val source = """,:=}{][+="foo"""" + "\"\"\"bar\"\"\"" + """true3.14false42null${a.b}${?x.y}${"c.d"}""" + "\n" val expected = List(Tokens.START, Tokens.COMMA, Tokens.COLON, Tokens.EQUALS, Tokens.CLOSE_CURLY, Tokens.OPEN_CURLY, Tokens.CLOSE_SQUARE, Tokens.OPEN_SQUARE, Tokens.PLUS_EQUALS, tokenString("foo"), tokenString("bar"), tokenTrue, tokenDouble(3.14), tokenFalse, tokenLong(42), tokenNull, tokenSubstitution(tokenUnquoted("a.b")), tokenOptionalSubstitution(tokenUnquoted("x.y")), tokenKeySubstitution("c.d"), tokenLine(1), Tokens.END) - assertEquals(expected, tokenizeAsList(""",:=}{][+="foo"""" + "\"\"\"bar\"\"\"" + """true3.14false42null${a.b}${?x.y}${"c.d"}""" + "\n")) + assertEquals(expected, tokenizeAsList(source)) + assertEquals(source, tokenizeAsString(source)) } @Test def tokenizeAllTypesWithSingleSpaces() { - val expected = List(Tokens.START, Tokens.COMMA, Tokens.COLON, Tokens.EQUALS, Tokens.CLOSE_CURLY, - Tokens.OPEN_CURLY, Tokens.CLOSE_SQUARE, Tokens.OPEN_SQUARE, Tokens.PLUS_EQUALS, tokenString("foo"), - tokenUnquoted(" "), tokenString("bar"), tokenUnquoted(" "), tokenLong(42), tokenUnquoted(" "), tokenTrue, tokenUnquoted(" "), - tokenDouble(3.14), tokenUnquoted(" "), tokenFalse, tokenUnquoted(" "), tokenNull, + val source = """ , : = } { ] [ += "foo" """ + "\"\"\"bar\"\"\"" + """ 42 true 3.14 false null ${a.b} ${?x.y} ${"c.d"} """ + "\n " + val expected = List(Tokens.START, tokenWhitespace(" "), Tokens.COMMA, tokenWhitespace(" "), Tokens.COLON, tokenWhitespace(" "), + Tokens.EQUALS, tokenWhitespace(" "), Tokens.CLOSE_CURLY, tokenWhitespace(" "), Tokens.OPEN_CURLY, tokenWhitespace(" "), + Tokens.CLOSE_SQUARE, tokenWhitespace(" "), Tokens.OPEN_SQUARE, tokenWhitespace(" "), Tokens.PLUS_EQUALS, tokenWhitespace(" "), + tokenString("foo"), tokenUnquoted(" "), tokenString("bar"), tokenUnquoted(" "), tokenLong(42), tokenUnquoted(" "), + tokenTrue, tokenUnquoted(" "), tokenDouble(3.14), tokenUnquoted(" "), tokenFalse, tokenUnquoted(" "), tokenNull, tokenUnquoted(" "), tokenSubstitution(tokenUnquoted("a.b")), tokenUnquoted(" "), tokenOptionalSubstitution(tokenUnquoted("x.y")), tokenUnquoted(" "), - tokenKeySubstitution("c.d"), - tokenLine(1), Tokens.END) - assertEquals(expected, tokenizeAsList(""" , : = } { ] [ += "foo" """ + "\"\"\"bar\"\"\"" + """ 42 true 3.14 false null ${a.b} ${?x.y} ${"c.d"} """ + "\n ")) + tokenKeySubstitution("c.d"), tokenWhitespace(" "), + tokenLine(1), tokenWhitespace(" "), Tokens.END) + assertEquals(expected, tokenizeAsList(source)) + assertEquals(source, tokenizeAsString(source)) } @Test def tokenizeAllTypesWithMultipleSpaces() { - val expected = List(Tokens.START, Tokens.COMMA, Tokens.COLON, Tokens.EQUALS, Tokens.CLOSE_CURLY, - Tokens.OPEN_CURLY, Tokens.CLOSE_SQUARE, Tokens.OPEN_SQUARE, Tokens.PLUS_EQUALS, tokenString("foo"), + val source = """ , : = } { ] [ += "foo" """ + "\"\"\"bar\"\"\"" + """ 42 true 3.14 false null ${a.b} ${?x.y} ${"c.d"} """ + "\n " + val expected = List(Tokens.START, tokenWhitespace(" "), Tokens.COMMA, tokenWhitespace(" "), Tokens.COLON, tokenWhitespace(" "), + Tokens.EQUALS, tokenWhitespace(" "), Tokens.CLOSE_CURLY, tokenWhitespace(" "), Tokens.OPEN_CURLY, tokenWhitespace(" "), Tokens.CLOSE_SQUARE, + tokenWhitespace(" "), Tokens.OPEN_SQUARE, tokenWhitespace(" "), Tokens.PLUS_EQUALS, tokenWhitespace(" "), tokenString("foo"), tokenUnquoted(" "), tokenString("bar"), tokenUnquoted(" "), tokenLong(42), tokenUnquoted(" "), tokenTrue, tokenUnquoted(" "), tokenDouble(3.14), tokenUnquoted(" "), tokenFalse, tokenUnquoted(" "), tokenNull, 
tokenUnquoted(" "), tokenSubstitution(tokenUnquoted("a.b")), tokenUnquoted(" "), tokenOptionalSubstitution(tokenUnquoted("x.y")), tokenUnquoted(" "), - tokenKeySubstitution("c.d"), - tokenLine(1), Tokens.END) - assertEquals(expected, tokenizeAsList(""" , : = } { ] [ += "foo" """ + "\"\"\"bar\"\"\"" + """ 42 true 3.14 false null ${a.b} ${?x.y} ${"c.d"} """ + "\n ")) + tokenKeySubstitution("c.d"), tokenWhitespace(" "), + tokenLine(1), tokenWhitespace(" "), Tokens.END) + assertEquals(expected, tokenizeAsList(source)) + assertEquals(source, tokenizeAsString(source)) } @Test def tokenizeTrueAndUnquotedText() { + val source = """truefoo""" val expected = List(Tokens.START, tokenTrue, tokenUnquoted("foo"), Tokens.END) - assertEquals(expected, tokenizeAsList("""truefoo""")) + assertEquals(expected, tokenizeAsList(source)) + assertEquals(source, tokenizeAsString(source)) } @Test def tokenizeFalseAndUnquotedText() { + val source = """falsefoo""" val expected = List(Tokens.START, tokenFalse, tokenUnquoted("foo"), Tokens.END) - assertEquals(expected, tokenizeAsList("""falsefoo""")) + assertEquals(expected, tokenizeAsList(source)) + assertEquals(source, tokenizeAsString(source)) } @Test def tokenizeNullAndUnquotedText() { + val source = """nullfoo""" val expected = List(Tokens.START, tokenNull, tokenUnquoted("foo"), Tokens.END) - assertEquals(expected, tokenizeAsList("""nullfoo""")) + assertEquals(expected, tokenizeAsList(source)) + assertEquals(source, tokenizeAsString(source)) } @Test def tokenizeUnquotedTextContainingTrue() { + val source = """footrue""" val expected = List(Tokens.START, tokenUnquoted("footrue"), Tokens.END) - assertEquals(expected, tokenizeAsList("""footrue""")) + assertEquals(expected, tokenizeAsList(source)) + assertEquals(source, tokenizeAsString(source)) } @Test def tokenizeUnquotedTextContainingSpaceTrue() { + val source = """foo true""" val expected = List(Tokens.START, tokenUnquoted("foo"), tokenUnquoted(" "), tokenTrue, Tokens.END) - assertEquals(expected, tokenizeAsList("""foo true""")) + assertEquals(expected, tokenizeAsList(source)) + assertEquals(source, tokenizeAsString(source)) } @Test def tokenizeTrueAndSpaceAndUnquotedText() { + val source = """true foo""" val expected = List(Tokens.START, tokenTrue, tokenUnquoted(" "), tokenUnquoted("foo"), Tokens.END) - assertEquals(expected, tokenizeAsList("""true foo""")) + assertEquals(expected, tokenizeAsList(source)) + assertEquals(source, tokenizeAsString(source)) } @Test @@ -109,28 +134,36 @@ class TokenizerTest extends TestUtils { tokenizerTest(List(tokenUnquoted("a/b/c/")), "a/b/c/") tokenizerTest(List(tokenUnquoted("/")), "/") tokenizerTest(List(tokenUnquoted("/"), tokenUnquoted(" "), tokenUnquoted("/")), "/ /") - tokenizerTest(List(tokenComment("")), "//") + //tokenizerTest(List(tokenComment("")), "//") } @Test - def tokenizeUnquotedTextTrimsSpaces() { - val expected = List(Tokens.START, tokenUnquoted("foo"), tokenLine(1), Tokens.END) - assertEquals(expected, tokenizeAsList(" foo \n")) + def tokenizeUnquotedTextKeepsSpaces() { + val source = " foo \n" + val expected = List(Tokens.START, tokenWhitespace(" "), tokenUnquoted("foo"), tokenWhitespace(" "), + tokenLine(1), Tokens.END) + assertEquals(expected, tokenizeAsList(source)) + assertEquals(source, tokenizeAsString(source)) } @Test def tokenizeUnquotedTextKeepsInternalSpaces() { - val expected = List(Tokens.START, tokenUnquoted("foo"), tokenUnquoted(" "), tokenUnquoted("bar"), - tokenUnquoted(" "), tokenUnquoted("baz"), tokenLine(1), Tokens.END) + val source = " foo 
bar baz \n" + val expected = List(Tokens.START, tokenWhitespace(" "), tokenUnquoted("foo"), tokenUnquoted(" "), + tokenUnquoted("bar"), tokenUnquoted(" "), tokenUnquoted("baz"), tokenWhitespace(" "), + tokenLine(1), Tokens.END) assertEquals(expected, tokenizeAsList(" foo bar baz \n")) + assertEquals(source, tokenizeAsString(source)) } @Test def tokenizeMixedUnquotedQuoted() { - val expected = List(Tokens.START, tokenUnquoted("foo"), - tokenString("bar"), tokenUnquoted("baz"), + val source = " foo\"bar\"baz \n" + val expected = List(Tokens.START, tokenWhitespace(" "), tokenUnquoted("foo"), + tokenString("bar"), tokenUnquoted("baz"), tokenWhitespace(" "), tokenLine(1), Tokens.END) assertEquals(expected, tokenizeAsList(" foo\"bar\"baz \n")) + assertEquals(source, tokenizeAsString(source)) } @Test @@ -147,13 +180,15 @@ class TokenizerTest extends TestUtils { val tests = List[UnescapeTest]((""" "" """, ""), (" \"\\u0000\" ", Character.toString(0)), // nul byte (""" "\"\\\/\b\f\n\r\t" """, "\"\\/\b\f\n\r\t"), - ("\"\\u0046\"", "F"), - ("\"\\u0046\\u0046\"", "FF")) + (" \"\\u0046\" ", "F"), + (" \"\\u0046\\u0046\" ", "FF")) for (t <- tests) { describeFailure(t.toString) { - assertEquals(List(Tokens.START, Tokens.newValue(t.result), Tokens.END), + assertEquals(List(Tokens.START, tokenWhitespace(" "), Tokens.newValue(t.result, t.toString), + tokenWhitespace(" "), Tokens.END), tokenizeAsList(t.escaped)) + assertEquals(t.escaped, tokenizeAsString(t.escaped)) } } } @@ -182,32 +217,42 @@ class TokenizerTest extends TestUtils { @Test def tokenizerEmptyTripleQuoted(): Unit = { + val source = "\"\"\"\"\"\"" assertEquals(List(Tokens.START, tokenString(""), Tokens.END), - tokenizeAsList("\"\"\"\"\"\"")) + tokenizeAsList(source)) + assertEquals(source, tokenizeAsString(source)) } @Test def tokenizerTrivialTripleQuoted(): Unit = { + val source = "\"\"\"bar\"\"\"" assertEquals(List(Tokens.START, tokenString("bar"), Tokens.END), - tokenizeAsList("\"\"\"bar\"\"\"")) + tokenizeAsList(source)) + assertEquals(source, tokenizeAsString(source)) } @Test def tokenizerNoEscapesInTripleQuoted(): Unit = { + val source = "\"\"\"\\n\"\"\"" assertEquals(List(Tokens.START, tokenString("\\n"), Tokens.END), - tokenizeAsList("\"\"\"\\n\"\"\"")) + tokenizeAsList(source)) + assertEquals(source, tokenizeAsString(source)) } @Test def tokenizerTrailingQuotesInTripleQuoted(): Unit = { + val source = "\"\"\"\"\"\"\"\"\"" assertEquals(List(Tokens.START, tokenString("\"\"\""), Tokens.END), - tokenizeAsList("\"\"\"\"\"\"\"\"\"")) + tokenizeAsList(source)) + assertEquals(source, tokenizeAsString(source)) } @Test def tokenizerNewlineInTripleQuoted(): Unit = { + val source = "\"\"\"foo\nbar\"\"\"" assertEquals(List(Tokens.START, tokenString("foo\nbar"), Tokens.END), - tokenizeAsList("\"\"\"foo\nbar\"\"\"")) + tokenizeAsList(source)) + assertEquals(source, tokenizeAsString(source)) } @Test @@ -231,23 +276,24 @@ class TokenizerTest extends TestUtils { describeFailure(t.toString()) { assertEquals(List(Tokens.START, t.result, Tokens.END), tokenizeAsList(t.s)) + assertEquals(t.s, tokenizeAsString(t.s)) } } } @Test def commentsHandledInVariousContexts() { - tokenizerTest(List(tokenString("//bar")), "\"//bar\"") - tokenizerTest(List(tokenString("#bar")), "\"#bar\"") - tokenizerTest(List(tokenUnquoted("bar"), tokenComment("comment")), "bar//comment") - tokenizerTest(List(tokenUnquoted("bar"), tokenComment("comment")), "bar#comment") - tokenizerTest(List(tokenInt(10), tokenComment("comment")), "10//comment") - tokenizerTest(List(tokenInt(10), 
tokenComment("comment")), "10#comment")
-        tokenizerTest(List(tokenDouble(3.14), tokenComment("comment")), "3.14//comment")
-        tokenizerTest(List(tokenDouble(3.14), tokenComment("comment")), "3.14#comment")
-        // be sure we keep the newline
-        tokenizerTest(List(tokenInt(10), tokenComment("comment"), tokenLine(1), tokenInt(12)), "10//comment\n12")
-        tokenizerTest(List(tokenInt(10), tokenComment("comment"), tokenLine(1), tokenInt(12)), "10#comment\n12")
+        // tokenizerTest(List(tokenString("//bar")), "\"//bar\"")
+        // tokenizerTest(List(tokenString("#bar")), "\"#bar\"")
+        // tokenizerTest(List(tokenUnquoted("bar"), tokenComment("comment")), "bar//comment")
+        // tokenizerTest(List(tokenUnquoted("bar"), tokenComment("comment")), "bar#comment")
+        // tokenizerTest(List(tokenInt(10), tokenComment("comment")), "10//comment")
+        // tokenizerTest(List(tokenInt(10), tokenComment("comment")), "10#comment")
+        // tokenizerTest(List(tokenDouble(3.14), tokenComment("comment")), "3.14//comment")
+        // tokenizerTest(List(tokenDouble(3.14), tokenComment("comment")), "3.14#comment")
+        // // be sure we keep the newline
+        // tokenizerTest(List(tokenInt(10), tokenComment("comment"), tokenLine(1), tokenInt(12)), "10//comment\n12")
+        // tokenizerTest(List(tokenInt(10), tokenComment("comment"), tokenLine(1), tokenInt(12)), "10#comment\n12")
     }

     @Test

From 2b1c7ef13d21ea28bdacb326603f48a02c5501c9 Mon Sep 17 00:00:00 2001
From: Preben Ingvaldsen
Date: Mon, 2 Mar 2015 11:54:43 -0800
Subject: [PATCH 2/3] Add lossless comment tokens

Preserve the original text of comments in comment tokens so they can
be reproduced by the Tokenizer's render() method.
---
 .../com/typesafe/config/impl/Tokenizer.java   |  4 ++-
 .../java/com/typesafe/config/impl/Tokens.java |  8 ++---
 .../com/typesafe/config/impl/TestUtils.scala  |  2 +-
 .../typesafe/config/impl/TokenizerTest.scala  | 29 +++++++++++--------
 4 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/config/src/main/java/com/typesafe/config/impl/Tokenizer.java b/config/src/main/java/com/typesafe/config/impl/Tokenizer.java
index fe4e08bd..fcb6bd93 100644
--- a/config/src/main/java/com/typesafe/config/impl/Tokenizer.java
+++ b/config/src/main/java/com/typesafe/config/impl/Tokenizer.java
@@ -278,10 +278,12 @@ final class Tokenizer {
         // ONE char has always been consumed, either the # or the first /, but
         // not both slashes
         private Token pullComment(int firstChar) {
+            boolean doubleSlash = false;
             if (firstChar == '/') {
                 int discard = nextCharRaw();
                 if (discard != '/')
                     throw new ConfigException.BugOrBroken("called pullComment but // not seen");
+                doubleSlash = true;
             }

             StringBuilder sb = new StringBuilder();
@@ -289,7 +291,7 @@ final class Tokenizer {
                 int c = nextCharRaw();
                 if (c == -1 || c == '\n') {
                     putBack(c);
-                    return Tokens.newComment(lineOrigin, sb.toString());
+                    return Tokens.newComment(lineOrigin, sb.toString(), doubleSlash);
                 } else {
                     sb.appendCodePoint(c);
                 }
diff --git a/config/src/main/java/com/typesafe/config/impl/Tokens.java b/config/src/main/java/com/typesafe/config/impl/Tokens.java
index f328c1af..dbe7d48f 100644
--- a/config/src/main/java/com/typesafe/config/impl/Tokens.java
+++ b/config/src/main/java/com/typesafe/config/impl/Tokens.java
@@ -198,8 +198,8 @@ final class Tokens {
     static private class Comment extends Token {
         final private String text;

-        Comment(ConfigOrigin origin, String text) {
-            super(TokenType.COMMENT, origin);
+        Comment(ConfigOrigin origin, String text, boolean doubleSlash) {
+            super(TokenType.COMMENT, origin, (doubleSlash? "//" : "#") + text);
             this.text = text;
         }

@@ -409,8 +409,8 @@ final class Tokens {
         return new Problem(origin, what, message, suggestQuotes, cause);
     }

-    static Token newComment(ConfigOrigin origin, String text) {
-        return new Comment(origin, text);
+    static Token newComment(ConfigOrigin origin, String text, boolean doubleSlash) {
+        return new Comment(origin, text, doubleSlash);
     }

     static Token newUnquotedText(ConfigOrigin origin, String s) {
diff --git a/config/src/test/scala/com/typesafe/config/impl/TestUtils.scala b/config/src/test/scala/com/typesafe/config/impl/TestUtils.scala
index 2c5902ae..c1313342 100644
--- a/config/src/test/scala/com/typesafe/config/impl/TestUtils.scala
+++ b/config/src/test/scala/com/typesafe/config/impl/TestUtils.scala
@@ -616,7 +616,7 @@ abstract trait TestUtils {
     def tokenInt(i: Int) = Tokens.newInt(fakeOrigin(), i, null)
     def tokenLong(l: Long) = Tokens.newLong(fakeOrigin(), l, null)
     def tokenLine(line: Int) = Tokens.newLine(fakeOrigin.withLineNumber(line))
-    def tokenComment(text: String) = Tokens.newComment(fakeOrigin(), text)
+    def tokenComment(text: String, doubleSlash : Boolean) = Tokens.newComment(fakeOrigin(), text, doubleSlash)
     def tokenWhitespace(text: String) = Tokens.newIgnoredWhitespace(fakeOrigin(), text)

     private def tokenMaybeOptionalSubstitution(optional: Boolean, expression: Token*) = {
diff --git a/config/src/test/scala/com/typesafe/config/impl/TokenizerTest.scala b/config/src/test/scala/com/typesafe/config/impl/TokenizerTest.scala
index 1e24032a..c8e29171 100644
--- a/config/src/test/scala/com/typesafe/config/impl/TokenizerTest.scala
+++ b/config/src/test/scala/com/typesafe/config/impl/TokenizerTest.scala
@@ -134,7 +134,7 @@ class TokenizerTest extends TestUtils {
         tokenizerTest(List(tokenUnquoted("a/b/c/")), "a/b/c/")
         tokenizerTest(List(tokenUnquoted("/")), "/")
         tokenizerTest(List(tokenUnquoted("/"), tokenUnquoted(" "), tokenUnquoted("/")), "/ /")
-        //tokenizerTest(List(tokenComment("")), "//")
+        tokenizerTest(List(tokenComment("", true)), "//")
     }

     @Test
@@ -283,17 +283,22 @@ class TokenizerTest extends TestUtils {

     @Test
     def commentsHandledInVariousContexts() {
-        // tokenizerTest(List(tokenString("//bar")), "\"//bar\"")
-        // tokenizerTest(List(tokenString("#bar")), "\"#bar\"")
-        // tokenizerTest(List(tokenUnquoted("bar"), tokenComment("comment")), "bar//comment")
-        // tokenizerTest(List(tokenUnquoted("bar"), tokenComment("comment")), "bar#comment")
-        // tokenizerTest(List(tokenInt(10), tokenComment("comment")), "10//comment")
-        // tokenizerTest(List(tokenInt(10), tokenComment("comment")), "10#comment")
-        // tokenizerTest(List(tokenDouble(3.14), tokenComment("comment")), "3.14//comment")
-        // tokenizerTest(List(tokenDouble(3.14), tokenComment("comment")), "3.14#comment")
-        // // be sure we keep the newline
-        // tokenizerTest(List(tokenInt(10), tokenComment("comment"), tokenLine(1), tokenInt(12)), "10//comment\n12")
-        // tokenizerTest(List(tokenInt(10), tokenComment("comment"), tokenLine(1), tokenInt(12)), "10#comment\n12")
+        tokenizerTest(List(tokenString("//bar")), "\"//bar\"")
+        tokenizerTest(List(tokenString("#bar")), "\"#bar\"")
+        tokenizerTest(List(tokenUnquoted("bar"), tokenComment("comment", true)), "bar//comment")
+        tokenizerTest(List(tokenUnquoted("bar"), tokenComment("comment", false)), "bar#comment")
+        tokenizerTest(List(tokenInt(10), tokenComment("comment", true)), "10//comment")
+        tokenizerTest(List(tokenInt(10), tokenComment("comment", false)), "10#comment")
+        tokenizerTest(List(tokenDouble(3.14), tokenComment("comment", true)), "3.14//comment")
+        tokenizerTest(List(tokenDouble(3.14), tokenComment("comment", false)), "3.14#comment")
+        // be sure we keep the newline
+        tokenizerTest(List(tokenInt(10), tokenComment("comment", true), tokenLine(1), tokenInt(12)), "10//comment\n12")
+        tokenizerTest(List(tokenInt(10), tokenComment("comment", false), tokenLine(1), tokenInt(12)), "10#comment\n12")
+        // be sure we handle multi-line comments
+        tokenizerTest(List(tokenComment("comment", true), tokenLine(1), tokenComment("comment2", true)),
+            "//comment\n//comment2")
+        tokenizerTest(List(tokenComment("comment", false), tokenLine(1), tokenComment("comment2", false)),
+            "#comment\n#comment2")
     }

     @Test

From cb86fb136e16aa677fa8df804d6e470435f29777 Mon Sep 17 00:00:00 2001
From: Preben Ingvaldsen
Date: Tue, 3 Mar 2015 15:27:41 -0800
Subject: [PATCH 3/3] Clean up lossless tokens code based on feedback

Clean up the lossless tokens code based on feedback.

* Rename nextTokenFromIterator() to nextTokenIgnoringWhitespace()
* Use StringBuilder in Tokenizer.render()
* Extract whitespace token creation into a `createWhitespaceTokenFromSaver`
  method
* Add DoubleSlashComment and HashComment subclasses of Comment. Make
  Comment abstract.
* Make the `tokenText()` method overridable and override it in subclasses
  that derive their original token text entirely from other fields.
* Use the `tokenizerTest` method in all relevant Tokenizer tests
* Add extra multi-line comment Tokenizer tests
---
 .../java/com/typesafe/config/impl/Parser.java |   8 +-
 .../java/com/typesafe/config/impl/Token.java  |   2 +-
 .../com/typesafe/config/impl/Tokenizer.java   |  62 ++++---
 .../java/com/typesafe/config/impl/Tokens.java |  65 ++++++--
 .../com/typesafe/config/impl/TestUtils.scala  |   3 +-
 .../typesafe/config/impl/TokenizerTest.scala  | 151 ++++++++----------
 6 files changed, 161 insertions(+), 130 deletions(-)

diff --git a/config/src/main/java/com/typesafe/config/impl/Parser.java b/config/src/main/java/com/typesafe/config/impl/Parser.java
index bff033cb..6287c063 100644
--- a/config/src/main/java/com/typesafe/config/impl/Parser.java
+++ b/config/src/main/java/com/typesafe/config/impl/Parser.java
@@ -203,7 +203,7 @@ final class Parser {
                 }

                 previous = next;
-                next = nextTokenFromIterator();
+                next = nextTokenIgnoringWhitespace();
             }

             // put our concluding token in the queue with all the comments
@@ -219,7 +219,7 @@ final class Parser {

         private TokenWithComments popTokenWithoutTrailingComment() {
             if (buffer.isEmpty()) {
-                Token t = nextTokenFromIterator();
+                Token t = nextTokenIgnoringWhitespace();
                 if (Tokens.isComment(t)) {
                     consolidateCommentBlock(t);
                     return buffer.pop();
@@ -243,7 +243,7 @@ final class Parser {
             if (!attractsTrailingComments(withPrecedingComments.token)) {
                 return withPrecedingComments;
             } else if (buffer.isEmpty()) {
-                Token after = nextTokenFromIterator();
+                Token after = nextTokenIgnoringWhitespace();
                 if (Tokens.isComment(after)) {
                     return withPrecedingComments.add(after);
                 } else {
@@ -321,7 +321,7 @@ final class Parser {

         // Grabs the next Token off of the TokenIterator, ignoring
         // IgnoredWhitespace tokens
-        private Token nextTokenFromIterator() {
+        private Token nextTokenIgnoringWhitespace() {
             Token t;
             do {
                 t = tokens.next();
diff --git a/config/src/main/java/com/typesafe/config/impl/Token.java b/config/src/main/java/com/typesafe/config/impl/Token.java
index 1c199850..af1321a3 100644
--- a/config/src/main/java/com/typesafe/config/impl/Token.java
+++ b/config/src/main/java/com/typesafe/config/impl/Token.java
@@ -36,7 +36,7 @@ class Token {
         return tokenType;
     }

-    final String tokenText() { return tokenText; }
+    public String tokenText() { return tokenText; }

     // this is final because we don't always use the origin() accessor,
     // and we don't because it throws if origin is null
diff --git a/config/src/main/java/com/typesafe/config/impl/Tokenizer.java b/config/src/main/java/com/typesafe/config/impl/Tokenizer.java
index fcb6bd93..d421dfb8 100644
--- a/config/src/main/java/com/typesafe/config/impl/Tokenizer.java
+++ b/config/src/main/java/com/typesafe/config/impl/Tokenizer.java
@@ -53,11 +53,11 @@ final class Tokenizer {
     }

     static String render(Iterator<Token> tokens) {
-        String renderedText = "";
+        StringBuilder renderedText = new StringBuilder();
         while (tokens.hasNext()) {
-            renderedText += tokens.next().tokenText();
+            renderedText.append(tokens.next().tokenText());
         }
-        return renderedText;
+        return renderedText.toString();
     }

     private static class TokenIterator implements Iterator<Token> {
@@ -90,14 +90,7 @@ final class Tokenizer {
             // simple values.
             private Token nextIsNotASimpleValue(ConfigOrigin baseOrigin, int lineNumber) {
                 lastTokenWasSimpleValue = false;
-
-                if (whitespace.length() > 0) {
-                    Token t = Tokens.newIgnoredWhitespace(lineOrigin(baseOrigin, lineNumber),
-                            whitespace.toString());
-                    whitespace.setLength(0);
-                    return t;
-                }
-                return null;
+                return createWhitespaceTokenFromSaver(baseOrigin, lineNumber);
             }

             // called if the next token IS a simple value,
@@ -105,29 +98,29 @@ final class Tokenizer {
             // token also was.
             private Token nextIsASimpleValue(ConfigOrigin baseOrigin,
                     int lineNumber) {
-                if (lastTokenWasSimpleValue) {
-                    // need to save whitespace between the two so
-                    // the parser has the option to concatenate it.
-                    if (whitespace.length() > 0) {
-                        Token t = Tokens.newUnquotedText(
-                                lineOrigin(baseOrigin, lineNumber),
-                                whitespace.toString());
-                        whitespace.setLength(0); // reset
-                        return t;
-                    } else {
-                        // lastTokenWasSimpleValue = true still
-                        return null;
-                    }
-                } else {
+                Token t = createWhitespaceTokenFromSaver(baseOrigin, lineNumber);
+                if (!lastTokenWasSimpleValue) {
                     lastTokenWasSimpleValue = true;
-                    if (whitespace.length() > 0) {
-                        Token t = Tokens.newIgnoredWhitespace(lineOrigin(baseOrigin, lineNumber),
-                                whitespace.toString());
-                        whitespace.setLength(0);
-                        return t;
-                    }
-                    return null;
                 }
+                return t;
+            }
+
+            private Token createWhitespaceTokenFromSaver(ConfigOrigin baseOrigin,
+                    int lineNumber) {
+                if (whitespace.length() > 0) {
+                    Token t;
+                    if (lastTokenWasSimpleValue) {
+                        t = Tokens.newUnquotedText(
+                                lineOrigin(baseOrigin, lineNumber),
+                                whitespace.toString());
+                    } else {
+                        t = Tokens.newIgnoredWhitespace(lineOrigin(baseOrigin, lineNumber),
+                                whitespace.toString());
+                    }
+                    whitespace.setLength(0); // reset
+                    return t;
+                }
+                return null;
             }
         }
@@ -291,7 +284,10 @@ final class Tokenizer {
                 int c = nextCharRaw();
                 if (c == -1 || c == '\n') {
                     putBack(c);
-                    return Tokens.newComment(lineOrigin, sb.toString(), doubleSlash);
+                    if (doubleSlash)
+                        return Tokens.newCommentDoubleSlash(lineOrigin, sb.toString());
+                    else
+                        return Tokens.newCommentHash(lineOrigin, sb.toString());
                 } else {
                     sb.appendCodePoint(c);
                 }
diff --git a/config/src/main/java/com/typesafe/config/impl/Tokens.java b/config/src/main/java/com/typesafe/config/impl/Tokens.java
index dbe7d48f..167adf39 100644
--- a/config/src/main/java/com/typesafe/config/impl/Tokens.java
+++ b/config/src/main/java/com/typesafe/config/impl/Tokens.java
@@ -54,7 +54,7 @@ final class Tokens {

     static private class Line extends Token {
         Line(ConfigOrigin origin) {
-            super(TokenType.NEWLINE, origin, "\n");
+            
super(TokenType.NEWLINE, origin); } @Override @@ -76,6 +76,11 @@ final class Tokens { public int hashCode() { return 41 * (41 + super.hashCode()) + lineNumber(); } + + @Override + public String tokenText() { + return "\n"; + } } // This is not a Value, because it requires special processing @@ -83,7 +88,7 @@ final class Tokens { final private String value; UnquotedText(ConfigOrigin origin, String s) { - super(TokenType.UNQUOTED_TEXT, origin, s); + super(TokenType.UNQUOTED_TEXT, origin); this.value = s; } @@ -111,13 +116,18 @@ final class Tokens { public int hashCode() { return 41 * (41 + super.hashCode()) + value.hashCode(); } + + @Override + public String tokenText() { + return value; + } } static private class IgnoredWhitespace extends Token { final private String value; IgnoredWhitespace(ConfigOrigin origin, String s) { - super(TokenType.IGNORED_WHITESPACE, origin, s); + super(TokenType.IGNORED_WHITESPACE, origin); this.value = s; } @@ -125,6 +135,11 @@ final class Tokens { @Override public String toString() { return "'" + value + "' (WHITESPACE)"; } + + @Override + public String tokenText() { + return value; + } } static private class Problem extends Token { @@ -195,14 +210,36 @@ final class Tokens { } } - static private class Comment extends Token { + static private abstract class Comment extends Token { final private String text; - Comment(ConfigOrigin origin, String text, boolean doubleSlash) { - super(TokenType.COMMENT, origin, (doubleSlash? "//" : "#") + text); + Comment(ConfigOrigin origin, String text) { + super(TokenType.COMMENT, origin); this.text = text; } + final static class DoubleSlashComment extends Comment { + DoubleSlashComment(ConfigOrigin origin, String text) { + super(origin, text); + } + + @Override + public String tokenText() { + return "//" + super.text; + } + } + + final static class HashComment extends Comment { + HashComment(ConfigOrigin origin, String text) { + super(origin, text); + } + + @Override + public String tokenText() { + return "#" + super.text; + } + } + String text() { return text; } @@ -240,8 +277,7 @@ final class Tokens { final private List value; Substitution(ConfigOrigin origin, boolean optional, List expression) { - super(TokenType.SUBSTITUTION, origin, - "${" + (optional? "?" : "") + Tokenizer.render(expression.iterator()) + "}"); + super(TokenType.SUBSTITUTION, origin); this.optional = optional; this.value = expression; } @@ -254,6 +290,11 @@ final class Tokens { return value; } + @Override + public String tokenText() { + return "${" + (this.optional? "?" 
: "") + Tokenizer.render(this.value.iterator()) + "}"; + } + @Override public String toString() { StringBuilder sb = new StringBuilder(); @@ -409,8 +450,12 @@ final class Tokens { return new Problem(origin, what, message, suggestQuotes, cause); } - static Token newComment(ConfigOrigin origin, String text, boolean doubleSlash) { - return new Comment(origin, text, doubleSlash); + static Token newCommentDoubleSlash(ConfigOrigin origin, String text) { + return new Comment.DoubleSlashComment(origin, text); + } + + static Token newCommentHash(ConfigOrigin origin, String text) { + return new Comment.HashComment(origin, text); } static Token newUnquotedText(ConfigOrigin origin, String s) { diff --git a/config/src/test/scala/com/typesafe/config/impl/TestUtils.scala b/config/src/test/scala/com/typesafe/config/impl/TestUtils.scala index c1313342..b8536960 100644 --- a/config/src/test/scala/com/typesafe/config/impl/TestUtils.scala +++ b/config/src/test/scala/com/typesafe/config/impl/TestUtils.scala @@ -616,7 +616,8 @@ abstract trait TestUtils { def tokenInt(i: Int) = Tokens.newInt(fakeOrigin(), i, null) def tokenLong(l: Long) = Tokens.newLong(fakeOrigin(), l, null) def tokenLine(line: Int) = Tokens.newLine(fakeOrigin.withLineNumber(line)) - def tokenComment(text: String, doubleSlash : Boolean) = Tokens.newComment(fakeOrigin(), text, doubleSlash) + def tokenCommentDoubleSlash(text: String) = Tokens.newCommentDoubleSlash(fakeOrigin(), text) + def tokenCommentHash(text: String) = Tokens.newCommentHash(fakeOrigin(), text) def tokenWhitespace(text: String) = Tokens.newIgnoredWhitespace(fakeOrigin(), text) private def tokenMaybeOptionalSubstitution(optional: Boolean, expression: Token*) = { diff --git a/config/src/test/scala/com/typesafe/config/impl/TokenizerTest.scala b/config/src/test/scala/com/typesafe/config/impl/TokenizerTest.scala index c8e29171..a6312a36 100644 --- a/config/src/test/scala/com/typesafe/config/impl/TokenizerTest.scala +++ b/config/src/test/scala/com/typesafe/config/impl/TokenizerTest.scala @@ -20,17 +20,15 @@ class TokenizerTest extends TestUtils { @Test def tokenizeEmptyString() { val source = "" - assertEquals(List(Tokens.START, Tokens.END), - tokenizeAsList(source)) - assertEquals(source, tokenizeAsString(source)) + val expected = List() + tokenizerTest(expected, source) } @Test def tokenizeNewlines() { val source = "\n\n" - assertEquals(List(Tokens.START, tokenLine(1), tokenLine(2), Tokens.END), - tokenizeAsList(source)) - assertEquals(source, tokenizeAsString(source)) + val expected = List(tokenLine(1), tokenLine(2)) + tokenizerTest(expected, source) } @Test @@ -39,20 +37,19 @@ class TokenizerTest extends TestUtils { // but spec is unclear to me when spaces are required, and banning them // is actually extra work). 
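         // In sketch form, tokenizerTest (defined at the top of this file)
         // wraps the expected list in START/END and asserts an exact
         // round trip:
         //   tokenizeAsList(source) == List(Tokens.START) ++ expected ++ List(Tokens.END)
         //   tokenizeAsString(source) == source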
val source = """,:=}{][+="foo"""" + "\"\"\"bar\"\"\"" + """true3.14false42null${a.b}${?x.y}${"c.d"}""" + "\n" - val expected = List(Tokens.START, Tokens.COMMA, Tokens.COLON, Tokens.EQUALS, Tokens.CLOSE_CURLY, + val expected = List(Tokens.COMMA, Tokens.COLON, Tokens.EQUALS, Tokens.CLOSE_CURLY, Tokens.OPEN_CURLY, Tokens.CLOSE_SQUARE, Tokens.OPEN_SQUARE, Tokens.PLUS_EQUALS, tokenString("foo"), tokenString("bar"), tokenTrue, tokenDouble(3.14), tokenFalse, tokenLong(42), tokenNull, tokenSubstitution(tokenUnquoted("a.b")), tokenOptionalSubstitution(tokenUnquoted("x.y")), - tokenKeySubstitution("c.d"), tokenLine(1), Tokens.END) - assertEquals(expected, tokenizeAsList(source)) - assertEquals(source, tokenizeAsString(source)) + tokenKeySubstitution("c.d"), tokenLine(1)) + tokenizerTest(expected, source) } @Test def tokenizeAllTypesWithSingleSpaces() { val source = """ , : = } { ] [ += "foo" """ + "\"\"\"bar\"\"\"" + """ 42 true 3.14 false null ${a.b} ${?x.y} ${"c.d"} """ + "\n " - val expected = List(Tokens.START, tokenWhitespace(" "), Tokens.COMMA, tokenWhitespace(" "), Tokens.COLON, tokenWhitespace(" "), + val expected = List(tokenWhitespace(" "), Tokens.COMMA, tokenWhitespace(" "), Tokens.COLON, tokenWhitespace(" "), Tokens.EQUALS, tokenWhitespace(" "), Tokens.CLOSE_CURLY, tokenWhitespace(" "), Tokens.OPEN_CURLY, tokenWhitespace(" "), Tokens.CLOSE_SQUARE, tokenWhitespace(" "), Tokens.OPEN_SQUARE, tokenWhitespace(" "), Tokens.PLUS_EQUALS, tokenWhitespace(" "), tokenString("foo"), tokenUnquoted(" "), tokenString("bar"), tokenUnquoted(" "), tokenLong(42), tokenUnquoted(" "), @@ -60,15 +57,14 @@ class TokenizerTest extends TestUtils { tokenUnquoted(" "), tokenSubstitution(tokenUnquoted("a.b")), tokenUnquoted(" "), tokenOptionalSubstitution(tokenUnquoted("x.y")), tokenUnquoted(" "), tokenKeySubstitution("c.d"), tokenWhitespace(" "), - tokenLine(1), tokenWhitespace(" "), Tokens.END) - assertEquals(expected, tokenizeAsList(source)) - assertEquals(source, tokenizeAsString(source)) + tokenLine(1), tokenWhitespace(" ")) + tokenizerTest(expected, source) } @Test def tokenizeAllTypesWithMultipleSpaces() { val source = """ , : = } { ] [ += "foo" """ + "\"\"\"bar\"\"\"" + """ 42 true 3.14 false null ${a.b} ${?x.y} ${"c.d"} """ + "\n " - val expected = List(Tokens.START, tokenWhitespace(" "), Tokens.COMMA, tokenWhitespace(" "), Tokens.COLON, tokenWhitespace(" "), + val expected = List(tokenWhitespace(" "), Tokens.COMMA, tokenWhitespace(" "), Tokens.COLON, tokenWhitespace(" "), Tokens.EQUALS, tokenWhitespace(" "), Tokens.CLOSE_CURLY, tokenWhitespace(" "), Tokens.OPEN_CURLY, tokenWhitespace(" "), Tokens.CLOSE_SQUARE, tokenWhitespace(" "), Tokens.OPEN_SQUARE, tokenWhitespace(" "), Tokens.PLUS_EQUALS, tokenWhitespace(" "), tokenString("foo"), tokenUnquoted(" "), tokenString("bar"), tokenUnquoted(" "), tokenLong(42), tokenUnquoted(" "), tokenTrue, tokenUnquoted(" "), @@ -76,57 +72,50 @@ class TokenizerTest extends TestUtils { tokenUnquoted(" "), tokenSubstitution(tokenUnquoted("a.b")), tokenUnquoted(" "), tokenOptionalSubstitution(tokenUnquoted("x.y")), tokenUnquoted(" "), tokenKeySubstitution("c.d"), tokenWhitespace(" "), - tokenLine(1), tokenWhitespace(" "), Tokens.END) - assertEquals(expected, tokenizeAsList(source)) - assertEquals(source, tokenizeAsString(source)) + tokenLine(1), tokenWhitespace(" ")) + tokenizerTest(expected, source) } @Test def tokenizeTrueAndUnquotedText() { val source = """truefoo""" - val expected = List(Tokens.START, tokenTrue, tokenUnquoted("foo"), Tokens.END) - assertEquals(expected, 
tokenizeAsList(source)) - assertEquals(source, tokenizeAsString(source)) + val expected = List(tokenTrue, tokenUnquoted("foo")) + tokenizerTest(expected, source) } @Test def tokenizeFalseAndUnquotedText() { val source = """falsefoo""" - val expected = List(Tokens.START, tokenFalse, tokenUnquoted("foo"), Tokens.END) - assertEquals(expected, tokenizeAsList(source)) - assertEquals(source, tokenizeAsString(source)) + val expected = List(tokenFalse, tokenUnquoted("foo")) + tokenizerTest(expected, source) } @Test def tokenizeNullAndUnquotedText() { val source = """nullfoo""" - val expected = List(Tokens.START, tokenNull, tokenUnquoted("foo"), Tokens.END) - assertEquals(expected, tokenizeAsList(source)) - assertEquals(source, tokenizeAsString(source)) + val expected = List(tokenNull, tokenUnquoted("foo")) + tokenizerTest(expected, source) } @Test def tokenizeUnquotedTextContainingTrue() { val source = """footrue""" - val expected = List(Tokens.START, tokenUnquoted("footrue"), Tokens.END) - assertEquals(expected, tokenizeAsList(source)) - assertEquals(source, tokenizeAsString(source)) + val expected = List(tokenUnquoted("footrue")) + tokenizerTest(expected, source) } @Test def tokenizeUnquotedTextContainingSpaceTrue() { val source = """foo true""" - val expected = List(Tokens.START, tokenUnquoted("foo"), tokenUnquoted(" "), tokenTrue, Tokens.END) - assertEquals(expected, tokenizeAsList(source)) - assertEquals(source, tokenizeAsString(source)) + val expected = List(tokenUnquoted("foo"), tokenUnquoted(" "), tokenTrue) + tokenizerTest(expected, source) } @Test def tokenizeTrueAndSpaceAndUnquotedText() { val source = """true foo""" - val expected = List(Tokens.START, tokenTrue, tokenUnquoted(" "), tokenUnquoted("foo"), Tokens.END) - assertEquals(expected, tokenizeAsList(source)) - assertEquals(source, tokenizeAsString(source)) + val expected = List(tokenTrue, tokenUnquoted(" "), tokenUnquoted("foo")) + tokenizerTest(expected, source) } @Test @@ -134,36 +123,33 @@ class TokenizerTest extends TestUtils { tokenizerTest(List(tokenUnquoted("a/b/c/")), "a/b/c/") tokenizerTest(List(tokenUnquoted("/")), "/") tokenizerTest(List(tokenUnquoted("/"), tokenUnquoted(" "), tokenUnquoted("/")), "/ /") - tokenizerTest(List(tokenComment("", true)), "//") + tokenizerTest(List(tokenCommentDoubleSlash("")), "//") } @Test def tokenizeUnquotedTextKeepsSpaces() { val source = " foo \n" - val expected = List(Tokens.START, tokenWhitespace(" "), tokenUnquoted("foo"), tokenWhitespace(" "), - tokenLine(1), Tokens.END) - assertEquals(expected, tokenizeAsList(source)) - assertEquals(source, tokenizeAsString(source)) + val expected = List(tokenWhitespace(" "), tokenUnquoted("foo"), tokenWhitespace(" "), + tokenLine(1)) + tokenizerTest(expected, source) } @Test def tokenizeUnquotedTextKeepsInternalSpaces() { val source = " foo bar baz \n" - val expected = List(Tokens.START, tokenWhitespace(" "), tokenUnquoted("foo"), tokenUnquoted(" "), + val expected = List(tokenWhitespace(" "), tokenUnquoted("foo"), tokenUnquoted(" "), tokenUnquoted("bar"), tokenUnquoted(" "), tokenUnquoted("baz"), tokenWhitespace(" "), - tokenLine(1), Tokens.END) - assertEquals(expected, tokenizeAsList(" foo bar baz \n")) - assertEquals(source, tokenizeAsString(source)) + tokenLine(1)) + tokenizerTest(expected, source) } @Test def tokenizeMixedUnquotedQuoted() { val source = " foo\"bar\"baz \n" - val expected = List(Tokens.START, tokenWhitespace(" "), tokenUnquoted("foo"), + val expected = List(tokenWhitespace(" "), tokenUnquoted("foo"), tokenString("bar"), 
tokenUnquoted("baz"), tokenWhitespace(" "), - tokenLine(1), Tokens.END) - assertEquals(expected, tokenizeAsList(" foo\"bar\"baz \n")) - assertEquals(source, tokenizeAsString(source)) + tokenLine(1)) + tokenizerTest(expected, source) } @Test @@ -185,10 +171,9 @@ class TokenizerTest extends TestUtils { for (t <- tests) { describeFailure(t.toString) { - assertEquals(List(Tokens.START, tokenWhitespace(" "), Tokens.newValue(t.result, t.toString), - tokenWhitespace(" "), Tokens.END), - tokenizeAsList(t.escaped)) - assertEquals(t.escaped, tokenizeAsString(t.escaped)) + val expected = List(tokenWhitespace(" "), Tokens.newValue(t.result, t.toString), + tokenWhitespace(" ")) + tokenizerTest(expected, t.escaped) } } } @@ -218,41 +203,36 @@ class TokenizerTest extends TestUtils { @Test def tokenizerEmptyTripleQuoted(): Unit = { val source = "\"\"\"\"\"\"" - assertEquals(List(Tokens.START, tokenString(""), Tokens.END), - tokenizeAsList(source)) - assertEquals(source, tokenizeAsString(source)) + val expected = List(tokenString("")) + tokenizerTest(expected, source) } @Test def tokenizerTrivialTripleQuoted(): Unit = { val source = "\"\"\"bar\"\"\"" - assertEquals(List(Tokens.START, tokenString("bar"), Tokens.END), - tokenizeAsList(source)) - assertEquals(source, tokenizeAsString(source)) + val expected = List(tokenString("bar")) + tokenizerTest(expected, source) } @Test def tokenizerNoEscapesInTripleQuoted(): Unit = { val source = "\"\"\"\\n\"\"\"" - assertEquals(List(Tokens.START, tokenString("\\n"), Tokens.END), - tokenizeAsList(source)) - assertEquals(source, tokenizeAsString(source)) + val expected = List(tokenString("\\n")) + tokenizerTest(expected, source) } @Test def tokenizerTrailingQuotesInTripleQuoted(): Unit = { val source = "\"\"\"\"\"\"\"\"\"" - assertEquals(List(Tokens.START, tokenString("\"\"\""), Tokens.END), - tokenizeAsList(source)) - assertEquals(source, tokenizeAsString(source)) + val expected = List(tokenString("\"\"\"")) + tokenizerTest(expected, source) } @Test def tokenizerNewlineInTripleQuoted(): Unit = { val source = "\"\"\"foo\nbar\"\"\"" - assertEquals(List(Tokens.START, tokenString("foo\nbar"), Tokens.END), - tokenizeAsList(source)) - assertEquals(source, tokenizeAsString(source)) + val expected = List(tokenString("foo\nbar")) + tokenizerTest(expected, source) } @Test @@ -274,9 +254,8 @@ class TokenizerTest extends TestUtils { for (t <- tests) { describeFailure(t.toString()) { - assertEquals(List(Tokens.START, t.result, Tokens.END), - tokenizeAsList(t.s)) - assertEquals(t.s, tokenizeAsString(t.s)) + val expected = List(t.result) + tokenizerTest(expected, t.s) } } } @@ -285,20 +264,30 @@ class TokenizerTest extends TestUtils { def commentsHandledInVariousContexts() { tokenizerTest(List(tokenString("//bar")), "\"//bar\"") tokenizerTest(List(tokenString("#bar")), "\"#bar\"") - tokenizerTest(List(tokenUnquoted("bar"), tokenComment("comment", true)), "bar//comment") - tokenizerTest(List(tokenUnquoted("bar"), tokenComment("comment", false)), "bar#comment") - tokenizerTest(List(tokenInt(10), tokenComment("comment", true)), "10//comment") - tokenizerTest(List(tokenInt(10), tokenComment("comment", false)), "10#comment") - tokenizerTest(List(tokenDouble(3.14), tokenComment("comment", true)), "3.14//comment") - tokenizerTest(List(tokenDouble(3.14), tokenComment("comment", false)), "3.14#comment") + tokenizerTest(List(tokenUnquoted("bar"), tokenCommentDoubleSlash("comment")), "bar//comment") + tokenizerTest(List(tokenUnquoted("bar"), tokenCommentHash("comment")), "bar#comment") + 
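// Note (sketch): render() concatenates tokenText(), and each comment
+        // token derives its text from its body, e.g. tokenCommentDoubleSlash("c")
+        // renders as "//c" while tokenCommentHash("c") renders as "#c".
+        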
tokenizerTest(List(tokenInt(10), tokenCommentDoubleSlash("comment")), "10//comment")
+        tokenizerTest(List(tokenInt(10), tokenCommentHash("comment")), "10#comment")
+        tokenizerTest(List(tokenDouble(3.14), tokenCommentDoubleSlash("comment")), "3.14//comment")
+        tokenizerTest(List(tokenDouble(3.14), tokenCommentHash("comment")), "3.14#comment")
         // be sure we keep the newline
+        tokenizerTest(List(tokenInt(10), tokenCommentDoubleSlash("comment"), tokenLine(1), tokenInt(12)), "10//comment\n12")
+        tokenizerTest(List(tokenInt(10), tokenCommentHash("comment"), tokenLine(1), tokenInt(12)), "10#comment\n12")
         // be sure we handle multi-line comments
+        tokenizerTest(List(tokenCommentDoubleSlash("comment"), tokenLine(1), tokenCommentDoubleSlash("comment2")),
+            "//comment\n//comment2")
+        tokenizerTest(List(tokenCommentHash("comment"), tokenLine(1), tokenCommentHash("comment2")),
+            "#comment\n#comment2")
+        tokenizerTest(List(tokenWhitespace(" "), tokenCommentDoubleSlash("comment\r"),
+            tokenLine(1), tokenWhitespace(" "), tokenCommentDoubleSlash("comment2 "),
+            tokenLine(2), tokenCommentDoubleSlash("comment3 "),
+            tokenLine(3), tokenLine(4), tokenCommentDoubleSlash("comment4")),
+            " //comment\r\n //comment2 \n//comment3 \n\n//comment4")
+        tokenizerTest(List(tokenWhitespace(" "), tokenCommentHash("comment\r"),
+            tokenLine(1), tokenWhitespace(" "), tokenCommentHash("comment2 "),
+            tokenLine(2), tokenCommentHash("comment3 "),
+            tokenLine(3), tokenLine(4), tokenCommentHash("comment4")),
+            " #comment\r\n #comment2 \n#comment3 \n\n#comment4")
     }

     @Test