Merge pull request #271 from fpringvaldsen/feature/lossless-tokens

Add lossless tokens
2025-01-15 23:01:05 +08:00 · 2015-03-04 13:29:20 -05:00 · 2015-03-04 13:29:20 -05:00 · f3e11bc583
commit f3e11bc583
parent fda341b44f cb86fb136e
7 changed files with 307 additions and 128 deletions
--- a/config/src/main/java/com/typesafe/config/impl/Parser.java
+++ b/config/src/main/java/com/typesafe/config/impl/Parser.java
@ -203,7 +203,7 @@ final class Parser {
                }

                previous = next;
-                next = tokens.next();
+                next = nextTokenIgnoringWhitespace();
            }

            // put our concluding token in the queue with all the comments
@ -219,7 +219,7 @@ final class Parser {

        private TokenWithComments popTokenWithoutTrailingComment() {
            if (buffer.isEmpty()) {
-                Token t = tokens.next();
+                Token t = nextTokenIgnoringWhitespace();
                if (Tokens.isComment(t)) {
                    consolidateCommentBlock(t);
                    return buffer.pop();
@ -243,7 +243,7 @@ final class Parser {
            if (!attractsTrailingComments(withPrecedingComments.token)) {
                return withPrecedingComments;
            } else if (buffer.isEmpty()) {
-                Token after = tokens.next();
+                Token after = nextTokenIgnoringWhitespace();
                if (Tokens.isComment(after)) {
                    return withPrecedingComments.add(after);
                } else {
@ -319,6 +319,16 @@ final class Parser {
            return t;
        }

+        // Grabs the next Token off of the TokenIterator, ignoring
+        // IgnoredWhitespace tokens
+        private Token nextTokenIgnoringWhitespace() {
+            Token t;
+            do {
+                t = tokens.next();
+            } while (Tokens.isIgnoredWhitespace(t));
+            return t;
+        }
+
        private AbstractConfigValue addAnyCommentsAfterAnyComma(AbstractConfigValue v) {
            TokenWithComments t = nextToken(); // do NOT skip newlines, we only
                                               // want same-line comments
@ -1063,6 +1073,11 @@ final class Parser {

        while (expression.hasNext()) {
            Token t = expression.next();
+
+            // Ignore all IgnoredWhitespace tokens
+            if (Tokens.isIgnoredWhitespace(t))
+                continue;
+
            if (Tokens.isValueWithType(t, ConfigValueType.STRING)) {
                AbstractConfigValue v = Tokens.getValue(t);
                // this is a quoted string; so any periods
--- a/config/src/main/java/com/typesafe/config/impl/Token.java
+++ b/config/src/main/java/com/typesafe/config/impl/Token.java
@ -10,26 +10,34 @@ class Token {
    final private TokenType tokenType;
    final private String debugString;
    final private ConfigOrigin origin;
+    final private String tokenText;

    Token(TokenType tokenType, ConfigOrigin origin) {
        this(tokenType, origin, null);
    }

-    Token(TokenType tokenType, ConfigOrigin origin, String debugString) {
+    Token(TokenType tokenType, ConfigOrigin origin, String tokenText) {
+        this(tokenType, origin, tokenText, null);
+    }
+
+    Token(TokenType tokenType, ConfigOrigin origin, String tokenText, String debugString) {
        this.tokenType = tokenType;
        this.origin = origin;
        this.debugString = debugString;
+        this.tokenText = tokenText;
    }

    // this is used for singleton tokens like COMMA or OPEN_CURLY
-    static Token newWithoutOrigin(TokenType tokenType, String debugString) {
-        return new Token(tokenType, null, debugString);
+    static Token newWithoutOrigin(TokenType tokenType, String debugString, String tokenText) {
+        return new Token(tokenType, null, tokenText, debugString);
    }

    final TokenType tokenType() {
        return tokenType;
    }

+    public String tokenText() { return tokenText; }
+
    // this is final because we don't always use the origin() accessor,
    // and we don't because it throws if origin is null
    final ConfigOrigin origin() {
--- a/config/src/main/java/com/typesafe/config/impl/TokenType.java
+++ b/config/src/main/java/com/typesafe/config/impl/TokenType.java
@ -16,6 +16,7 @@ enum TokenType {
    VALUE,
    NEWLINE,
    UNQUOTED_TEXT,
+    IGNORED_WHITESPACE,
    SUBSTITUTION,
    PROBLEM,
    COMMENT,
--- a/config/src/main/java/com/typesafe/config/impl/Tokenizer.java
+++ b/config/src/main/java/com/typesafe/config/impl/Tokenizer.java
@ -52,6 +52,14 @@ final class Tokenizer {
        return new TokenIterator(origin, input, flavor != ConfigSyntax.JSON);
    }

+    static String render(Iterator<Token> tokens) {
+        StringBuilder renderedText = new StringBuilder();
+        while (tokens.hasNext()) {
+            renderedText.append(tokens.next().tokenText());
+        }
+        return renderedText.toString();
+    }
+
    private static class TokenIterator implements Iterator<Token> {

        private static class WhitespaceSaver {
@ -66,25 +74,23 @@ final class Tokenizer {
            }

            void add(int c) {
-                if (lastTokenWasSimpleValue)
-                    whitespace.appendCodePoint(c);
+                whitespace.appendCodePoint(c);
            }

            Token check(Token t, ConfigOrigin baseOrigin, int lineNumber) {
                if (isSimpleValue(t)) {
                    return nextIsASimpleValue(baseOrigin, lineNumber);
                } else {
-                    nextIsNotASimpleValue();
-                    return null;
+                    return nextIsNotASimpleValue(baseOrigin, lineNumber);
                }
            }

             // called if the next token is not a simple value;
             // returns any whitespace saved so far as a token
             // (null if none was saved) instead of discarding it.
-            private void nextIsNotASimpleValue() {
+            private Token nextIsNotASimpleValue(ConfigOrigin baseOrigin, int lineNumber) {
                lastTokenWasSimpleValue = false;
-                whitespace.setLength(0);
+                return createWhitespaceTokenFromSaver(baseOrigin, lineNumber);
            }

            // called if the next token IS a simple value,
@ -92,24 +98,29 @@ final class Tokenizer {
            // token also was.
            private Token nextIsASimpleValue(ConfigOrigin baseOrigin,
                    int lineNumber) {
-                if (lastTokenWasSimpleValue) {
-                    // need to save whitespace between the two so
-                    // the parser has the option to concatenate it.
-                    if (whitespace.length() > 0) {
-                        Token t = Tokens.newUnquotedText(
-                                lineOrigin(baseOrigin, lineNumber),
-                                whitespace.toString());
-                        whitespace.setLength(0); // reset
-                        return t;
-                    } else {
-                        // lastTokenWasSimpleValue = true still
-                        return null;
-                    }
-                } else {
+                Token t = createWhitespaceTokenFromSaver(baseOrigin, lineNumber);
+                if (!lastTokenWasSimpleValue) {
                    lastTokenWasSimpleValue = true;
-                    whitespace.setLength(0);
-                    return null;
                }
+                return t;
+            }
+
+            private Token createWhitespaceTokenFromSaver(ConfigOrigin baseOrigin,
+                                                         int lineNumber) {
+                if (whitespace.length() > 0) {
+                    Token t;
+                    if (lastTokenWasSimpleValue) {
+                        t = Tokens.newUnquotedText(
+                            lineOrigin(baseOrigin, lineNumber),
+                            whitespace.toString());
+                    } else {
+                        t = Tokens.newIgnoredWhitespace(lineOrigin(baseOrigin, lineNumber),
+                                                        whitespace.toString());
+                    }
+                    whitespace.setLength(0); // reset
+                    return t;
+                }
+                return null;
            }
        }

@ -260,10 +271,12 @@ final class Tokenizer {
        // ONE char has always been consumed, either the # or the first /, but
        // not both slashes
        private Token pullComment(int firstChar) {
+            boolean doubleSlash = false;
            if (firstChar == '/') {
                int discard = nextCharRaw();
                if (discard != '/')
                    throw new ConfigException.BugOrBroken("called pullComment but // not seen");
+                doubleSlash = true;
            }

            StringBuilder sb = new StringBuilder();
@ -271,7 +284,10 @@ final class Tokenizer {
                int c = nextCharRaw();
                if (c == -1 || c == '\n') {
                    putBack(c);
-                    return Tokens.newComment(lineOrigin, sb.toString());
+                    if (doubleSlash)
+                        return Tokens.newCommentDoubleSlash(lineOrigin, sb.toString());
+                    else
+                        return Tokens.newCommentHash(lineOrigin, sb.toString());
                } else {
                    sb.appendCodePoint(c);
                }
@ -367,11 +383,16 @@ final class Tokenizer {
            }
        }

-        private void pullEscapeSequence(StringBuilder sb) throws ProblemException {
+        private void pullEscapeSequence(StringBuilder sb, StringBuilder sbOrig) throws ProblemException {
            int escaped = nextCharRaw();
            if (escaped == -1)
                throw problem("End of input but backslash in string had nothing after it");

+            // Record the escape sequence verbatim (the backslash plus the escaped character)
+            // so the original text can be reproduced when the token is rendered
+            sbOrig.appendCodePoint('\\');
+            sbOrig.appendCodePoint(escaped);
+
            switch (escaped) {
            case '"':
                sb.append('"');
@ -407,6 +428,7 @@ final class Tokenizer {
                    a[i] = (char) c;
                }
                String digits = new String(a);
+                sbOrig.append(a);
                try {
                    sb.appendCodePoint(Integer.parseInt(digits, 16));
                } catch (NumberFormatException e) {
@ -424,7 +446,7 @@ final class Tokenizer {
            }
        }

-        private void appendTripleQuotedString(StringBuilder sb) throws ProblemException {
+        private void appendTripleQuotedString(StringBuilder sb, StringBuilder sbOrig) throws ProblemException {
            // we are after the opening triple quote and need to consume the
            // close triple
            int consecutiveQuotes = 0;
@ -451,26 +473,37 @@ final class Tokenizer {
                }

                sb.appendCodePoint(c);
+                sbOrig.appendCodePoint(c);
            }
        }

        private Token pullQuotedString() throws ProblemException {
            // the open quote has already been consumed
            StringBuilder sb = new StringBuilder();
+
+            // A second StringBuilder records the string as it appeared in the
+            // original text, including the surrounding quotes and any escape
+            // sequences in their raw, still-escaped form, while sb accumulates the
+            // decoded value. It supplies the token's original text for rendering.
+            StringBuilder sbOrig = new StringBuilder();
+            sbOrig.appendCodePoint('"');
+
            while (true) {
                int c = nextCharRaw();
                if (c == -1)
                    throw problem("End of input but string quote was still open");

                if (c == '\\') {
-                    pullEscapeSequence(sb);
+                    pullEscapeSequence(sb, sbOrig);
                } else if (c == '"') {
+                    sbOrig.appendCodePoint(c);
                    break;
                } else if (Character.isISOControl(c)) {
                    throw problem(asString(c), "JSON does not allow unescaped " + asString(c)
                            + " in quoted strings, use a backslash escape");
                } else {
                    sb.appendCodePoint(c);
+                    sbOrig.appendCodePoint(c);
                }
            }

@ -478,13 +511,14 @@ final class Tokenizer {
            if (sb.length() == 0) {
                int third = nextCharRaw();
                if (third == '"') {
-                    appendTripleQuotedString(sb);
+                    sbOrig.appendCodePoint(third);
+                    appendTripleQuotedString(sb, sbOrig);
                } else {
                    putBack(third);
                }
-            }

-            return Tokens.newString(lineOrigin, sb.toString());
+            }
+            return Tokens.newString(lineOrigin, sb.toString(), sbOrig.toString());
        }

        private Token pullPlusEquals() throws ProblemException {
--- a/config/src/main/java/com/typesafe/config/impl/Tokens.java
+++ b/config/src/main/java/com/typesafe/config/impl/Tokens.java
@ -16,7 +16,11 @@ final class Tokens {
        final private AbstractConfigValue value;

        Value(AbstractConfigValue value) {
-            super(TokenType.VALUE, value.origin());
+            this(value, null);
+        }
+
+        Value(AbstractConfigValue value, String origText) {
+            super(TokenType.VALUE, value.origin(), origText);
            this.value = value;
        }

@ -72,6 +76,11 @@ final class Tokens {
        public int hashCode() {
            return 41 * (41 + super.hashCode()) + lineNumber();
        }
+
+        @Override
+        public String tokenText() {
+            return "\n";
+        }
    }

    // This is not a Value, because it requires special processing
@ -107,6 +116,30 @@ final class Tokens {
        public int hashCode() {
            return 41 * (41 + super.hashCode()) + value.hashCode();
        }
+
+        @Override
+        public String tokenText() {
+            return value;
+        }
+    }
+
+    static private class IgnoredWhitespace extends Token {
+        final private String value;
+
+        IgnoredWhitespace(ConfigOrigin origin, String s) {
+            super(TokenType.IGNORED_WHITESPACE, origin);
+            this.value = s;
+        }
+
+        String value() { return value; }
+
+        @Override
+        public String toString() { return "'" + value + "' (WHITESPACE)"; }
+
+        @Override
+        public String tokenText() {
+            return value;
+        }
    }

    static private class Problem extends Token {
@ -177,7 +210,7 @@ final class Tokens {
        }
    }

-    static private class Comment extends Token {
+    static private abstract class Comment extends Token {
        final private String text;

        Comment(ConfigOrigin origin, String text) {
@ -185,6 +218,28 @@ final class Tokens {
            this.text = text;
        }

+        final static class DoubleSlashComment extends Comment {
+            DoubleSlashComment(ConfigOrigin origin, String text) {
+                super(origin, text);
+            }
+
+            @Override
+            public String tokenText() {
+                return "//" + super.text;
+            }
+        }
+
+        final static class HashComment extends Comment {
+            HashComment(ConfigOrigin origin, String text) {
+                super(origin, text);
+            }
+
+            @Override
+            public String tokenText() {
+                return "#" + super.text;
+            }
+        }
+
        String text() {
            return text;
        }
@ -235,6 +290,11 @@ final class Tokens {
            return value;
        }

+        @Override
+        public String tokenText() {
+            return "${" + (this.optional? "?" : "") + Tokenizer.render(this.value.iterator()) + "}";
+        }
+
        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
@ -344,6 +404,10 @@ final class Tokens {
        }
    }

+    static boolean isIgnoredWhitespace(Token token) {
+        return token instanceof IgnoredWhitespace;
+    }
+
    static boolean isSubstitution(Token token) {
        return token instanceof Substitution;
    }
@ -366,16 +430,16 @@ final class Tokens {
        }
    }

-    final static Token START = Token.newWithoutOrigin(TokenType.START, "start of file");
-    final static Token END = Token.newWithoutOrigin(TokenType.END, "end of file");
-    final static Token COMMA = Token.newWithoutOrigin(TokenType.COMMA, "','");
-    final static Token EQUALS = Token.newWithoutOrigin(TokenType.EQUALS, "'='");
-    final static Token COLON = Token.newWithoutOrigin(TokenType.COLON, "':'");
-    final static Token OPEN_CURLY = Token.newWithoutOrigin(TokenType.OPEN_CURLY, "'{'");
-    final static Token CLOSE_CURLY = Token.newWithoutOrigin(TokenType.CLOSE_CURLY, "'}'");
-    final static Token OPEN_SQUARE = Token.newWithoutOrigin(TokenType.OPEN_SQUARE, "'['");
-    final static Token CLOSE_SQUARE = Token.newWithoutOrigin(TokenType.CLOSE_SQUARE, "']'");
-    final static Token PLUS_EQUALS = Token.newWithoutOrigin(TokenType.PLUS_EQUALS, "'+='");
+    final static Token START = Token.newWithoutOrigin(TokenType.START, "start of file", "");
+    final static Token END = Token.newWithoutOrigin(TokenType.END, "end of file", "");
+    final static Token COMMA = Token.newWithoutOrigin(TokenType.COMMA, "','", ",");
+    final static Token EQUALS = Token.newWithoutOrigin(TokenType.EQUALS, "'='", "=");
+    final static Token COLON = Token.newWithoutOrigin(TokenType.COLON, "':'", ":");
+    final static Token OPEN_CURLY = Token.newWithoutOrigin(TokenType.OPEN_CURLY, "'{'", "{");
+    final static Token CLOSE_CURLY = Token.newWithoutOrigin(TokenType.CLOSE_CURLY, "'}'", "}");
+    final static Token OPEN_SQUARE = Token.newWithoutOrigin(TokenType.OPEN_SQUARE, "'['", "[");
+    final static Token CLOSE_SQUARE = Token.newWithoutOrigin(TokenType.CLOSE_SQUARE, "']'", "]");
+    final static Token PLUS_EQUALS = Token.newWithoutOrigin(TokenType.PLUS_EQUALS, "'+='", "+=");

    static Token newLine(ConfigOrigin origin) {
        return new Line(origin);
@ -386,14 +450,22 @@ final class Tokens {
        return new Problem(origin, what, message, suggestQuotes, cause);
    }

-    static Token newComment(ConfigOrigin origin, String text) {
-        return new Comment(origin, text);
+    static Token newCommentDoubleSlash(ConfigOrigin origin, String text) {
+        return new Comment.DoubleSlashComment(origin, text);
+    }
+
+    static Token newCommentHash(ConfigOrigin origin, String text) {
+        return new Comment.HashComment(origin, text);
    }

    static Token newUnquotedText(ConfigOrigin origin, String s) {
        return new UnquotedText(origin, s);
    }

+    static Token newIgnoredWhitespace(ConfigOrigin origin, String s) {
+        return new IgnoredWhitespace(origin, s);
+    }
+
    static Token newSubstitution(ConfigOrigin origin, boolean optional, List<Token> expression) {
        return new Substitution(origin, optional, expression);
    }
@ -401,32 +473,35 @@ final class Tokens {
    static Token newValue(AbstractConfigValue value) {
        return new Value(value);
    }
-
-    static Token newString(ConfigOrigin origin, String value) {
-        return newValue(new ConfigString.Quoted(origin, value));
+    static Token newValue(AbstractConfigValue value, String origText) {
+        return new Value(value, origText);
    }

-    static Token newInt(ConfigOrigin origin, int value, String originalText) {
+    static Token newString(ConfigOrigin origin, String value, String origText) {
+        return newValue(new ConfigString.Quoted(origin, value), origText);
+    }
+
+    static Token newInt(ConfigOrigin origin, int value, String origText) {
        return newValue(ConfigNumber.newNumber(origin, value,
-                originalText));
+                origText), origText);
    }

    static Token newDouble(ConfigOrigin origin, double value,
-            String originalText) {
+            String origText) {
        return newValue(ConfigNumber.newNumber(origin, value,
-                originalText));
+                origText), origText);
    }

-    static Token newLong(ConfigOrigin origin, long value, String originalText) {
+    static Token newLong(ConfigOrigin origin, long value, String origText) {
        return newValue(ConfigNumber.newNumber(origin, value,
-                originalText));
+                origText), origText);
    }

    static Token newNull(ConfigOrigin origin) {
-        return newValue(new ConfigNull(origin));
+        return newValue(new ConfigNull(origin), "null");
    }

    static Token newBoolean(ConfigOrigin origin, boolean value) {
-        return newValue(new ConfigBoolean(origin, value));
+        return newValue(new ConfigBoolean(origin, value), "" + value);
    }
 }
--- a/config/src/test/scala/com/typesafe/config/impl/TestUtils.scala
+++ b/config/src/test/scala/com/typesafe/config/impl/TestUtils.scala
@ -611,12 +611,14 @@ abstract trait TestUtils {
    def tokenFalse = Tokens.newBoolean(fakeOrigin(), false)
    def tokenNull = Tokens.newNull(fakeOrigin())
    def tokenUnquoted(s: String) = Tokens.newUnquotedText(fakeOrigin(), s)
-    def tokenString(s: String) = Tokens.newString(fakeOrigin(), s)
+    def tokenString(s: String) = Tokens.newString(fakeOrigin(), s, s)
    def tokenDouble(d: Double) = Tokens.newDouble(fakeOrigin(), d, null)
    def tokenInt(i: Int) = Tokens.newInt(fakeOrigin(), i, null)
    def tokenLong(l: Long) = Tokens.newLong(fakeOrigin(), l, null)
    def tokenLine(line: Int) = Tokens.newLine(fakeOrigin.withLineNumber(line))
-    def tokenComment(text: String) = Tokens.newComment(fakeOrigin(), text)
+    def tokenCommentDoubleSlash(text: String) = Tokens.newCommentDoubleSlash(fakeOrigin(), text)
+    def tokenCommentHash(text: String) = Tokens.newCommentHash(fakeOrigin(), text)
+    def tokenWhitespace(text: String) = Tokens.newIgnoredWhitespace(fakeOrigin(), text)

    private def tokenMaybeOptionalSubstitution(optional: Boolean, expression: Token*) = {
        val l = new java.util.ArrayList[Token]
@ -657,6 +659,10 @@ abstract trait TestUtils {
        tokenize(s).asScala.toList
    }

+    def tokenizeAsString(s: String) = {
+        Tokenizer.render(tokenize(s))
+    }
+
    // this is importantly NOT using Path.newPath, which relies on
    // the parser; in the test suite we are often testing the parser,
    // so we don't want to use the parser to build the expected result.
--- a/config/src/test/scala/com/typesafe/config/impl/TokenizerTest.scala
+++ b/config/src/test/scala/com/typesafe/config/impl/TokenizerTest.scala
@ -14,18 +14,21 @@ class TokenizerTest extends TestUtils {
    private def tokenizerTest(expected: List[Token], s: String) {
        assertEquals(List(Tokens.START) ++ expected ++ List(Tokens.END),
            tokenizeAsList(s))
+        assertEquals(s, tokenizeAsString(s))
    }

    @Test
    def tokenizeEmptyString() {
-        assertEquals(List(Tokens.START, Tokens.END),
-            tokenizeAsList(""))
+        val source = ""
+        val expected = List()
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeNewlines() {
-        assertEquals(List(Tokens.START, tokenLine(1), tokenLine(2), Tokens.END),
-            tokenizeAsList("\n\n"))
+        val source = "\n\n"
+        val expected = List(tokenLine(1), tokenLine(2))
+        tokenizerTest(expected, source)
    }

    @Test
@ -33,75 +36,86 @@ class TokenizerTest extends TestUtils {
        // all token types with no spaces (not sure JSON spec wants this to work,
        // but spec is unclear to me when spaces are required, and banning them
        // is actually extra work).
-        val expected = List(Tokens.START, Tokens.COMMA, Tokens.COLON, Tokens.EQUALS, Tokens.CLOSE_CURLY,
+        val source = """,:=}{][+="foo"""" + "\"\"\"bar\"\"\"" + """true3.14false42null${a.b}${?x.y}${"c.d"}""" + "\n"
+        val expected = List(Tokens.COMMA, Tokens.COLON, Tokens.EQUALS, Tokens.CLOSE_CURLY,
            Tokens.OPEN_CURLY, Tokens.CLOSE_SQUARE, Tokens.OPEN_SQUARE, Tokens.PLUS_EQUALS, tokenString("foo"),
            tokenString("bar"), tokenTrue, tokenDouble(3.14), tokenFalse,
            tokenLong(42), tokenNull, tokenSubstitution(tokenUnquoted("a.b")),
            tokenOptionalSubstitution(tokenUnquoted("x.y")),
-            tokenKeySubstitution("c.d"), tokenLine(1), Tokens.END)
-        assertEquals(expected, tokenizeAsList(""",:=}{][+="foo"""" + "\"\"\"bar\"\"\"" + """true3.14false42null${a.b}${?x.y}${"c.d"}""" + "\n"))
+            tokenKeySubstitution("c.d"), tokenLine(1))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeAllTypesWithSingleSpaces() {
-        val expected = List(Tokens.START, Tokens.COMMA, Tokens.COLON, Tokens.EQUALS, Tokens.CLOSE_CURLY,
-            Tokens.OPEN_CURLY, Tokens.CLOSE_SQUARE, Tokens.OPEN_SQUARE, Tokens.PLUS_EQUALS, tokenString("foo"),
-            tokenUnquoted(" "), tokenString("bar"), tokenUnquoted(" "), tokenLong(42), tokenUnquoted(" "), tokenTrue, tokenUnquoted(" "),
-            tokenDouble(3.14), tokenUnquoted(" "), tokenFalse, tokenUnquoted(" "), tokenNull,
+        val source = """ , : = } { ] [ += "foo" """ + "\"\"\"bar\"\"\"" + """ 42 true 3.14 false null ${a.b} ${?x.y} ${"c.d"} """ + "\n "
+        val expected = List(tokenWhitespace(" "), Tokens.COMMA, tokenWhitespace(" "), Tokens.COLON, tokenWhitespace(" "),
+            Tokens.EQUALS, tokenWhitespace(" "), Tokens.CLOSE_CURLY, tokenWhitespace(" "), Tokens.OPEN_CURLY, tokenWhitespace(" "),
+            Tokens.CLOSE_SQUARE, tokenWhitespace(" "), Tokens.OPEN_SQUARE, tokenWhitespace(" "), Tokens.PLUS_EQUALS, tokenWhitespace(" "),
+            tokenString("foo"), tokenUnquoted(" "), tokenString("bar"), tokenUnquoted(" "), tokenLong(42), tokenUnquoted(" "),
+            tokenTrue, tokenUnquoted(" "), tokenDouble(3.14), tokenUnquoted(" "), tokenFalse, tokenUnquoted(" "), tokenNull,
            tokenUnquoted(" "), tokenSubstitution(tokenUnquoted("a.b")), tokenUnquoted(" "),
            tokenOptionalSubstitution(tokenUnquoted("x.y")), tokenUnquoted(" "),
-            tokenKeySubstitution("c.d"),
-            tokenLine(1), Tokens.END)
-        assertEquals(expected, tokenizeAsList(""" , : = } { ] [ += "foo" """ + "\"\"\"bar\"\"\"" + """ 42 true 3.14 false null ${a.b} ${?x.y} ${"c.d"} """ + "\n "))
+            tokenKeySubstitution("c.d"), tokenWhitespace(" "),
+            tokenLine(1), tokenWhitespace(" "))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeAllTypesWithMultipleSpaces() {
-        val expected = List(Tokens.START, Tokens.COMMA, Tokens.COLON, Tokens.EQUALS, Tokens.CLOSE_CURLY,
-            Tokens.OPEN_CURLY, Tokens.CLOSE_SQUARE, Tokens.OPEN_SQUARE, Tokens.PLUS_EQUALS, tokenString("foo"),
+        val source = """   ,   :   =   }   {   ]   [   +=   "foo"   """ + "\"\"\"bar\"\"\"" + """   42   true   3.14   false   null   ${a.b}   ${?x.y}   ${"c.d"}  """ + "\n   "
+        val expected = List(tokenWhitespace("   "), Tokens.COMMA, tokenWhitespace("   "), Tokens.COLON, tokenWhitespace("   "),
+            Tokens.EQUALS, tokenWhitespace("   "), Tokens.CLOSE_CURLY, tokenWhitespace("   "), Tokens.OPEN_CURLY, tokenWhitespace("   "), Tokens.CLOSE_SQUARE,
+            tokenWhitespace("   "), Tokens.OPEN_SQUARE, tokenWhitespace("   "), Tokens.PLUS_EQUALS, tokenWhitespace("   "), tokenString("foo"),
            tokenUnquoted("   "), tokenString("bar"), tokenUnquoted("   "), tokenLong(42), tokenUnquoted("   "), tokenTrue, tokenUnquoted("   "),
            tokenDouble(3.14), tokenUnquoted("   "), tokenFalse, tokenUnquoted("   "), tokenNull,
            tokenUnquoted("   "), tokenSubstitution(tokenUnquoted("a.b")), tokenUnquoted("   "),
            tokenOptionalSubstitution(tokenUnquoted("x.y")), tokenUnquoted("   "),
-            tokenKeySubstitution("c.d"),
-            tokenLine(1), Tokens.END)
-        assertEquals(expected, tokenizeAsList("""   ,   :   =   }   {   ]   [   +=   "foo"   """ + "\"\"\"bar\"\"\"" + """   42   true   3.14   false   null   ${a.b}   ${?x.y}   ${"c.d"}  """ + "\n   "))
+            tokenKeySubstitution("c.d"), tokenWhitespace("  "),
+            tokenLine(1), tokenWhitespace("   "))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeTrueAndUnquotedText() {
-        val expected = List(Tokens.START, tokenTrue, tokenUnquoted("foo"), Tokens.END)
-        assertEquals(expected, tokenizeAsList("""truefoo"""))
+        val source = """truefoo"""
+        val expected = List(tokenTrue, tokenUnquoted("foo"))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeFalseAndUnquotedText() {
-        val expected = List(Tokens.START, tokenFalse, tokenUnquoted("foo"), Tokens.END)
-        assertEquals(expected, tokenizeAsList("""falsefoo"""))
+        val source = """falsefoo"""
+        val expected = List(tokenFalse, tokenUnquoted("foo"))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeNullAndUnquotedText() {
-        val expected = List(Tokens.START, tokenNull, tokenUnquoted("foo"), Tokens.END)
-        assertEquals(expected, tokenizeAsList("""nullfoo"""))
+        val source = """nullfoo"""
+        val expected = List(tokenNull, tokenUnquoted("foo"))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeUnquotedTextContainingTrue() {
-        val expected = List(Tokens.START, tokenUnquoted("footrue"), Tokens.END)
-        assertEquals(expected, tokenizeAsList("""footrue"""))
+        val source = """footrue"""
+        val expected = List(tokenUnquoted("footrue"))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeUnquotedTextContainingSpaceTrue() {
-        val expected = List(Tokens.START, tokenUnquoted("foo"), tokenUnquoted(" "), tokenTrue, Tokens.END)
-        assertEquals(expected, tokenizeAsList("""foo true"""))
+        val source = """foo true"""
+        val expected = List(tokenUnquoted("foo"), tokenUnquoted(" "), tokenTrue)
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeTrueAndSpaceAndUnquotedText() {
-        val expected = List(Tokens.START, tokenTrue, tokenUnquoted(" "), tokenUnquoted("foo"), Tokens.END)
-        assertEquals(expected, tokenizeAsList("""true foo"""))
+        val source = """true foo"""
+        val expected = List(tokenTrue, tokenUnquoted(" "), tokenUnquoted("foo"))
+        tokenizerTest(expected, source)
    }

    @Test
@ -109,28 +123,33 @@ class TokenizerTest extends TestUtils {
        tokenizerTest(List(tokenUnquoted("a/b/c/")), "a/b/c/")
        tokenizerTest(List(tokenUnquoted("/")), "/")
        tokenizerTest(List(tokenUnquoted("/"), tokenUnquoted(" "), tokenUnquoted("/")), "/ /")
-        tokenizerTest(List(tokenComment("")), "//")
+        tokenizerTest(List(tokenCommentDoubleSlash("")), "//")
    }

    @Test
-    def tokenizeUnquotedTextTrimsSpaces() {
-        val expected = List(Tokens.START, tokenUnquoted("foo"), tokenLine(1), Tokens.END)
-        assertEquals(expected, tokenizeAsList("    foo     \n"))
+    def tokenizeUnquotedTextKeepsSpaces() {
+        val source = "    foo     \n"
+        val expected = List(tokenWhitespace("    "), tokenUnquoted("foo"), tokenWhitespace("     "),
+            tokenLine(1))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeUnquotedTextKeepsInternalSpaces() {
-        val expected = List(Tokens.START, tokenUnquoted("foo"), tokenUnquoted("  "), tokenUnquoted("bar"),
-            tokenUnquoted(" "), tokenUnquoted("baz"), tokenLine(1), Tokens.END)
-        assertEquals(expected, tokenizeAsList("    foo  bar baz   \n"))
+        val source = "    foo  bar baz   \n"
+        val expected = List(tokenWhitespace("    "), tokenUnquoted("foo"), tokenUnquoted("  "),
+            tokenUnquoted("bar"), tokenUnquoted(" "), tokenUnquoted("baz"), tokenWhitespace("   "),
+            tokenLine(1))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeMixedUnquotedQuoted() {
-        val expected = List(Tokens.START, tokenUnquoted("foo"),
-            tokenString("bar"), tokenUnquoted("baz"),
-            tokenLine(1), Tokens.END)
-        assertEquals(expected, tokenizeAsList("    foo\"bar\"baz   \n"))
+        val source = "    foo\"bar\"baz   \n"
+        val expected = List(tokenWhitespace("    "), tokenUnquoted("foo"),
+            tokenString("bar"), tokenUnquoted("baz"), tokenWhitespace("   "),
+            tokenLine(1))
+        tokenizerTest(expected, source)
    }

    @Test
@ -147,13 +166,14 @@ class TokenizerTest extends TestUtils {
        val tests = List[UnescapeTest]((""" "" """, ""),
            (" \"\\u0000\" ", Character.toString(0)), // nul byte
            (""" "\"\\\/\b\f\n\r\t" """, "\"\\/\b\f\n\r\t"),
-            ("\"\\u0046\"", "F"),
-            ("\"\\u0046\\u0046\"", "FF"))
+            (" \"\\u0046\" ", "F"),
+            (" \"\\u0046\\u0046\" ", "FF"))

        for (t <- tests) {
            describeFailure(t.toString) {
-                assertEquals(List(Tokens.START, Tokens.newValue(t.result), Tokens.END),
-                    tokenizeAsList(t.escaped))
+                val expected = List(tokenWhitespace(" "), Tokens.newValue(t.result, t.toString),
+                  tokenWhitespace(" "))
+                tokenizerTest(expected, t.escaped)
            }
        }
    }
@ -182,32 +202,37 @@ class TokenizerTest extends TestUtils {

    @Test
    def tokenizerEmptyTripleQuoted(): Unit = {
-        assertEquals(List(Tokens.START, tokenString(""), Tokens.END),
-            tokenizeAsList("\"\"\"\"\"\""))
+        val source = "\"\"\"\"\"\""
+        val expected = List(tokenString(""))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizerTrivialTripleQuoted(): Unit = {
-        assertEquals(List(Tokens.START, tokenString("bar"), Tokens.END),
-            tokenizeAsList("\"\"\"bar\"\"\""))
+        val source = "\"\"\"bar\"\"\""
+        val expected = List(tokenString("bar"))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizerNoEscapesInTripleQuoted(): Unit = {
-        assertEquals(List(Tokens.START, tokenString("\\n"), Tokens.END),
-            tokenizeAsList("\"\"\"\\n\"\"\""))
+        val source = "\"\"\"\\n\"\"\""
+        val expected = List(tokenString("\\n"))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizerTrailingQuotesInTripleQuoted(): Unit = {
-        assertEquals(List(Tokens.START, tokenString("\"\"\""), Tokens.END),
-            tokenizeAsList("\"\"\"\"\"\"\"\"\""))
+        val source = "\"\"\"\"\"\"\"\"\""
+        val expected = List(tokenString("\"\"\""))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizerNewlineInTripleQuoted(): Unit = {
-        assertEquals(List(Tokens.START, tokenString("foo\nbar"), Tokens.END),
-            tokenizeAsList("\"\"\"foo\nbar\"\"\""))
+        val source = "\"\"\"foo\nbar\"\"\""
+        val expected = List(tokenString("foo\nbar"))
+        tokenizerTest(expected, source)
    }

    @Test
@ -229,8 +254,8 @@ class TokenizerTest extends TestUtils {

        for (t <- tests) {
            describeFailure(t.toString()) {
-                assertEquals(List(Tokens.START, t.result, Tokens.END),
-                    tokenizeAsList(t.s))
+                val expected = List(t.result)
+                tokenizerTest(expected, t.s)
            }
        }
    }
@ -239,15 +264,30 @@ class TokenizerTest extends TestUtils {
    def commentsHandledInVariousContexts() {
        tokenizerTest(List(tokenString("//bar")), "\"//bar\"")
        tokenizerTest(List(tokenString("#bar")), "\"#bar\"")
-        tokenizerTest(List(tokenUnquoted("bar"), tokenComment("comment")), "bar//comment")
-        tokenizerTest(List(tokenUnquoted("bar"), tokenComment("comment")), "bar#comment")
-        tokenizerTest(List(tokenInt(10), tokenComment("comment")), "10//comment")
-        tokenizerTest(List(tokenInt(10), tokenComment("comment")), "10#comment")
-        tokenizerTest(List(tokenDouble(3.14), tokenComment("comment")), "3.14//comment")
-        tokenizerTest(List(tokenDouble(3.14), tokenComment("comment")), "3.14#comment")
+        tokenizerTest(List(tokenUnquoted("bar"), tokenCommentDoubleSlash("comment")), "bar//comment")
+        tokenizerTest(List(tokenUnquoted("bar"), tokenCommentHash("comment")), "bar#comment")
+        tokenizerTest(List(tokenInt(10), tokenCommentDoubleSlash("comment")), "10//comment")
+        tokenizerTest(List(tokenInt(10), tokenCommentHash("comment")), "10#comment")
+        tokenizerTest(List(tokenDouble(3.14), tokenCommentDoubleSlash("comment")), "3.14//comment")
+        tokenizerTest(List(tokenDouble(3.14), tokenCommentHash("comment")), "3.14#comment")
        // be sure we keep the newline
-        tokenizerTest(List(tokenInt(10), tokenComment("comment"), tokenLine(1), tokenInt(12)), "10//comment\n12")
-        tokenizerTest(List(tokenInt(10), tokenComment("comment"), tokenLine(1), tokenInt(12)), "10#comment\n12")
+        tokenizerTest(List(tokenInt(10), tokenCommentDoubleSlash("comment"), tokenLine(1), tokenInt(12)), "10//comment\n12")
+        tokenizerTest(List(tokenInt(10), tokenCommentHash("comment"), tokenLine(1), tokenInt(12)), "10#comment\n12")
+        // be sure we handle multi-line comments, in both the // and # styles (incl. surrounding whitespace and CRLF)
+        tokenizerTest(List(tokenCommentDoubleSlash("comment"), tokenLine(1), tokenCommentDoubleSlash("comment2")),
+                      "//comment\n//comment2")
+        tokenizerTest(List(tokenCommentHash("comment"), tokenLine(1), tokenCommentHash("comment2")),
+                      "#comment\n#comment2")
+        tokenizerTest(List(tokenWhitespace("        "), tokenCommentDoubleSlash("comment\r"),
+                           tokenLine(1), tokenWhitespace("        "), tokenCommentDoubleSlash("comment2        "),
+                           tokenLine(2), tokenCommentDoubleSlash("comment3        "),
+                           tokenLine(3), tokenLine(4), tokenCommentDoubleSlash("comment4")),
+                      "        //comment\r\n        //comment2        \n//comment3        \n\n//comment4")
+        tokenizerTest(List(tokenWhitespace("        "), tokenCommentHash("comment\r"),
+                           tokenLine(1), tokenWhitespace("        "), tokenCommentHash("comment2        "),
+                           tokenLine(2), tokenCommentHash("comment3        "),
+                           tokenLine(3), tokenLine(4), tokenCommentHash("comment4")),
+                      "        #comment\r\n        #comment2        \n#comment3        \n\n#comment4")
    }

    @Test