Merge pull request #271 from fpringvaldsen/feature/lossless-tokens

Add lossless tokens

Make the token stream lossless: every token now carries its exact source text (tokenText()), whitespace that is not part of a value is emitted as IGNORED_WHITESPACE tokens instead of being discarded, comments remember whether they were opened with "//" or "#", and Tokenizer.render() concatenates token text to reconstruct the original input. The parser skips the new whitespace tokens, so parsing behavior is unchanged.
Havoc Pennington committed 2015-03-04 13:29:20 -05:00
commit f3e11bc583
7 changed files with 307 additions and 128 deletions

Parser.java

@@ -203,7 +203,7 @@ final class Parser {
                 }
                 previous = next;
-                next = tokens.next();
+                next = nextTokenIgnoringWhitespace();
             }
             // put our concluding token in the queue with all the comments
@@ -219,7 +219,7 @@ final class Parser {
         private TokenWithComments popTokenWithoutTrailingComment() {
             if (buffer.isEmpty()) {
-                Token t = tokens.next();
+                Token t = nextTokenIgnoringWhitespace();
                 if (Tokens.isComment(t)) {
                     consolidateCommentBlock(t);
                     return buffer.pop();
@@ -243,7 +243,7 @@ final class Parser {
             if (!attractsTrailingComments(withPrecedingComments.token)) {
                 return withPrecedingComments;
             } else if (buffer.isEmpty()) {
-                Token after = tokens.next();
+                Token after = nextTokenIgnoringWhitespace();
                 if (Tokens.isComment(after)) {
                     return withPrecedingComments.add(after);
                 } else {
@@ -319,6 +319,16 @@ final class Parser {
             return t;
         }

+        // Grabs the next Token off of the TokenIterator, ignoring
+        // IgnoredWhitespace tokens
+        private Token nextTokenIgnoringWhitespace() {
+            Token t;
+            do {
+                t = tokens.next();
+            } while (Tokens.isIgnoredWhitespace(t));
+            return t;
+        }
+
         private AbstractConfigValue addAnyCommentsAfterAnyComma(AbstractConfigValue v) {
             TokenWithComments t = nextToken(); // do NOT skip newlines, we only
                                                // want same-line comments
@@ -1063,6 +1073,11 @@ final class Parser {
         while (expression.hasNext()) {
             Token t = expression.next();

+            // Ignore all IgnoredWhitespace tokens
+            if (Tokens.isIgnoredWhitespace(t))
+                continue;
+
             if (Tokens.isValueWithType(t, ConfigValueType.STRING)) {
                 AbstractConfigValue v = Tokens.getValue(t);
                 // this is a quoted string; so any periods
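The parser side of this change is deliberately small: whitespace now survives tokenizing, so every place the parser used to call tokens.next() directly must skip the new IGNORED_WHITESPACE tokens to keep parsing behavior identical. A minimal self-contained sketch of that skip loop follows; the Kind enum and method names are illustrative stand-ins, not the library's types.

    import java.util.Arrays;
    import java.util.Iterator;

    // Stand-in for the real TokenType; only the distinction matters here.
    enum Kind { VALUE, IGNORED_WHITESPACE, END }

    final class SkipWhitespaceDemo {
        // Same shape as Parser.nextTokenIgnoringWhitespace(): keep pulling
        // tokens until one is not ignorable whitespace.
        static Kind nextIgnoringWhitespace(Iterator<Kind> tokens) {
            Kind t;
            do {
                t = tokens.next();
            } while (t == Kind.IGNORED_WHITESPACE);
            return t;
        }

        public static void main(String[] args) {
            Iterator<Kind> it = Arrays.asList(
                    Kind.IGNORED_WHITESPACE, Kind.VALUE, Kind.END).iterator();
            System.out.println(nextIgnoringWhitespace(it)); // prints VALUE
        }
    }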

Token.java

@@ -10,26 +10,34 @@ class Token {
     final private TokenType tokenType;
     final private String debugString;
     final private ConfigOrigin origin;
+    final private String tokenText;

     Token(TokenType tokenType, ConfigOrigin origin) {
         this(tokenType, origin, null);
     }

-    Token(TokenType tokenType, ConfigOrigin origin, String debugString) {
+    Token(TokenType tokenType, ConfigOrigin origin, String tokenText) {
+        this(tokenType, origin, tokenText, null);
+    }
+
+    Token(TokenType tokenType, ConfigOrigin origin, String tokenText, String debugString) {
         this.tokenType = tokenType;
         this.origin = origin;
         this.debugString = debugString;
+        this.tokenText = tokenText;
     }

     // this is used for singleton tokens like COMMA or OPEN_CURLY
-    static Token newWithoutOrigin(TokenType tokenType, String debugString) {
-        return new Token(tokenType, null, debugString);
+    static Token newWithoutOrigin(TokenType tokenType, String debugString, String tokenText) {
+        return new Token(tokenType, null, tokenText, debugString);
     }

     final TokenType tokenType() {
         return tokenType;
     }

+    public String tokenText() {
+        return tokenText;
+    }
+
     // this is final because we don't always use the origin() accessor,
     // and we don't because it throws if origin is null
     final ConfigOrigin origin() {

TokenType.java

@@ -16,6 +16,7 @@ enum TokenType {
     VALUE,
     NEWLINE,
     UNQUOTED_TEXT,
+    IGNORED_WHITESPACE,
     SUBSTITUTION,
     PROBLEM,
     COMMENT,

Tokenizer.java

@@ -52,6 +52,14 @@ final class Tokenizer {
         return new TokenIterator(origin, input, flavor != ConfigSyntax.JSON);
     }

+    static String render(Iterator<Token> tokens) {
+        StringBuilder renderedText = new StringBuilder();
+        while (tokens.hasNext()) {
+            renderedText.append(tokens.next().tokenText());
+        }
+        return renderedText.toString();
+    }
+
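render() is what makes the tokens lossless in practice: since each token carries its exact source text, concatenating tokenText() over the stream reproduces the input byte for byte. A self-contained sketch of that round trip, assuming only a tokenText() accessor (Tok below is a hypothetical stand-in for the real Token class):

    import java.util.Arrays;
    import java.util.Iterator;
    import java.util.List;

    // Minimal stand-in for Token: render() only needs tokenText().
    interface Tok { String tokenText(); }

    final class RenderDemo {
        // Same shape as Tokenizer.render(): concatenate original text.
        static String render(Iterator<? extends Tok> tokens) {
            StringBuilder sb = new StringBuilder();
            while (tokens.hasNext())
                sb.append(tokens.next().tokenText());
            return sb.toString();
        }

        public static void main(String[] args) {
            // Tokens for the input "a = 1 // x": the whitespace and the
            // comment each carry their exact source text.
            List<Tok> toks = Arrays.asList(
                    () -> "a", () -> " ", () -> "=", () -> " ", () -> "1",
                    () -> " ", () -> "// x");
            System.out.println(render(toks.iterator()).equals("a = 1 // x")); // true
        }
    }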
    private static class TokenIterator implements Iterator<Token> {

        private static class WhitespaceSaver {

@@ -66,25 +74,23 @@ final class Tokenizer {
            }

            void add(int c) {
-                if (lastTokenWasSimpleValue)
-                    whitespace.appendCodePoint(c);
+                whitespace.appendCodePoint(c);
            }

            Token check(Token t, ConfigOrigin baseOrigin, int lineNumber) {
                if (isSimpleValue(t)) {
                    return nextIsASimpleValue(baseOrigin, lineNumber);
                } else {
-                    nextIsNotASimpleValue();
-                    return null;
+                    return nextIsNotASimpleValue(baseOrigin, lineNumber);
                }
            }

            // called if the next token is not a simple value;
            // discards any whitespace we were saving between
            // simple values.
-            private void nextIsNotASimpleValue() {
+            private Token nextIsNotASimpleValue(ConfigOrigin baseOrigin, int lineNumber) {
                lastTokenWasSimpleValue = false;
-                whitespace.setLength(0);
+                return createWhitespaceTokenFromSaver(baseOrigin, lineNumber);
            }

            // called if the next token IS a simple value,
@@ -92,24 +98,29 @@ final class Tokenizer {
            // token also was.
            private Token nextIsASimpleValue(ConfigOrigin baseOrigin,
                    int lineNumber) {
-                if (lastTokenWasSimpleValue) {
-                    // need to save whitespace between the two so
-                    // the parser has the option to concatenate it.
-                    if (whitespace.length() > 0) {
-                        Token t = Tokens.newUnquotedText(
-                                lineOrigin(baseOrigin, lineNumber),
-                                whitespace.toString());
-                        whitespace.setLength(0); // reset
-                        return t;
-                    } else {
-                        // lastTokenWasSimpleValue = true still
-                        return null;
-                    }
-                } else {
+                Token t = createWhitespaceTokenFromSaver(baseOrigin, lineNumber);
+                if (!lastTokenWasSimpleValue) {
                    lastTokenWasSimpleValue = true;
-                    whitespace.setLength(0);
-                    return null;
                }
+                return t;
+            }
+
+            private Token createWhitespaceTokenFromSaver(ConfigOrigin baseOrigin,
+                    int lineNumber) {
+                if (whitespace.length() > 0) {
+                    Token t;
+                    if (lastTokenWasSimpleValue) {
+                        t = Tokens.newUnquotedText(
+                                lineOrigin(baseOrigin, lineNumber),
+                                whitespace.toString());
+                    } else {
+                        t = Tokens.newIgnoredWhitespace(lineOrigin(baseOrigin, lineNumber),
+                                whitespace.toString());
+                    }
+                    whitespace.setLength(0); // reset
+                    return t;
+                }
+                return null;
            }
        }
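Condensed, the saver's new rule is: buffered whitespace becomes an UnquotedText token only when it sits between two simple values (so the parser can still concatenate foo bar into one value), and an IgnoredWhitespace token everywhere else. A small sketch of that decision table, with hypothetical names:

    // Sketch of WhitespaceSaver's classification; names are illustrative.
    final class SaverDemo {
        enum Emitted { UNQUOTED_TEXT, IGNORED_WHITESPACE, NOTHING }

        static Emitted classify(String ws, boolean prevWasSimpleValue,
                boolean nextIsSimpleValue) {
            if (ws.isEmpty())
                return Emitted.NOTHING; // nothing buffered, emit no token
            // whitespace between two simple values may matter for value
            // concatenation; anywhere else it is purely cosmetic
            return (prevWasSimpleValue && nextIsSimpleValue)
                    ? Emitted.UNQUOTED_TEXT
                    : Emitted.IGNORED_WHITESPACE;
        }

        public static void main(String[] args) {
            System.out.println(classify(" ", true, true));  // UNQUOTED_TEXT
            System.out.println(classify(" ", false, true)); // IGNORED_WHITESPACE
            System.out.println(classify("", true, true));   // NOTHING
        }
    }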
@@ -260,10 +271,12 @@ final class Tokenizer {
        // ONE char has always been consumed, either the # or the first /, but
        // not both slashes
        private Token pullComment(int firstChar) {
+            boolean doubleSlash = false;
            if (firstChar == '/') {
                int discard = nextCharRaw();
                if (discard != '/')
                    throw new ConfigException.BugOrBroken("called pullComment but // not seen");
+                doubleSlash = true;
            }

            StringBuilder sb = new StringBuilder();
@@ -271,7 +284,10 @@ final class Tokenizer {
                int c = nextCharRaw();
                if (c == -1 || c == '\n') {
                    putBack(c);
-                    return Tokens.newComment(lineOrigin, sb.toString());
+                    if (doubleSlash)
+                        return Tokens.newCommentDoubleSlash(lineOrigin, sb.toString());
+                    else
+                        return Tokens.newCommentHash(lineOrigin, sb.toString());
                } else {
                    sb.appendCodePoint(c);
                }
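The only state pullComment() gains is which delimiter opened the comment, since the two flavors must restore different prefixes when their text is reconstructed. A trivial sketch of that distinction (the method name is hypothetical):

    // Sketch: a comment's rendered text restores its exact delimiter.
    final class CommentTextDemo {
        static String tokenTextFor(String body, boolean doubleSlash) {
            return (doubleSlash ? "//" : "#") + body;
        }

        public static void main(String[] args) {
            System.out.println(tokenTextFor(" one", true));  // "// one"
            System.out.println(tokenTextFor(" two", false)); // "# two"
        }
    }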
@@ -367,11 +383,16 @@ final class Tokenizer {
            }
        }

-        private void pullEscapeSequence(StringBuilder sb) throws ProblemException {
+        private void pullEscapeSequence(StringBuilder sb, StringBuilder sbOrig) throws ProblemException {
            int escaped = nextCharRaw();
            if (escaped == -1)
                throw problem("End of input but backslash in string had nothing after it");

+            // This is needed so we return the unescaped escape characters back out when rendering
+            // the token
+            sbOrig.appendCodePoint('\\');
+            sbOrig.appendCodePoint(escaped);
+
            switch (escaped) {
            case '"':
                sb.append('"');
@@ -407,6 +428,7 @@ final class Tokenizer {
                    a[i] = (char) c;
                }
                String digits = new String(a);
+                sbOrig.append(a);
                try {
                    sb.appendCodePoint(Integer.parseInt(digits, 16));
                } catch (NumberFormatException e) {
@@ -424,7 +446,7 @@ final class Tokenizer {
            }
        }

-        private void appendTripleQuotedString(StringBuilder sb) throws ProblemException {
+        private void appendTripleQuotedString(StringBuilder sb, StringBuilder sbOrig) throws ProblemException {
            // we are after the opening triple quote and need to consume the
            // close triple
            int consecutiveQuotes = 0;
@@ -451,26 +473,37 @@ final class Tokenizer {
                }

                sb.appendCodePoint(c);
+                sbOrig.appendCodePoint(c);
            }
        }

        private Token pullQuotedString() throws ProblemException {
            // the open quote has already been consumed
            StringBuilder sb = new StringBuilder();

+            // We need a second string builder to keep track of escape characters.
+            // We want to return them exactly as they appeared in the original text,
+            // which means we will need a new StringBuilder to escape escape characters
+            // so we can also keep the actual value of the string. This is gross.
+            StringBuilder sbOrig = new StringBuilder();
+            sbOrig.appendCodePoint('"');
+
            while (true) {
                int c = nextCharRaw();
                if (c == -1)
                    throw problem("End of input but string quote was still open");

                if (c == '\\') {
-                    pullEscapeSequence(sb);
+                    pullEscapeSequence(sb, sbOrig);
                } else if (c == '"') {
+                    sbOrig.appendCodePoint(c);
                    break;
                } else if (Character.isISOControl(c)) {
                    throw problem(asString(c), "JSON does not allow unescaped " + asString(c)
                            + " in quoted strings, use a backslash escape");
                } else {
                    sb.appendCodePoint(c);
+                    sbOrig.appendCodePoint(c);
                }
            }
@@ -478,13 +511,14 @@ final class Tokenizer {
            if (sb.length() == 0) {
                int third = nextCharRaw();
                if (third == '"') {
-                    appendTripleQuotedString(sb);
+                    sbOrig.appendCodePoint(third);
+                    appendTripleQuotedString(sb, sbOrig);
                } else {
                    putBack(third);
                }
            }
-            return Tokens.newString(lineOrigin, sb.toString());
+            return Tokens.newString(lineOrigin, sb.toString(), sbOrig.toString());
        }

        private Token pullPlusEquals() throws ProblemException {
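The quoted-string change is the double bookkeeping the comment above calls gross: sb accumulates the decoded value while sbOrig accumulates the raw source, quotes and escapes included. A self-contained sketch handling only the \n escape, to show that the two builders diverge exactly at escape sequences:

    // Sketch of the two-builder technique from pullQuotedString(),
    // simplified to recognize just \n inside a double-quoted string.
    final class EscapeDemo {
        public static void main(String[] args) {
            String input = "\"a\\nb\"";  // the six source chars: " a \ n b "
            StringBuilder sb = new StringBuilder();     // decoded value
            StringBuilder sbOrig = new StringBuilder(); // exact source text
            for (int i = 0; i < input.length(); i++) {
                char c = input.charAt(i);
                sbOrig.append(c);               // source keeps everything
                if (c == '"')
                    continue;                   // quotes live only in sbOrig
                if (c == '\\') {
                    char esc = input.charAt(++i);
                    sbOrig.append(esc);         // source keeps the escape
                    sb.append(esc == 'n' ? '\n' : esc); // value decodes it
                } else {
                    sb.append(c);
                }
            }
            System.out.println(sb.toString().equals("a\nb"));    // true
            System.out.println(sbOrig.toString().equals(input)); // true
        }
    }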

Tokens.java

@@ -16,7 +16,11 @@ final class Tokens {
        final private AbstractConfigValue value;

        Value(AbstractConfigValue value) {
-            super(TokenType.VALUE, value.origin());
+            this(value, null);
+        }
+
+        Value(AbstractConfigValue value, String origText) {
+            super(TokenType.VALUE, value.origin(), origText);
            this.value = value;
        }
@@ -72,6 +76,11 @@ final class Tokens {
        public int hashCode() {
            return 41 * (41 + super.hashCode()) + lineNumber();
        }
+
+        @Override
+        public String tokenText() {
+            return "\n";
+        }
    }

    // This is not a Value, because it requires special processing
@@ -107,6 +116,30 @@ final class Tokens {
        public int hashCode() {
            return 41 * (41 + super.hashCode()) + value.hashCode();
        }
+
+        @Override
+        public String tokenText() {
+            return value;
+        }
+    }
+
+    static private class IgnoredWhitespace extends Token {
+        final private String value;
+
+        IgnoredWhitespace(ConfigOrigin origin, String s) {
+            super(TokenType.IGNORED_WHITESPACE, origin);
+            this.value = s;
+        }
+
+        String value() { return value; }
+
+        @Override
+        public String toString() { return "'" + value + "' (WHITESPACE)"; }
+
+        @Override
+        public String tokenText() {
+            return value;
+        }
    }
    static private class Problem extends Token {
@@ -177,7 +210,7 @@ final class Tokens {
        }
    }

-    static private class Comment extends Token {
+    static private abstract class Comment extends Token {
        final private String text;

        Comment(ConfigOrigin origin, String text) {
@@ -185,6 +218,28 @@ final class Tokens {
            this.text = text;
        }

+        final static class DoubleSlashComment extends Comment {
+            DoubleSlashComment(ConfigOrigin origin, String text) {
+                super(origin, text);
+            }
+
+            @Override
+            public String tokenText() {
+                return "//" + super.text;
+            }
+        }
+
+        final static class HashComment extends Comment {
+            HashComment(ConfigOrigin origin, String text) {
+                super(origin, text);
+            }
+
+            @Override
+            public String tokenText() {
+                return "#" + super.text;
+            }
+        }
+
        String text() {
            return text;
        }
@@ -235,6 +290,11 @@ final class Tokens {
            return value;
        }

+        @Override
+        public String tokenText() {
+            return "${" + (this.optional? "?" : "") + Tokenizer.render(this.value.iterator()) + "}";
+        }
+
        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
@@ -344,6 +404,10 @@ final class Tokens {
        }
    }

+    static boolean isIgnoredWhitespace(Token token) {
+        return token instanceof IgnoredWhitespace;
+    }
+
    static boolean isSubstitution(Token token) {
        return token instanceof Substitution;
    }
@@ -366,16 +430,16 @@ final class Tokens {
        }
    }

-    final static Token START = Token.newWithoutOrigin(TokenType.START, "start of file");
-    final static Token END = Token.newWithoutOrigin(TokenType.END, "end of file");
-    final static Token COMMA = Token.newWithoutOrigin(TokenType.COMMA, "','");
-    final static Token EQUALS = Token.newWithoutOrigin(TokenType.EQUALS, "'='");
-    final static Token COLON = Token.newWithoutOrigin(TokenType.COLON, "':'");
-    final static Token OPEN_CURLY = Token.newWithoutOrigin(TokenType.OPEN_CURLY, "'{'");
-    final static Token CLOSE_CURLY = Token.newWithoutOrigin(TokenType.CLOSE_CURLY, "'}'");
-    final static Token OPEN_SQUARE = Token.newWithoutOrigin(TokenType.OPEN_SQUARE, "'['");
-    final static Token CLOSE_SQUARE = Token.newWithoutOrigin(TokenType.CLOSE_SQUARE, "']'");
-    final static Token PLUS_EQUALS = Token.newWithoutOrigin(TokenType.PLUS_EQUALS, "'+='");
+    final static Token START = Token.newWithoutOrigin(TokenType.START, "start of file", "");
+    final static Token END = Token.newWithoutOrigin(TokenType.END, "end of file", "");
+    final static Token COMMA = Token.newWithoutOrigin(TokenType.COMMA, "','", ",");
+    final static Token EQUALS = Token.newWithoutOrigin(TokenType.EQUALS, "'='", "=");
+    final static Token COLON = Token.newWithoutOrigin(TokenType.COLON, "':'", ":");
+    final static Token OPEN_CURLY = Token.newWithoutOrigin(TokenType.OPEN_CURLY, "'{'", "{");
+    final static Token CLOSE_CURLY = Token.newWithoutOrigin(TokenType.CLOSE_CURLY, "'}'", "}");
+    final static Token OPEN_SQUARE = Token.newWithoutOrigin(TokenType.OPEN_SQUARE, "'['", "[");
+    final static Token CLOSE_SQUARE = Token.newWithoutOrigin(TokenType.CLOSE_SQUARE, "']'", "]");
+    final static Token PLUS_EQUALS = Token.newWithoutOrigin(TokenType.PLUS_EQUALS, "'+='", "+=");

    static Token newLine(ConfigOrigin origin) {
        return new Line(origin);
@@ -386,14 +450,22 @@ final class Tokens {
        return new Problem(origin, what, message, suggestQuotes, cause);
    }

-    static Token newComment(ConfigOrigin origin, String text) {
-        return new Comment(origin, text);
+    static Token newCommentDoubleSlash(ConfigOrigin origin, String text) {
+        return new Comment.DoubleSlashComment(origin, text);
+    }
+
+    static Token newCommentHash(ConfigOrigin origin, String text) {
+        return new Comment.HashComment(origin, text);
    }

    static Token newUnquotedText(ConfigOrigin origin, String s) {
        return new UnquotedText(origin, s);
    }

+    static Token newIgnoredWhitespace(ConfigOrigin origin, String s) {
+        return new IgnoredWhitespace(origin, s);
+    }
+
    static Token newSubstitution(ConfigOrigin origin, boolean optional, List<Token> expression) {
        return new Substitution(origin, optional, expression);
    }
@@ -401,32 +473,35 @@ final class Tokens {
    static Token newValue(AbstractConfigValue value) {
        return new Value(value);
    }

-    static Token newString(ConfigOrigin origin, String value) {
-        return newValue(new ConfigString.Quoted(origin, value));
+    static Token newValue(AbstractConfigValue value, String origText) {
+        return new Value(value, origText);
    }

-    static Token newInt(ConfigOrigin origin, int value, String originalText) {
-        return newValue(ConfigNumber.newNumber(origin, value,
-                originalText));
+    static Token newString(ConfigOrigin origin, String value, String origText) {
+        return newValue(new ConfigString.Quoted(origin, value), origText);
+    }
+
+    static Token newInt(ConfigOrigin origin, int value, String origText) {
+        return newValue(ConfigNumber.newNumber(origin, value,
+                origText), origText);
    }

-    static Token newDouble(ConfigOrigin origin, double value,
-            String originalText) {
-        return newValue(ConfigNumber.newNumber(origin, value,
-                originalText));
+    static Token newDouble(ConfigOrigin origin, double value,
+            String origText) {
+        return newValue(ConfigNumber.newNumber(origin, value,
+                origText), origText);
    }

-    static Token newLong(ConfigOrigin origin, long value, String originalText) {
-        return newValue(ConfigNumber.newNumber(origin, value,
-                originalText));
+    static Token newLong(ConfigOrigin origin, long value, String origText) {
+        return newValue(ConfigNumber.newNumber(origin, value,
+                origText), origText);
    }

    static Token newNull(ConfigOrigin origin) {
-        return newValue(new ConfigNull(origin));
+        return newValue(new ConfigNull(origin), "null");
    }

    static Token newBoolean(ConfigOrigin origin, boolean value) {
-        return newValue(new ConfigBoolean(origin, value));
+        return newValue(new ConfigBoolean(origin, value), "" + value);
    }
}
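Substitutions rebuild their source text recursively: the inner expression is itself a token list, so tokenText() wraps a Tokenizer.render() of that list in ${...}, inserting ? when the substitution is optional. A sketch of the same composition, with plain strings standing in for the inner tokens:

    import java.util.Arrays;
    import java.util.List;

    // Sketch of Substitution.tokenText(); render() stands in for
    // Tokenizer.render() over the inner expression's tokens.
    final class SubstitutionTextDemo {
        static String render(List<String> innerTokenTexts) {
            StringBuilder sb = new StringBuilder();
            for (String t : innerTokenTexts)
                sb.append(t);
            return sb.toString();
        }

        static String substitutionText(boolean optional, List<String> inner) {
            return "${" + (optional ? "?" : "") + render(inner) + "}";
        }

        public static void main(String[] args) {
            System.out.println(substitutionText(false, Arrays.asList("a.b"))); // ${a.b}
            System.out.println(substitutionText(true, Arrays.asList("x.y")));  // ${?x.y}
        }
    }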

TestUtils.scala

@@ -611,12 +611,14 @@ abstract trait TestUtils {
    def tokenFalse = Tokens.newBoolean(fakeOrigin(), false)
    def tokenNull = Tokens.newNull(fakeOrigin())
    def tokenUnquoted(s: String) = Tokens.newUnquotedText(fakeOrigin(), s)
-    def tokenString(s: String) = Tokens.newString(fakeOrigin(), s)
+    def tokenString(s: String) = Tokens.newString(fakeOrigin(), s, s)
    def tokenDouble(d: Double) = Tokens.newDouble(fakeOrigin(), d, null)
    def tokenInt(i: Int) = Tokens.newInt(fakeOrigin(), i, null)
    def tokenLong(l: Long) = Tokens.newLong(fakeOrigin(), l, null)
    def tokenLine(line: Int) = Tokens.newLine(fakeOrigin.withLineNumber(line))
-    def tokenComment(text: String) = Tokens.newComment(fakeOrigin(), text)
+    def tokenCommentDoubleSlash(text: String) = Tokens.newCommentDoubleSlash(fakeOrigin(), text)
+    def tokenCommentHash(text: String) = Tokens.newCommentHash(fakeOrigin(), text)
+    def tokenWhitespace(text: String) = Tokens.newIgnoredWhitespace(fakeOrigin(), text)

    private def tokenMaybeOptionalSubstitution(optional: Boolean, expression: Token*) = {
        val l = new java.util.ArrayList[Token]
@@ -657,6 +659,10 @@ abstract trait TestUtils {
        tokenize(s).asScala.toList
    }

+    def tokenizeAsString(s: String) = {
+        Tokenizer.render(tokenize(s))
+    }
+
    // this is importantly NOT using Path.newPath, which relies on
    // the parser; in the test suite we are often testing the parser,
    // so we don't want to use the parser to build the expected result.

TokenizerTest.scala

@@ -14,18 +14,21 @@ class TokenizerTest extends TestUtils {
    private def tokenizerTest(expected: List[Token], s: String) {
        assertEquals(List(Tokens.START) ++ expected ++ List(Tokens.END),
            tokenizeAsList(s))
+        assertEquals(s, tokenizeAsString(s))
    }

    @Test
    def tokenizeEmptyString() {
-        assertEquals(List(Tokens.START, Tokens.END),
-            tokenizeAsList(""))
+        val source = ""
+        val expected = List()
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeNewlines() {
-        assertEquals(List(Tokens.START, tokenLine(1), tokenLine(2), Tokens.END),
-            tokenizeAsList("\n\n"))
+        val source = "\n\n"
+        val expected = List(tokenLine(1), tokenLine(2))
+        tokenizerTest(expected, source)
    }

    @Test
@@ -33,75 +36,86 @@ class TokenizerTest extends TestUtils {
        // all token types with no spaces (not sure JSON spec wants this to work,
        // but spec is unclear to me when spaces are required, and banning them
        // is actually extra work).
-        val expected = List(Tokens.START, Tokens.COMMA, Tokens.COLON, Tokens.EQUALS, Tokens.CLOSE_CURLY,
+        val source = """,:=}{][+="foo"""" + "\"\"\"bar\"\"\"" + """true3.14false42null${a.b}${?x.y}${"c.d"}""" + "\n"
+        val expected = List(Tokens.COMMA, Tokens.COLON, Tokens.EQUALS, Tokens.CLOSE_CURLY,
            Tokens.OPEN_CURLY, Tokens.CLOSE_SQUARE, Tokens.OPEN_SQUARE, Tokens.PLUS_EQUALS, tokenString("foo"),
            tokenString("bar"), tokenTrue, tokenDouble(3.14), tokenFalse,
            tokenLong(42), tokenNull, tokenSubstitution(tokenUnquoted("a.b")),
            tokenOptionalSubstitution(tokenUnquoted("x.y")),
-            tokenKeySubstitution("c.d"), tokenLine(1), Tokens.END)
-        assertEquals(expected, tokenizeAsList(""",:=}{][+="foo"""" + "\"\"\"bar\"\"\"" + """true3.14false42null${a.b}${?x.y}${"c.d"}""" + "\n"))
+            tokenKeySubstitution("c.d"), tokenLine(1))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeAllTypesWithSingleSpaces() {
-        val expected = List(Tokens.START, Tokens.COMMA, Tokens.COLON, Tokens.EQUALS, Tokens.CLOSE_CURLY,
-            Tokens.OPEN_CURLY, Tokens.CLOSE_SQUARE, Tokens.OPEN_SQUARE, Tokens.PLUS_EQUALS, tokenString("foo"),
-            tokenUnquoted(" "), tokenString("bar"), tokenUnquoted(" "), tokenLong(42), tokenUnquoted(" "), tokenTrue, tokenUnquoted(" "),
-            tokenDouble(3.14), tokenUnquoted(" "), tokenFalse, tokenUnquoted(" "), tokenNull,
-            tokenUnquoted(" "), tokenSubstitution(tokenUnquoted("a.b")), tokenUnquoted(" "),
-            tokenOptionalSubstitution(tokenUnquoted("x.y")), tokenUnquoted(" "),
-            tokenKeySubstitution("c.d"),
-            tokenLine(1), Tokens.END)
-        assertEquals(expected, tokenizeAsList(""" , : = } { ] [ += "foo" """ + "\"\"\"bar\"\"\"" + """ 42 true 3.14 false null ${a.b} ${?x.y} ${"c.d"} """ + "\n "))
+        val source = """ , : = } { ] [ += "foo" """ + "\"\"\"bar\"\"\"" + """ 42 true 3.14 false null ${a.b} ${?x.y} ${"c.d"} """ + "\n "
+        val expected = List(tokenWhitespace(" "), Tokens.COMMA, tokenWhitespace(" "), Tokens.COLON, tokenWhitespace(" "),
+            Tokens.EQUALS, tokenWhitespace(" "), Tokens.CLOSE_CURLY, tokenWhitespace(" "), Tokens.OPEN_CURLY, tokenWhitespace(" "),
+            Tokens.CLOSE_SQUARE, tokenWhitespace(" "), Tokens.OPEN_SQUARE, tokenWhitespace(" "), Tokens.PLUS_EQUALS, tokenWhitespace(" "),
+            tokenString("foo"), tokenUnquoted(" "), tokenString("bar"), tokenUnquoted(" "), tokenLong(42), tokenUnquoted(" "),
+            tokenTrue, tokenUnquoted(" "), tokenDouble(3.14), tokenUnquoted(" "), tokenFalse, tokenUnquoted(" "), tokenNull,
+            tokenUnquoted(" "), tokenSubstitution(tokenUnquoted("a.b")), tokenUnquoted(" "),
+            tokenOptionalSubstitution(tokenUnquoted("x.y")), tokenUnquoted(" "),
+            tokenKeySubstitution("c.d"), tokenWhitespace(" "),
+            tokenLine(1), tokenWhitespace(" "))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeAllTypesWithMultipleSpaces() {
-        val expected = List(Tokens.START, Tokens.COMMA, Tokens.COLON, Tokens.EQUALS, Tokens.CLOSE_CURLY,
-            Tokens.OPEN_CURLY, Tokens.CLOSE_SQUARE, Tokens.OPEN_SQUARE, Tokens.PLUS_EQUALS, tokenString("foo"),
-            tokenUnquoted(" "), tokenString("bar"), tokenUnquoted(" "), tokenLong(42), tokenUnquoted(" "), tokenTrue, tokenUnquoted(" "),
-            tokenDouble(3.14), tokenUnquoted(" "), tokenFalse, tokenUnquoted(" "), tokenNull,
-            tokenUnquoted(" "), tokenSubstitution(tokenUnquoted("a.b")), tokenUnquoted(" "),
-            tokenOptionalSubstitution(tokenUnquoted("x.y")), tokenUnquoted(" "),
-            tokenKeySubstitution("c.d"),
-            tokenLine(1), Tokens.END)
-        assertEquals(expected, tokenizeAsList(""" , : = } { ] [ += "foo" """ + "\"\"\"bar\"\"\"" + """ 42 true 3.14 false null ${a.b} ${?x.y} ${"c.d"} """ + "\n "))
+        val source = """ , : = } { ] [ += "foo" """ + "\"\"\"bar\"\"\"" + """ 42 true 3.14 false null ${a.b} ${?x.y} ${"c.d"} """ + "\n "
+        val expected = List(tokenWhitespace(" "), Tokens.COMMA, tokenWhitespace(" "), Tokens.COLON, tokenWhitespace(" "),
+            Tokens.EQUALS, tokenWhitespace(" "), Tokens.CLOSE_CURLY, tokenWhitespace(" "), Tokens.OPEN_CURLY, tokenWhitespace(" "), Tokens.CLOSE_SQUARE,
+            tokenWhitespace(" "), Tokens.OPEN_SQUARE, tokenWhitespace(" "), Tokens.PLUS_EQUALS, tokenWhitespace(" "), tokenString("foo"),
+            tokenUnquoted(" "), tokenString("bar"), tokenUnquoted(" "), tokenLong(42), tokenUnquoted(" "), tokenTrue, tokenUnquoted(" "),
+            tokenDouble(3.14), tokenUnquoted(" "), tokenFalse, tokenUnquoted(" "), tokenNull,
+            tokenUnquoted(" "), tokenSubstitution(tokenUnquoted("a.b")), tokenUnquoted(" "),
+            tokenOptionalSubstitution(tokenUnquoted("x.y")), tokenUnquoted(" "),
+            tokenKeySubstitution("c.d"), tokenWhitespace(" "),
+            tokenLine(1), tokenWhitespace(" "))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeTrueAndUnquotedText() {
-        val expected = List(Tokens.START, tokenTrue, tokenUnquoted("foo"), Tokens.END)
-        assertEquals(expected, tokenizeAsList("""truefoo"""))
+        val source = """truefoo"""
+        val expected = List(tokenTrue, tokenUnquoted("foo"))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeFalseAndUnquotedText() {
-        val expected = List(Tokens.START, tokenFalse, tokenUnquoted("foo"), Tokens.END)
-        assertEquals(expected, tokenizeAsList("""falsefoo"""))
+        val source = """falsefoo"""
+        val expected = List(tokenFalse, tokenUnquoted("foo"))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeNullAndUnquotedText() {
-        val expected = List(Tokens.START, tokenNull, tokenUnquoted("foo"), Tokens.END)
-        assertEquals(expected, tokenizeAsList("""nullfoo"""))
+        val source = """nullfoo"""
+        val expected = List(tokenNull, tokenUnquoted("foo"))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeUnquotedTextContainingTrue() {
-        val expected = List(Tokens.START, tokenUnquoted("footrue"), Tokens.END)
-        assertEquals(expected, tokenizeAsList("""footrue"""))
+        val source = """footrue"""
+        val expected = List(tokenUnquoted("footrue"))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeUnquotedTextContainingSpaceTrue() {
-        val expected = List(Tokens.START, tokenUnquoted("foo"), tokenUnquoted(" "), tokenTrue, Tokens.END)
-        assertEquals(expected, tokenizeAsList("""foo true"""))
+        val source = """foo true"""
+        val expected = List(tokenUnquoted("foo"), tokenUnquoted(" "), tokenTrue)
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeTrueAndSpaceAndUnquotedText() {
-        val expected = List(Tokens.START, tokenTrue, tokenUnquoted(" "), tokenUnquoted("foo"), Tokens.END)
-        assertEquals(expected, tokenizeAsList("""true foo"""))
+        val source = """true foo"""
+        val expected = List(tokenTrue, tokenUnquoted(" "), tokenUnquoted("foo"))
+        tokenizerTest(expected, source)
    }

    @Test
@@ -109,28 +123,33 @@ class TokenizerTest extends TestUtils {
        tokenizerTest(List(tokenUnquoted("a/b/c/")), "a/b/c/")
        tokenizerTest(List(tokenUnquoted("/")), "/")
        tokenizerTest(List(tokenUnquoted("/"), tokenUnquoted(" "), tokenUnquoted("/")), "/ /")
-        tokenizerTest(List(tokenComment("")), "//")
+        tokenizerTest(List(tokenCommentDoubleSlash("")), "//")
    }

    @Test
-    def tokenizeUnquotedTextTrimsSpaces() {
-        val expected = List(Tokens.START, tokenUnquoted("foo"), tokenLine(1), Tokens.END)
-        assertEquals(expected, tokenizeAsList(" foo \n"))
+    def tokenizeUnquotedTextKeepsSpaces() {
+        val source = " foo \n"
+        val expected = List(tokenWhitespace(" "), tokenUnquoted("foo"), tokenWhitespace(" "),
+            tokenLine(1))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeUnquotedTextKeepsInternalSpaces() {
-        val expected = List(Tokens.START, tokenUnquoted("foo"), tokenUnquoted(" "), tokenUnquoted("bar"),
-            tokenUnquoted(" "), tokenUnquoted("baz"), tokenLine(1), Tokens.END)
-        assertEquals(expected, tokenizeAsList(" foo bar baz \n"))
+        val source = " foo bar baz \n"
+        val expected = List(tokenWhitespace(" "), tokenUnquoted("foo"), tokenUnquoted(" "),
+            tokenUnquoted("bar"), tokenUnquoted(" "), tokenUnquoted("baz"), tokenWhitespace(" "),
+            tokenLine(1))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeMixedUnquotedQuoted() {
-        val expected = List(Tokens.START, tokenUnquoted("foo"),
-            tokenString("bar"), tokenUnquoted("baz"),
-            tokenLine(1), Tokens.END)
-        assertEquals(expected, tokenizeAsList(" foo\"bar\"baz \n"))
+        val source = " foo\"bar\"baz \n"
+        val expected = List(tokenWhitespace(" "), tokenUnquoted("foo"),
+            tokenString("bar"), tokenUnquoted("baz"), tokenWhitespace(" "),
+            tokenLine(1))
+        tokenizerTest(expected, source)
    }

    @Test
@@ -147,13 +166,14 @@ class TokenizerTest extends TestUtils {
        val tests = List[UnescapeTest]((""" "" """, ""),
            (" \"\\u0000\" ", Character.toString(0)), // nul byte
            (""" "\"\\\/\b\f\n\r\t" """, "\"\\/\b\f\n\r\t"),
-            ("\"\\u0046\"", "F"),
-            ("\"\\u0046\\u0046\"", "FF"))
+            (" \"\\u0046\" ", "F"),
+            (" \"\\u0046\\u0046\" ", "FF"))

        for (t <- tests) {
            describeFailure(t.toString) {
-                assertEquals(List(Tokens.START, Tokens.newValue(t.result), Tokens.END),
-                    tokenizeAsList(t.escaped))
+                val expected = List(tokenWhitespace(" "), Tokens.newValue(t.result, t.toString),
+                    tokenWhitespace(" "))
+                tokenizerTest(expected, t.escaped)
            }
        }
    }
@@ -182,32 +202,37 @@ class TokenizerTest extends TestUtils {

    @Test
    def tokenizerEmptyTripleQuoted(): Unit = {
-        assertEquals(List(Tokens.START, tokenString(""), Tokens.END),
-            tokenizeAsList("\"\"\"\"\"\""))
+        val source = "\"\"\"\"\"\""
+        val expected = List(tokenString(""))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizerTrivialTripleQuoted(): Unit = {
-        assertEquals(List(Tokens.START, tokenString("bar"), Tokens.END),
-            tokenizeAsList("\"\"\"bar\"\"\""))
+        val source = "\"\"\"bar\"\"\""
+        val expected = List(tokenString("bar"))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizerNoEscapesInTripleQuoted(): Unit = {
-        assertEquals(List(Tokens.START, tokenString("\\n"), Tokens.END),
-            tokenizeAsList("\"\"\"\\n\"\"\""))
+        val source = "\"\"\"\\n\"\"\""
+        val expected = List(tokenString("\\n"))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizerTrailingQuotesInTripleQuoted(): Unit = {
-        assertEquals(List(Tokens.START, tokenString("\"\"\""), Tokens.END),
-            tokenizeAsList("\"\"\"\"\"\"\"\"\""))
+        val source = "\"\"\"\"\"\"\"\"\""
+        val expected = List(tokenString("\"\"\""))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizerNewlineInTripleQuoted(): Unit = {
-        assertEquals(List(Tokens.START, tokenString("foo\nbar"), Tokens.END),
-            tokenizeAsList("\"\"\"foo\nbar\"\"\""))
+        val source = "\"\"\"foo\nbar\"\"\""
+        val expected = List(tokenString("foo\nbar"))
+        tokenizerTest(expected, source)
    }

    @Test
@@ -229,8 +254,8 @@ class TokenizerTest extends TestUtils {
        for (t <- tests) {
            describeFailure(t.toString()) {
-                assertEquals(List(Tokens.START, t.result, Tokens.END),
-                    tokenizeAsList(t.s))
+                val expected = List(t.result)
+                tokenizerTest(expected, t.s)
            }
        }
    }
@@ -239,15 +264,30 @@ class TokenizerTest extends TestUtils {
    def commentsHandledInVariousContexts() {
        tokenizerTest(List(tokenString("//bar")), "\"//bar\"")
        tokenizerTest(List(tokenString("#bar")), "\"#bar\"")
-        tokenizerTest(List(tokenUnquoted("bar"), tokenComment("comment")), "bar//comment")
-        tokenizerTest(List(tokenUnquoted("bar"), tokenComment("comment")), "bar#comment")
-        tokenizerTest(List(tokenInt(10), tokenComment("comment")), "10//comment")
-        tokenizerTest(List(tokenInt(10), tokenComment("comment")), "10#comment")
-        tokenizerTest(List(tokenDouble(3.14), tokenComment("comment")), "3.14//comment")
-        tokenizerTest(List(tokenDouble(3.14), tokenComment("comment")), "3.14#comment")
+        tokenizerTest(List(tokenUnquoted("bar"), tokenCommentDoubleSlash("comment")), "bar//comment")
+        tokenizerTest(List(tokenUnquoted("bar"), tokenCommentHash("comment")), "bar#comment")
+        tokenizerTest(List(tokenInt(10), tokenCommentDoubleSlash("comment")), "10//comment")
+        tokenizerTest(List(tokenInt(10), tokenCommentHash("comment")), "10#comment")
+        tokenizerTest(List(tokenDouble(3.14), tokenCommentDoubleSlash("comment")), "3.14//comment")
+        tokenizerTest(List(tokenDouble(3.14), tokenCommentHash("comment")), "3.14#comment")
        // be sure we keep the newline
-        tokenizerTest(List(tokenInt(10), tokenComment("comment"), tokenLine(1), tokenInt(12)), "10//comment\n12")
-        tokenizerTest(List(tokenInt(10), tokenComment("comment"), tokenLine(1), tokenInt(12)), "10#comment\n12")
+        tokenizerTest(List(tokenInt(10), tokenCommentDoubleSlash("comment"), tokenLine(1), tokenInt(12)), "10//comment\n12")
+        tokenizerTest(List(tokenInt(10), tokenCommentHash("comment"), tokenLine(1), tokenInt(12)), "10#comment\n12")
+        // be sure we handle multi-line comments
+        tokenizerTest(List(tokenCommentDoubleSlash("comment"), tokenLine(1), tokenCommentDoubleSlash("comment2")),
+            "//comment\n//comment2")
+        tokenizerTest(List(tokenCommentHash("comment"), tokenLine(1), tokenCommentHash("comment2")),
+            "#comment\n#comment2")
+        tokenizerTest(List(tokenWhitespace(" "), tokenCommentDoubleSlash("comment\r"),
+            tokenLine(1), tokenWhitespace(" "), tokenCommentDoubleSlash("comment2 "),
+            tokenLine(2), tokenCommentDoubleSlash("comment3 "),
+            tokenLine(3), tokenLine(4), tokenCommentDoubleSlash("comment4")),
+            " //comment\r\n //comment2 \n//comment3 \n\n//comment4")
    }

    @Test