Clean up lossless tokens code based on feedback

Clean up the lossless tokens code based on feedback.

* Rename the nextTokenFromIterator() method to nextTokenIgnoringWhitespace()
* Use StringBuilder in Tokenizer.render()
* Extract Whitespace token creation into a `createWhitespaceTokenFromSaver` method
* Add DoubleSlashComment and HashComment subclasses of Comment. Make Comment abstract.
* Make the `tokenText()` method non-final and override it in subclasses that derive their original token text entirely from other fields.
* Use the `tokenizerTest` method in all relevant Tokenizer tests
* Add extra multi-line comment Tokenizer tests
2025-04-15 05:30:43 +08:00 · 2015-03-03 15:27:41 -08:00 · 2015-03-03 15:27:41 -08:00 · cb86fb136e
commit cb86fb136e
parent 2b1c7ef13d
6 changed files with 161 additions and 130 deletions
--- a/config/src/main/java/com/typesafe/config/impl/Parser.java
+++ b/config/src/main/java/com/typesafe/config/impl/Parser.java
@ -203,7 +203,7 @@ final class Parser {
                }

                previous = next;
-                next = nextTokenFromIterator();
+                next = nextTokenIgnoringWhitespace();
            }

            // put our concluding token in the queue with all the comments
@ -219,7 +219,7 @@ final class Parser {

        private TokenWithComments popTokenWithoutTrailingComment() {
            if (buffer.isEmpty()) {
-                Token t = nextTokenFromIterator();
+                Token t = nextTokenIgnoringWhitespace();
                if (Tokens.isComment(t)) {
                    consolidateCommentBlock(t);
                    return buffer.pop();
@ -243,7 +243,7 @@ final class Parser {
            if (!attractsTrailingComments(withPrecedingComments.token)) {
                return withPrecedingComments;
            } else if (buffer.isEmpty()) {
-                Token after = nextTokenFromIterator();
+                Token after = nextTokenIgnoringWhitespace();
                if (Tokens.isComment(after)) {
                    return withPrecedingComments.add(after);
                } else {
@ -321,7 +321,7 @@ final class Parser {

        // Grabs the next Token off of the TokenIterator, ignoring
        // IgnoredWhitespace tokens
-        private Token nextTokenFromIterator() {
+        private Token nextTokenIgnoringWhitespace() {
            Token t;
            do {
                t = tokens.next();
--- a/config/src/main/java/com/typesafe/config/impl/Token.java
+++ b/config/src/main/java/com/typesafe/config/impl/Token.java
@ -36,7 +36,7 @@ class Token {
        return tokenType;
    }

-    final String tokenText() { return tokenText; }
+    public String tokenText() { return tokenText; }

    // this is final because we don't always use the origin() accessor,
    // and we don't because it throws if origin is null
--- a/config/src/main/java/com/typesafe/config/impl/Tokenizer.java
+++ b/config/src/main/java/com/typesafe/config/impl/Tokenizer.java
@ -53,11 +53,11 @@ final class Tokenizer {
    }

    static String render(Iterator<Token> tokens) {
-        String renderedText = "";
+        StringBuilder renderedText = new StringBuilder();
        while (tokens.hasNext()) {
-            renderedText += tokens.next().tokenText();
+            renderedText.append(tokens.next().tokenText());
        }
-        return renderedText;
+        return renderedText.toString();
    }

    private static class TokenIterator implements Iterator<Token> {
@ -90,14 +90,7 @@ final class Tokenizer {
            // simple values.
            private Token nextIsNotASimpleValue(ConfigOrigin baseOrigin, int lineNumber) {
                lastTokenWasSimpleValue = false;
-
-                if (whitespace.length() > 0) {
-                    Token t = Tokens.newIgnoredWhitespace(lineOrigin(baseOrigin, lineNumber),
-                            whitespace.toString());
-                    whitespace.setLength(0);
-                    return t;
-                }
-                return null;
+                return createWhitespaceTokenFromSaver(baseOrigin, lineNumber);
            }

            // called if the next token IS a simple value,
@ -105,29 +98,29 @@ final class Tokenizer {
            // token also was.
            private Token nextIsASimpleValue(ConfigOrigin baseOrigin,
                    int lineNumber) {
-                if (lastTokenWasSimpleValue) {
-                    // need to save whitespace between the two so
-                    // the parser has the option to concatenate it.
-                    if (whitespace.length() > 0) {
-                        Token t = Tokens.newUnquotedText(
-                                lineOrigin(baseOrigin, lineNumber),
-                                whitespace.toString());
-                        whitespace.setLength(0); // reset
-                        return t;
-                    } else {
-                        // lastTokenWasSimpleValue = true still
-                        return null;
-                    }
-                } else {
+                Token t = createWhitespaceTokenFromSaver(baseOrigin, lineNumber);
+                if (!lastTokenWasSimpleValue) {
                    lastTokenWasSimpleValue = true;
-                    if (whitespace.length() > 0) {
-                        Token t = Tokens.newIgnoredWhitespace(lineOrigin(baseOrigin, lineNumber),
-                                whitespace.toString());
-                        whitespace.setLength(0);
-                        return t;
-                    }
-                    return null;
                }
+                return t;
+            }
+
+            private Token createWhitespaceTokenFromSaver(ConfigOrigin baseOrigin,
+                                                         int lineNumber) {
+                if (whitespace.length() > 0) {
+                    Token t;
+                    if (lastTokenWasSimpleValue) {
+                        t = Tokens.newUnquotedText(
+                            lineOrigin(baseOrigin, lineNumber),
+                            whitespace.toString());
+                    } else {
+                        t = Tokens.newIgnoredWhitespace(lineOrigin(baseOrigin, lineNumber),
+                                                        whitespace.toString());
+                    }
+                    whitespace.setLength(0); // reset
+                    return t;
+                }
+                return null;
            }
        }

@ -291,7 +284,10 @@ final class Tokenizer {
                int c = nextCharRaw();
                if (c == -1 || c == '\n') {
                    putBack(c);
-                    return Tokens.newComment(lineOrigin, sb.toString(), doubleSlash);
+                    if (doubleSlash)
+                        return Tokens.newCommentDoubleSlash(lineOrigin, sb.toString());
+                    else
+                        return Tokens.newCommentHash(lineOrigin, sb.toString());
                } else {
                    sb.appendCodePoint(c);
                }
--- a/config/src/main/java/com/typesafe/config/impl/Tokens.java
+++ b/config/src/main/java/com/typesafe/config/impl/Tokens.java
@ -54,7 +54,7 @@ final class Tokens {

    static private class Line extends Token {
        Line(ConfigOrigin origin) {
-            super(TokenType.NEWLINE, origin, "\n");
+            super(TokenType.NEWLINE, origin);
        }

        @Override
@ -76,6 +76,11 @@ final class Tokens {
        public int hashCode() {
            return 41 * (41 + super.hashCode()) + lineNumber();
        }
+
+        @Override
+        public String tokenText() {
+            return "\n";
+        }
    }

    // This is not a Value, because it requires special processing
@ -83,7 +88,7 @@ final class Tokens {
        final private String value;

        UnquotedText(ConfigOrigin origin, String s) {
-            super(TokenType.UNQUOTED_TEXT, origin, s);
+            super(TokenType.UNQUOTED_TEXT, origin);
            this.value = s;
        }

@ -111,13 +116,18 @@ final class Tokens {
        public int hashCode() {
            return 41 * (41 + super.hashCode()) + value.hashCode();
        }
+
+        @Override
+        public String tokenText() {
+            return value;
+        }
    }

    static private class IgnoredWhitespace extends Token {
        final private String value;

        IgnoredWhitespace(ConfigOrigin origin, String s) {
-            super(TokenType.IGNORED_WHITESPACE, origin, s);
+            super(TokenType.IGNORED_WHITESPACE, origin);
            this.value = s;
        }

@ -125,6 +135,11 @@ final class Tokens {

        @Override
        public String toString() { return "'" + value + "' (WHITESPACE)"; }
+
+        @Override
+        public String tokenText() {
+            return value;
+        }
    }

    static private class Problem extends Token {
@ -195,14 +210,36 @@ final class Tokens {
        }
    }

-    static private class Comment extends Token {
+    static private abstract class Comment extends Token {
        final private String text;

-        Comment(ConfigOrigin origin, String text, boolean doubleSlash) {
-            super(TokenType.COMMENT, origin, (doubleSlash? "//" : "#") + text);
+        Comment(ConfigOrigin origin, String text) {
+            super(TokenType.COMMENT, origin);
            this.text = text;
        }

+        final static class DoubleSlashComment extends Comment {
+            DoubleSlashComment(ConfigOrigin origin, String text) {
+                super(origin, text);
+            }
+
+            @Override
+            public String tokenText() {
+                return "//" + super.text;
+            }
+        }
+
+        final static class HashComment extends Comment {
+            HashComment(ConfigOrigin origin, String text) {
+                super(origin, text);
+            }
+
+            @Override
+            public String tokenText() {
+                return "#" + super.text;
+            }
+        }
+
        String text() {
            return text;
        }
@ -240,8 +277,7 @@ final class Tokens {
        final private List<Token> value;

        Substitution(ConfigOrigin origin, boolean optional, List<Token> expression) {
-            super(TokenType.SUBSTITUTION, origin,
-                    "${" + (optional? "?" : "") + Tokenizer.render(expression.iterator()) + "}");
+            super(TokenType.SUBSTITUTION, origin);
            this.optional = optional;
            this.value = expression;
        }
@ -254,6 +290,11 @@ final class Tokens {
            return value;
        }

+        @Override
+        public String tokenText() {
+            return "${" + (this.optional? "?" : "") + Tokenizer.render(this.value.iterator()) + "}";
+        }
+
        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
@ -409,8 +450,12 @@ final class Tokens {
        return new Problem(origin, what, message, suggestQuotes, cause);
    }

-    static Token newComment(ConfigOrigin origin, String text, boolean doubleSlash) {
-        return new Comment(origin, text, doubleSlash);
+    static Token newCommentDoubleSlash(ConfigOrigin origin, String text) {
+        return new Comment.DoubleSlashComment(origin, text);
+    }
+
+    static Token newCommentHash(ConfigOrigin origin, String text) {
+        return new Comment.HashComment(origin, text);
    }

    static Token newUnquotedText(ConfigOrigin origin, String s) {
--- a/config/src/test/scala/com/typesafe/config/impl/TestUtils.scala
+++ b/config/src/test/scala/com/typesafe/config/impl/TestUtils.scala
@ -616,7 +616,8 @@ abstract trait TestUtils {
    def tokenInt(i: Int) = Tokens.newInt(fakeOrigin(), i, null)
    def tokenLong(l: Long) = Tokens.newLong(fakeOrigin(), l, null)
    def tokenLine(line: Int) = Tokens.newLine(fakeOrigin.withLineNumber(line))
-    def tokenComment(text: String, doubleSlash : Boolean) = Tokens.newComment(fakeOrigin(), text, doubleSlash)
+    def tokenCommentDoubleSlash(text: String) = Tokens.newCommentDoubleSlash(fakeOrigin(), text)
+    def tokenCommentHash(text: String) = Tokens.newCommentHash(fakeOrigin(), text)
    def tokenWhitespace(text: String) = Tokens.newIgnoredWhitespace(fakeOrigin(), text)

    private def tokenMaybeOptionalSubstitution(optional: Boolean, expression: Token*) = {
--- a/config/src/test/scala/com/typesafe/config/impl/TokenizerTest.scala
+++ b/config/src/test/scala/com/typesafe/config/impl/TokenizerTest.scala
@ -20,17 +20,15 @@ class TokenizerTest extends TestUtils {
    @Test
    def tokenizeEmptyString() {
        val source = ""
-        assertEquals(List(Tokens.START, Tokens.END),
-            tokenizeAsList(source))
-        assertEquals(source, tokenizeAsString(source))
+        val expected = List()
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeNewlines() {
        val source = "\n\n"
-        assertEquals(List(Tokens.START, tokenLine(1), tokenLine(2), Tokens.END),
-            tokenizeAsList(source))
-        assertEquals(source, tokenizeAsString(source))
+        val expected = List(tokenLine(1), tokenLine(2))
+        tokenizerTest(expected, source)
    }

    @Test
@ -39,20 +37,19 @@ class TokenizerTest extends TestUtils {
        // but spec is unclear to me when spaces are required, and banning them
        // is actually extra work).
        val source = """,:=}{][+="foo"""" + "\"\"\"bar\"\"\"" + """true3.14false42null${a.b}${?x.y}${"c.d"}""" + "\n"
-        val expected = List(Tokens.START, Tokens.COMMA, Tokens.COLON, Tokens.EQUALS, Tokens.CLOSE_CURLY,
+        val expected = List(Tokens.COMMA, Tokens.COLON, Tokens.EQUALS, Tokens.CLOSE_CURLY,
            Tokens.OPEN_CURLY, Tokens.CLOSE_SQUARE, Tokens.OPEN_SQUARE, Tokens.PLUS_EQUALS, tokenString("foo"),
            tokenString("bar"), tokenTrue, tokenDouble(3.14), tokenFalse,
            tokenLong(42), tokenNull, tokenSubstitution(tokenUnquoted("a.b")),
            tokenOptionalSubstitution(tokenUnquoted("x.y")),
-            tokenKeySubstitution("c.d"), tokenLine(1), Tokens.END)
-        assertEquals(expected, tokenizeAsList(source))
-        assertEquals(source, tokenizeAsString(source))
+            tokenKeySubstitution("c.d"), tokenLine(1))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeAllTypesWithSingleSpaces() {
        val source = """ , : = } { ] [ += "foo" """ + "\"\"\"bar\"\"\"" + """ 42 true 3.14 false null ${a.b} ${?x.y} ${"c.d"} """ + "\n "
-        val expected = List(Tokens.START, tokenWhitespace(" "), Tokens.COMMA, tokenWhitespace(" "), Tokens.COLON, tokenWhitespace(" "),
+        val expected = List(tokenWhitespace(" "), Tokens.COMMA, tokenWhitespace(" "), Tokens.COLON, tokenWhitespace(" "),
            Tokens.EQUALS, tokenWhitespace(" "), Tokens.CLOSE_CURLY, tokenWhitespace(" "), Tokens.OPEN_CURLY, tokenWhitespace(" "),
            Tokens.CLOSE_SQUARE, tokenWhitespace(" "), Tokens.OPEN_SQUARE, tokenWhitespace(" "), Tokens.PLUS_EQUALS, tokenWhitespace(" "),
            tokenString("foo"), tokenUnquoted(" "), tokenString("bar"), tokenUnquoted(" "), tokenLong(42), tokenUnquoted(" "),
@ -60,15 +57,14 @@ class TokenizerTest extends TestUtils {
            tokenUnquoted(" "), tokenSubstitution(tokenUnquoted("a.b")), tokenUnquoted(" "),
            tokenOptionalSubstitution(tokenUnquoted("x.y")), tokenUnquoted(" "),
            tokenKeySubstitution("c.d"), tokenWhitespace(" "),
-            tokenLine(1), tokenWhitespace(" "), Tokens.END)
-        assertEquals(expected, tokenizeAsList(source))
-        assertEquals(source, tokenizeAsString(source))
+            tokenLine(1), tokenWhitespace(" "))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeAllTypesWithMultipleSpaces() {
        val source = """   ,   :   =   }   {   ]   [   +=   "foo"   """ + "\"\"\"bar\"\"\"" + """   42   true   3.14   false   null   ${a.b}   ${?x.y}   ${"c.d"}  """ + "\n   "
-        val expected = List(Tokens.START, tokenWhitespace("   "), Tokens.COMMA, tokenWhitespace("   "), Tokens.COLON, tokenWhitespace("   "),
+        val expected = List(tokenWhitespace("   "), Tokens.COMMA, tokenWhitespace("   "), Tokens.COLON, tokenWhitespace("   "),
            Tokens.EQUALS, tokenWhitespace("   "), Tokens.CLOSE_CURLY, tokenWhitespace("   "), Tokens.OPEN_CURLY, tokenWhitespace("   "), Tokens.CLOSE_SQUARE,
            tokenWhitespace("   "), Tokens.OPEN_SQUARE, tokenWhitespace("   "), Tokens.PLUS_EQUALS, tokenWhitespace("   "), tokenString("foo"),
            tokenUnquoted("   "), tokenString("bar"), tokenUnquoted("   "), tokenLong(42), tokenUnquoted("   "), tokenTrue, tokenUnquoted("   "),
@ -76,57 +72,50 @@ class TokenizerTest extends TestUtils {
            tokenUnquoted("   "), tokenSubstitution(tokenUnquoted("a.b")), tokenUnquoted("   "),
            tokenOptionalSubstitution(tokenUnquoted("x.y")), tokenUnquoted("   "),
            tokenKeySubstitution("c.d"), tokenWhitespace("  "),
-            tokenLine(1), tokenWhitespace("   "), Tokens.END)
-        assertEquals(expected, tokenizeAsList(source))
-        assertEquals(source, tokenizeAsString(source))
+            tokenLine(1), tokenWhitespace("   "))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeTrueAndUnquotedText() {
        val source = """truefoo"""
-        val expected = List(Tokens.START, tokenTrue, tokenUnquoted("foo"), Tokens.END)
-        assertEquals(expected, tokenizeAsList(source))
-        assertEquals(source, tokenizeAsString(source))
+        val expected = List(tokenTrue, tokenUnquoted("foo"))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeFalseAndUnquotedText() {
        val source = """falsefoo"""
-        val expected = List(Tokens.START, tokenFalse, tokenUnquoted("foo"), Tokens.END)
-        assertEquals(expected, tokenizeAsList(source))
-        assertEquals(source, tokenizeAsString(source))
+        val expected = List(tokenFalse, tokenUnquoted("foo"))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeNullAndUnquotedText() {
        val source = """nullfoo"""
-        val expected = List(Tokens.START, tokenNull, tokenUnquoted("foo"), Tokens.END)
-        assertEquals(expected, tokenizeAsList(source))
-        assertEquals(source, tokenizeAsString(source))
+        val expected = List(tokenNull, tokenUnquoted("foo"))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeUnquotedTextContainingTrue() {
        val source = """footrue"""
-        val expected = List(Tokens.START, tokenUnquoted("footrue"), Tokens.END)
-        assertEquals(expected, tokenizeAsList(source))
-        assertEquals(source, tokenizeAsString(source))
+        val expected = List(tokenUnquoted("footrue"))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeUnquotedTextContainingSpaceTrue() {
        val source = """foo true"""
-        val expected = List(Tokens.START, tokenUnquoted("foo"), tokenUnquoted(" "), tokenTrue, Tokens.END)
-        assertEquals(expected, tokenizeAsList(source))
-        assertEquals(source, tokenizeAsString(source))
+        val expected = List(tokenUnquoted("foo"), tokenUnquoted(" "), tokenTrue)
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeTrueAndSpaceAndUnquotedText() {
        val source = """true foo"""
-        val expected = List(Tokens.START, tokenTrue, tokenUnquoted(" "), tokenUnquoted("foo"), Tokens.END)
-        assertEquals(expected, tokenizeAsList(source))
-        assertEquals(source, tokenizeAsString(source))
+        val expected = List(tokenTrue, tokenUnquoted(" "), tokenUnquoted("foo"))
+        tokenizerTest(expected, source)
    }

    @Test
@ -134,36 +123,33 @@ class TokenizerTest extends TestUtils {
        tokenizerTest(List(tokenUnquoted("a/b/c/")), "a/b/c/")
        tokenizerTest(List(tokenUnquoted("/")), "/")
        tokenizerTest(List(tokenUnquoted("/"), tokenUnquoted(" "), tokenUnquoted("/")), "/ /")
-        tokenizerTest(List(tokenComment("", true)), "//")
+        tokenizerTest(List(tokenCommentDoubleSlash("")), "//")
    }

    @Test
    def tokenizeUnquotedTextKeepsSpaces() {
        val source = "    foo     \n"
-        val expected = List(Tokens.START, tokenWhitespace("    "), tokenUnquoted("foo"), tokenWhitespace("     "),
-            tokenLine(1), Tokens.END)
-        assertEquals(expected, tokenizeAsList(source))
-        assertEquals(source, tokenizeAsString(source))
+        val expected = List(tokenWhitespace("    "), tokenUnquoted("foo"), tokenWhitespace("     "),
+            tokenLine(1))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeUnquotedTextKeepsInternalSpaces() {
        val source = "    foo  bar baz   \n"
-        val expected = List(Tokens.START, tokenWhitespace("    "), tokenUnquoted("foo"), tokenUnquoted("  "),
+        val expected = List(tokenWhitespace("    "), tokenUnquoted("foo"), tokenUnquoted("  "),
            tokenUnquoted("bar"), tokenUnquoted(" "), tokenUnquoted("baz"), tokenWhitespace("   "),
-            tokenLine(1), Tokens.END)
-        assertEquals(expected, tokenizeAsList("    foo  bar baz   \n"))
-        assertEquals(source, tokenizeAsString(source))
+            tokenLine(1))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizeMixedUnquotedQuoted() {
        val source = "    foo\"bar\"baz   \n"
-        val expected = List(Tokens.START, tokenWhitespace("    "), tokenUnquoted("foo"),
+        val expected = List(tokenWhitespace("    "), tokenUnquoted("foo"),
            tokenString("bar"), tokenUnquoted("baz"), tokenWhitespace("   "),
-            tokenLine(1), Tokens.END)
-        assertEquals(expected, tokenizeAsList("    foo\"bar\"baz   \n"))
-        assertEquals(source, tokenizeAsString(source))
+            tokenLine(1))
+        tokenizerTest(expected, source)
    }

    @Test
@ -185,10 +171,9 @@ class TokenizerTest extends TestUtils {

        for (t <- tests) {
            describeFailure(t.toString) {
-                assertEquals(List(Tokens.START, tokenWhitespace(" "), Tokens.newValue(t.result, t.toString),
-                    tokenWhitespace(" "), Tokens.END),
-                    tokenizeAsList(t.escaped))
-                assertEquals(t.escaped, tokenizeAsString(t.escaped))
+                val expected = List(tokenWhitespace(" "), Tokens.newValue(t.result, t.toString),
+                  tokenWhitespace(" "))
+                tokenizerTest(expected, t.escaped)
            }
        }
    }
@ -218,41 +203,36 @@ class TokenizerTest extends TestUtils {
    @Test
    def tokenizerEmptyTripleQuoted(): Unit = {
        val source = "\"\"\"\"\"\""
-        assertEquals(List(Tokens.START, tokenString(""), Tokens.END),
-            tokenizeAsList(source))
-        assertEquals(source, tokenizeAsString(source))
+        val expected = List(tokenString(""))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizerTrivialTripleQuoted(): Unit = {
        val source = "\"\"\"bar\"\"\""
-        assertEquals(List(Tokens.START, tokenString("bar"), Tokens.END),
-            tokenizeAsList(source))
-        assertEquals(source, tokenizeAsString(source))
+        val expected = List(tokenString("bar"))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizerNoEscapesInTripleQuoted(): Unit = {
        val source = "\"\"\"\\n\"\"\""
-        assertEquals(List(Tokens.START, tokenString("\\n"), Tokens.END),
-            tokenizeAsList(source))
-        assertEquals(source, tokenizeAsString(source))
+        val expected = List(tokenString("\\n"))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizerTrailingQuotesInTripleQuoted(): Unit = {
        val source = "\"\"\"\"\"\"\"\"\""
-        assertEquals(List(Tokens.START, tokenString("\"\"\""), Tokens.END),
-            tokenizeAsList(source))
-        assertEquals(source, tokenizeAsString(source))
+        val expected = List(tokenString("\"\"\""))
+        tokenizerTest(expected, source)
    }

    @Test
    def tokenizerNewlineInTripleQuoted(): Unit = {
        val source = "\"\"\"foo\nbar\"\"\""
-        assertEquals(List(Tokens.START, tokenString("foo\nbar"), Tokens.END),
-            tokenizeAsList(source))
-        assertEquals(source, tokenizeAsString(source))
+        val expected = List(tokenString("foo\nbar"))
+        tokenizerTest(expected, source)
    }

    @Test
@ -274,9 +254,8 @@ class TokenizerTest extends TestUtils {

        for (t <- tests) {
            describeFailure(t.toString()) {
-                assertEquals(List(Tokens.START, t.result, Tokens.END),
-                    tokenizeAsList(t.s))
-                assertEquals(t.s, tokenizeAsString(t.s))
+                val expected = List(t.result)
+                tokenizerTest(expected, t.s)
            }
        }
    }
@ -285,20 +264,30 @@ class TokenizerTest extends TestUtils {
    def commentsHandledInVariousContexts() {
        tokenizerTest(List(tokenString("//bar")), "\"//bar\"")
        tokenizerTest(List(tokenString("#bar")), "\"#bar\"")
-        tokenizerTest(List(tokenUnquoted("bar"), tokenComment("comment", true)), "bar//comment")
-        tokenizerTest(List(tokenUnquoted("bar"), tokenComment("comment", false)), "bar#comment")
-        tokenizerTest(List(tokenInt(10), tokenComment("comment", true)), "10//comment")
-        tokenizerTest(List(tokenInt(10), tokenComment("comment", false)), "10#comment")
-        tokenizerTest(List(tokenDouble(3.14), tokenComment("comment", true)), "3.14//comment")
-        tokenizerTest(List(tokenDouble(3.14), tokenComment("comment", false)), "3.14#comment")
+        tokenizerTest(List(tokenUnquoted("bar"), tokenCommentDoubleSlash("comment")), "bar//comment")
+        tokenizerTest(List(tokenUnquoted("bar"), tokenCommentHash("comment")), "bar#comment")
+        tokenizerTest(List(tokenInt(10), tokenCommentDoubleSlash("comment")), "10//comment")
+        tokenizerTest(List(tokenInt(10), tokenCommentHash("comment")), "10#comment")
+        tokenizerTest(List(tokenDouble(3.14), tokenCommentDoubleSlash("comment")), "3.14//comment")
+        tokenizerTest(List(tokenDouble(3.14), tokenCommentHash("comment")), "3.14#comment")
        // be sure we keep the newline
-        tokenizerTest(List(tokenInt(10), tokenComment("comment", true), tokenLine(1), tokenInt(12)), "10//comment\n12")
-        tokenizerTest(List(tokenInt(10), tokenComment("comment", false), tokenLine(1), tokenInt(12)), "10#comment\n12")
+        tokenizerTest(List(tokenInt(10), tokenCommentDoubleSlash("comment"), tokenLine(1), tokenInt(12)), "10//comment\n12")
+        tokenizerTest(List(tokenInt(10), tokenCommentHash("comment"), tokenLine(1), tokenInt(12)), "10#comment\n12")
        // be sure we handle multi-line comments
-        tokenizerTest(List(tokenComment("comment", true), tokenLine(1), tokenComment("comment2", true)),
+        tokenizerTest(List(tokenCommentDoubleSlash("comment"), tokenLine(1), tokenCommentDoubleSlash("comment2")),
                      "//comment\n//comment2")
-        tokenizerTest(List(tokenComment("comment", false), tokenLine(1), tokenComment("comment2", false)),
+        tokenizerTest(List(tokenCommentHash("comment"), tokenLine(1), tokenCommentHash("comment2")),
                      "#comment\n#comment2")
+        tokenizerTest(List(tokenWhitespace("        "), tokenCommentDoubleSlash("comment\r"),
+                           tokenLine(1), tokenWhitespace("        "), tokenCommentDoubleSlash("comment2        "),
+                           tokenLine(2), tokenCommentDoubleSlash("comment3        "),
+                           tokenLine(3), tokenLine(4), tokenCommentDoubleSlash("comment4")),
+                      "        //comment\r\n        //comment2        \n//comment3        \n\n//comment4")
+        tokenizerTest(List(tokenWhitespace("        "), tokenCommentHash("comment\r"),
+                           tokenLine(1), tokenWhitespace("        "), tokenCommentHash("comment2        "),
+                           tokenLine(2), tokenCommentHash("comment3        "),
+                           tokenLine(3), tokenLine(4), tokenCommentHash("comment4")),
+                      "        #comment\r\n        #comment2        \n#comment3        \n\n#comment4")
    }

    @Test