Clean up lossless tokens code based on feedback

Clean up the lossless tokens code based on feedback:
  * Rename the nextTokenFromIterator() method to
    nextTokenIgnoringWhitespace()
  * Use StringBuilder in Tokenizer.render()
  * Extract whitespace token creation into a
    `createWhitespaceTokenFromSaver` method
  * Add DoubleSlashComment and HashComment subclasses of
    Comment. Make Comment abstract.
  * Make the `tokenText()` method virtual and override it in
    subclasses that derive their original token text entirely
    from other fields (see the sketch below)
  * Use the `tokenizerTest` helper in all relevant Tokenizer tests
  * Add extra multi-line comment Tokenizer tests
Preben Ingvaldsen 2015-03-03 15:27:41 -08:00
parent 2b1c7ef13d
commit cb86fb136e
6 changed files with 161 additions and 130 deletions
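Before the per-file diffs, here is a minimal self-contained sketch of the comment and tokenText() design described above. Names mirror the diff, but the Token/TokenType/ConfigOrigin plumbing of the real classes is omitted, so treat it as an illustration rather than the actual implementation:

    // Sketch only: tokenText() is overridable, and each comment subclass
    // rebuilds its original source text from the comment body it stores.
    abstract class Comment {
        final String text;

        Comment(String text) {
            this.text = text;
        }

        // Subclasses reconstruct the exact text the tokenizer consumed.
        public abstract String tokenText();

        static final class DoubleSlashComment extends Comment {
            DoubleSlashComment(String text) { super(text); }

            @Override
            public String tokenText() { return "//" + text; }
        }

        static final class HashComment extends Comment {
            HashComment(String text) { super(text); }

            @Override
            public String tokenText() { return "#" + text; }
        }
    }

Tokenizer.render() reproduces the original input by concatenating tokenText() across the token stream, which is also why it switches to StringBuilder: append() is amortized constant time, while the old String += recopied the accumulated result on every token.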


@@ -203,7 +203,7 @@ final class Parser {
 }
 previous = next;
-next = nextTokenFromIterator();
+next = nextTokenIgnoringWhitespace();
 }
 // put our concluding token in the queue with all the comments
@@ -219,7 +219,7 @@ final class Parser {
 private TokenWithComments popTokenWithoutTrailingComment() {
 if (buffer.isEmpty()) {
-Token t = nextTokenFromIterator();
+Token t = nextTokenIgnoringWhitespace();
 if (Tokens.isComment(t)) {
 consolidateCommentBlock(t);
 return buffer.pop();
@@ -243,7 +243,7 @@ final class Parser {
 if (!attractsTrailingComments(withPrecedingComments.token)) {
 return withPrecedingComments;
 } else if (buffer.isEmpty()) {
-Token after = nextTokenFromIterator();
+Token after = nextTokenIgnoringWhitespace();
 if (Tokens.isComment(after)) {
 return withPrecedingComments.add(after);
 } else {
@@ -321,7 +321,7 @@ final class Parser {
 // Grabs the next Token off of the TokenIterator, ignoring
 // IgnoredWhitespace tokens
-private Token nextTokenFromIterator() {
+private Token nextTokenIgnoringWhitespace() {
 Token t;
 do {
 t = tokens.next();


@@ -36,7 +36,7 @@ class Token {
 return tokenType;
 }
-final String tokenText() { return tokenText; }
+public String tokenText() { return tokenText; }
 // this is final because we don't always use the origin() accessor,
 // and we don't because it throws if origin is null


@@ -53,11 +53,11 @@ final class Tokenizer {
 }
 static String render(Iterator<Token> tokens) {
-String renderedText = "";
+StringBuilder renderedText = new StringBuilder();
 while (tokens.hasNext()) {
-renderedText += tokens.next().tokenText();
+renderedText.append(tokens.next().tokenText());
 }
-return renderedText;
+return renderedText.toString();
 }
 private static class TokenIterator implements Iterator<Token> {
@@ -90,14 +90,7 @@ final class Tokenizer {
 // simple values.
 private Token nextIsNotASimpleValue(ConfigOrigin baseOrigin, int lineNumber) {
 lastTokenWasSimpleValue = false;
-if (whitespace.length() > 0) {
-Token t = Tokens.newIgnoredWhitespace(lineOrigin(baseOrigin, lineNumber),
-whitespace.toString());
-whitespace.setLength(0);
-return t;
-}
-return null;
+return createWhitespaceTokenFromSaver(baseOrigin, lineNumber);
 }
 // called if the next token IS a simple value,
@@ -105,29 +98,29 @@ final class Tokenizer {
 // token also was.
 private Token nextIsASimpleValue(ConfigOrigin baseOrigin,
 int lineNumber) {
-if (lastTokenWasSimpleValue) {
-// need to save whitespace between the two so
-// the parser has the option to concatenate it.
-if (whitespace.length() > 0) {
-Token t = Tokens.newUnquotedText(
-lineOrigin(baseOrigin, lineNumber),
-whitespace.toString());
-whitespace.setLength(0); // reset
-return t;
-} else {
-// lastTokenWasSimpleValue = true still
-return null;
-}
-} else {
+Token t = createWhitespaceTokenFromSaver(baseOrigin, lineNumber);
+if (!lastTokenWasSimpleValue) {
 lastTokenWasSimpleValue = true;
-if (whitespace.length() > 0) {
-Token t = Tokens.newIgnoredWhitespace(lineOrigin(baseOrigin, lineNumber),
-whitespace.toString());
-whitespace.setLength(0);
-return t;
-}
-return null;
 }
+return t;
+}
+private Token createWhitespaceTokenFromSaver(ConfigOrigin baseOrigin,
+int lineNumber) {
+if (whitespace.length() > 0) {
+Token t;
+if (lastTokenWasSimpleValue) {
+t = Tokens.newUnquotedText(
+lineOrigin(baseOrigin, lineNumber),
+whitespace.toString());
+} else {
+t = Tokens.newIgnoredWhitespace(lineOrigin(baseOrigin, lineNumber),
+whitespace.toString());
+}
+whitespace.setLength(0); // reset
+return t;
+}
+return null;
 }
 }
@@ -291,7 +284,10 @@ final class Tokenizer {
 int c = nextCharRaw();
 if (c == -1 || c == '\n') {
 putBack(c);
-return Tokens.newComment(lineOrigin, sb.toString(), doubleSlash);
+if (doubleSlash)
+return Tokens.newCommentDoubleSlash(lineOrigin, sb.toString());
+else
+return Tokens.newCommentHash(lineOrigin, sb.toString());
 } else {
 sb.appendCodePoint(c);
 }
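The extracted createWhitespaceTokenFromSaver keeps a subtle rule from the old code: whitespace saved between two simple values must stay available for value concatenation, so it is flushed as unquoted text, while any other run becomes ignorable whitespace. A stripped-down stand-in for that flush logic (WhitespaceSaver here is a hypothetical simplification of the saver state inside Tokenizer):

    final class WhitespaceSaver {
        private final StringBuilder whitespace = new StringBuilder();

        void add(char c) {
            whitespace.append(c);
        }

        // Returns a description of the flushed token, or null when no
        // whitespace is pending, matching the diff's null convention.
        String flush(boolean lastTokenWasSimpleValue) {
            if (whitespace.length() == 0)
                return null;
            String text = whitespace.toString();
            whitespace.setLength(0); // reset the saver, as in the diff
            return (lastTokenWasSimpleValue ? "UNQUOTED_TEXT" : "IGNORED_WHITESPACE")
                    + "('" + text + "')";
        }
    }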


@@ -54,7 +54,7 @@ final class Tokens {
 static private class Line extends Token {
 Line(ConfigOrigin origin) {
-super(TokenType.NEWLINE, origin, "\n");
+super(TokenType.NEWLINE, origin);
 }
 @Override
@@ -76,6 +76,11 @@ final class Tokens {
 public int hashCode() {
 return 41 * (41 + super.hashCode()) + lineNumber();
 }
+@Override
+public String tokenText() {
+return "\n";
+}
 }
 // This is not a Value, because it requires special processing
@@ -83,7 +88,7 @@ final class Tokens {
 final private String value;
 UnquotedText(ConfigOrigin origin, String s) {
-super(TokenType.UNQUOTED_TEXT, origin, s);
+super(TokenType.UNQUOTED_TEXT, origin);
 this.value = s;
 }
@@ -111,13 +116,18 @@ final class Tokens {
 public int hashCode() {
 return 41 * (41 + super.hashCode()) + value.hashCode();
 }
+@Override
+public String tokenText() {
+return value;
+}
 }
 static private class IgnoredWhitespace extends Token {
 final private String value;
 IgnoredWhitespace(ConfigOrigin origin, String s) {
-super(TokenType.IGNORED_WHITESPACE, origin, s);
+super(TokenType.IGNORED_WHITESPACE, origin);
 this.value = s;
 }
@@ -125,6 +135,11 @@ final class Tokens {
 @Override
 public String toString() { return "'" + value + "' (WHITESPACE)"; }
+@Override
+public String tokenText() {
+return value;
+}
 }
 static private class Problem extends Token {
@@ -195,14 +210,36 @@ final class Tokens {
 }
 }
-static private class Comment extends Token {
+static private abstract class Comment extends Token {
 final private String text;
-Comment(ConfigOrigin origin, String text, boolean doubleSlash) {
-super(TokenType.COMMENT, origin, (doubleSlash? "//" : "#") + text);
+Comment(ConfigOrigin origin, String text) {
+super(TokenType.COMMENT, origin);
 this.text = text;
 }
+final static class DoubleSlashComment extends Comment {
+DoubleSlashComment(ConfigOrigin origin, String text) {
+super(origin, text);
+}
+@Override
+public String tokenText() {
+return "//" + super.text;
+}
+}
+final static class HashComment extends Comment {
+HashComment(ConfigOrigin origin, String text) {
+super(origin, text);
+}
+@Override
+public String tokenText() {
+return "#" + super.text;
+}
+}
 String text() {
 return text;
 }
@@ -240,8 +277,7 @@ final class Tokens {
 final private List<Token> value;
 Substitution(ConfigOrigin origin, boolean optional, List<Token> expression) {
-super(TokenType.SUBSTITUTION, origin,
-"${" + (optional? "?" : "") + Tokenizer.render(expression.iterator()) + "}");
+super(TokenType.SUBSTITUTION, origin);
 this.optional = optional;
 this.value = expression;
 }
@@ -254,6 +290,11 @@ final class Tokens {
 return value;
 }
+@Override
+public String tokenText() {
+return "${" + (this.optional? "?" : "") + Tokenizer.render(this.value.iterator()) + "}";
+}
 @Override
 public String toString() {
 StringBuilder sb = new StringBuilder();
@@ -409,8 +450,12 @@ final class Tokens {
 return new Problem(origin, what, message, suggestQuotes, cause);
 }
-static Token newComment(ConfigOrigin origin, String text, boolean doubleSlash) {
-return new Comment(origin, text, doubleSlash);
+static Token newCommentDoubleSlash(ConfigOrigin origin, String text) {
+return new Comment.DoubleSlashComment(origin, text);
+}
+static Token newCommentHash(ConfigOrigin origin, String text) {
+return new Comment.HashComment(origin, text);
 }
 static Token newUnquotedText(ConfigOrigin origin, String s) {
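Of the tokenText() overrides above, Substitution is the interesting one: it rebuilds its original text recursively, re-rendering the nested expression tokens. A stripped-down illustration using hypothetical stand-ins (Tok, Unquoted, Subst) for the real Token hierarchy:

    import java.util.Iterator;
    import java.util.List;

    abstract class Tok {
        public abstract String tokenText();

        // Mirrors Tokenizer.render(): concatenate each token's original text.
        static String render(Iterator<Tok> tokens) {
            StringBuilder sb = new StringBuilder();
            while (tokens.hasNext())
                sb.append(tokens.next().tokenText());
            return sb.toString();
        }
    }

    final class Unquoted extends Tok {
        private final String value;
        Unquoted(String value) { this.value = value; }
        @Override public String tokenText() { return value; }
    }

    final class Subst extends Tok {
        private final boolean optional;
        private final List<Tok> expression;
        Subst(boolean optional, List<Tok> expression) {
            this.optional = optional;
            this.expression = expression;
        }
        // "${?x.y}" for an optional substitution, "${x.y}" otherwise.
        @Override public String tokenText() {
            return "${" + (optional ? "?" : "") + render(expression.iterator()) + "}";
        }
    }

With these stand-ins, new Subst(true, List.of(new Unquoted("x.y"))).tokenText() yields "${?x.y}", the same round-trip the substitution test cases below rely on.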


@@ -616,7 +616,8 @@ abstract trait TestUtils {
 def tokenInt(i: Int) = Tokens.newInt(fakeOrigin(), i, null)
 def tokenLong(l: Long) = Tokens.newLong(fakeOrigin(), l, null)
 def tokenLine(line: Int) = Tokens.newLine(fakeOrigin.withLineNumber(line))
-def tokenComment(text: String, doubleSlash : Boolean) = Tokens.newComment(fakeOrigin(), text, doubleSlash)
+def tokenCommentDoubleSlash(text: String) = Tokens.newCommentDoubleSlash(fakeOrigin(), text)
+def tokenCommentHash(text: String) = Tokens.newCommentHash(fakeOrigin(), text)
 def tokenWhitespace(text: String) = Tokens.newIgnoredWhitespace(fakeOrigin(), text)
 private def tokenMaybeOptionalSubstitution(optional: Boolean, expression: Token*) = {


@@ -20,17 +20,15 @@ class TokenizerTest extends TestUtils {
 @Test
 def tokenizeEmptyString() {
 val source = ""
-assertEquals(List(Tokens.START, Tokens.END),
-tokenizeAsList(source))
-assertEquals(source, tokenizeAsString(source))
+val expected = List()
+tokenizerTest(expected, source)
 }
 @Test
 def tokenizeNewlines() {
 val source = "\n\n"
-assertEquals(List(Tokens.START, tokenLine(1), tokenLine(2), Tokens.END),
-tokenizeAsList(source))
-assertEquals(source, tokenizeAsString(source))
+val expected = List(tokenLine(1), tokenLine(2))
+tokenizerTest(expected, source)
 }
 @Test
@@ -39,20 +37,19 @@ class TokenizerTest extends TestUtils {
 // but spec is unclear to me when spaces are required, and banning them
 // is actually extra work).
 val source = """,:=}{][+="foo"""" + "\"\"\"bar\"\"\"" + """true3.14false42null${a.b}${?x.y}${"c.d"}""" + "\n"
-val expected = List(Tokens.START, Tokens.COMMA, Tokens.COLON, Tokens.EQUALS, Tokens.CLOSE_CURLY,
+val expected = List(Tokens.COMMA, Tokens.COLON, Tokens.EQUALS, Tokens.CLOSE_CURLY,
 Tokens.OPEN_CURLY, Tokens.CLOSE_SQUARE, Tokens.OPEN_SQUARE, Tokens.PLUS_EQUALS, tokenString("foo"),
 tokenString("bar"), tokenTrue, tokenDouble(3.14), tokenFalse,
 tokenLong(42), tokenNull, tokenSubstitution(tokenUnquoted("a.b")),
 tokenOptionalSubstitution(tokenUnquoted("x.y")),
-tokenKeySubstitution("c.d"), tokenLine(1), Tokens.END)
-assertEquals(expected, tokenizeAsList(source))
-assertEquals(source, tokenizeAsString(source))
+tokenKeySubstitution("c.d"), tokenLine(1))
+tokenizerTest(expected, source)
 }
 @Test
 def tokenizeAllTypesWithSingleSpaces() {
 val source = """ , : = } { ] [ += "foo" """ + "\"\"\"bar\"\"\"" + """ 42 true 3.14 false null ${a.b} ${?x.y} ${"c.d"} """ + "\n "
-val expected = List(Tokens.START, tokenWhitespace(" "), Tokens.COMMA, tokenWhitespace(" "), Tokens.COLON, tokenWhitespace(" "),
+val expected = List(tokenWhitespace(" "), Tokens.COMMA, tokenWhitespace(" "), Tokens.COLON, tokenWhitespace(" "),
 Tokens.EQUALS, tokenWhitespace(" "), Tokens.CLOSE_CURLY, tokenWhitespace(" "), Tokens.OPEN_CURLY, tokenWhitespace(" "),
 Tokens.CLOSE_SQUARE, tokenWhitespace(" "), Tokens.OPEN_SQUARE, tokenWhitespace(" "), Tokens.PLUS_EQUALS, tokenWhitespace(" "),
 tokenString("foo"), tokenUnquoted(" "), tokenString("bar"), tokenUnquoted(" "), tokenLong(42), tokenUnquoted(" "),
@@ -60,15 +57,14 @@ class TokenizerTest extends TestUtils {
 tokenUnquoted(" "), tokenSubstitution(tokenUnquoted("a.b")), tokenUnquoted(" "),
 tokenOptionalSubstitution(tokenUnquoted("x.y")), tokenUnquoted(" "),
 tokenKeySubstitution("c.d"), tokenWhitespace(" "),
-tokenLine(1), tokenWhitespace(" "), Tokens.END)
-assertEquals(expected, tokenizeAsList(source))
-assertEquals(source, tokenizeAsString(source))
+tokenLine(1), tokenWhitespace(" "))
+tokenizerTest(expected, source)
 }
 @Test
 def tokenizeAllTypesWithMultipleSpaces() {
 val source = """   ,   :   =   }   {   ]   [   +=   "foo"   """ + "\"\"\"bar\"\"\"" + """   42   true   3.14   false   null   ${a.b}   ${?x.y}   ${"c.d"}   """ + "\n   "
-val expected = List(Tokens.START, tokenWhitespace("   "), Tokens.COMMA, tokenWhitespace("   "), Tokens.COLON, tokenWhitespace("   "),
+val expected = List(tokenWhitespace("   "), Tokens.COMMA, tokenWhitespace("   "), Tokens.COLON, tokenWhitespace("   "),
 Tokens.EQUALS, tokenWhitespace("   "), Tokens.CLOSE_CURLY, tokenWhitespace("   "), Tokens.OPEN_CURLY, tokenWhitespace("   "), Tokens.CLOSE_SQUARE,
 tokenWhitespace("   "), Tokens.OPEN_SQUARE, tokenWhitespace("   "), Tokens.PLUS_EQUALS, tokenWhitespace("   "), tokenString("foo"),
 tokenUnquoted("   "), tokenString("bar"), tokenUnquoted("   "), tokenLong(42), tokenUnquoted("   "), tokenTrue, tokenUnquoted("   "),
@@ -76,57 +72,50 @@ class TokenizerTest extends TestUtils {
 tokenUnquoted("   "), tokenSubstitution(tokenUnquoted("a.b")), tokenUnquoted("   "),
 tokenOptionalSubstitution(tokenUnquoted("x.y")), tokenUnquoted("   "),
 tokenKeySubstitution("c.d"), tokenWhitespace("   "),
-tokenLine(1), tokenWhitespace("   "), Tokens.END)
-assertEquals(expected, tokenizeAsList(source))
-assertEquals(source, tokenizeAsString(source))
+tokenLine(1), tokenWhitespace("   "))
+tokenizerTest(expected, source)
 }
 @Test
 def tokenizeTrueAndUnquotedText() {
 val source = """truefoo"""
-val expected = List(Tokens.START, tokenTrue, tokenUnquoted("foo"), Tokens.END)
-assertEquals(expected, tokenizeAsList(source))
-assertEquals(source, tokenizeAsString(source))
+val expected = List(tokenTrue, tokenUnquoted("foo"))
+tokenizerTest(expected, source)
 }
 @Test
 def tokenizeFalseAndUnquotedText() {
 val source = """falsefoo"""
-val expected = List(Tokens.START, tokenFalse, tokenUnquoted("foo"), Tokens.END)
-assertEquals(expected, tokenizeAsList(source))
-assertEquals(source, tokenizeAsString(source))
+val expected = List(tokenFalse, tokenUnquoted("foo"))
+tokenizerTest(expected, source)
 }
 @Test
 def tokenizeNullAndUnquotedText() {
 val source = """nullfoo"""
-val expected = List(Tokens.START, tokenNull, tokenUnquoted("foo"), Tokens.END)
-assertEquals(expected, tokenizeAsList(source))
-assertEquals(source, tokenizeAsString(source))
+val expected = List(tokenNull, tokenUnquoted("foo"))
+tokenizerTest(expected, source)
 }
 @Test
 def tokenizeUnquotedTextContainingTrue() {
 val source = """footrue"""
-val expected = List(Tokens.START, tokenUnquoted("footrue"), Tokens.END)
-assertEquals(expected, tokenizeAsList(source))
-assertEquals(source, tokenizeAsString(source))
+val expected = List(tokenUnquoted("footrue"))
+tokenizerTest(expected, source)
 }
 @Test
 def tokenizeUnquotedTextContainingSpaceTrue() {
 val source = """foo true"""
-val expected = List(Tokens.START, tokenUnquoted("foo"), tokenUnquoted(" "), tokenTrue, Tokens.END)
-assertEquals(expected, tokenizeAsList(source))
-assertEquals(source, tokenizeAsString(source))
+val expected = List(tokenUnquoted("foo"), tokenUnquoted(" "), tokenTrue)
+tokenizerTest(expected, source)
 }
 @Test
 def tokenizeTrueAndSpaceAndUnquotedText() {
 val source = """true foo"""
-val expected = List(Tokens.START, tokenTrue, tokenUnquoted(" "), tokenUnquoted("foo"), Tokens.END)
-assertEquals(expected, tokenizeAsList(source))
-assertEquals(source, tokenizeAsString(source))
+val expected = List(tokenTrue, tokenUnquoted(" "), tokenUnquoted("foo"))
+tokenizerTest(expected, source)
 }
 @Test
@@ -134,36 +123,33 @@ class TokenizerTest extends TestUtils {
 tokenizerTest(List(tokenUnquoted("a/b/c/")), "a/b/c/")
 tokenizerTest(List(tokenUnquoted("/")), "/")
 tokenizerTest(List(tokenUnquoted("/"), tokenUnquoted(" "), tokenUnquoted("/")), "/ /")
-tokenizerTest(List(tokenComment("", true)), "//")
+tokenizerTest(List(tokenCommentDoubleSlash("")), "//")
 }
 @Test
 def tokenizeUnquotedTextKeepsSpaces() {
 val source = " foo \n"
-val expected = List(Tokens.START, tokenWhitespace(" "), tokenUnquoted("foo"), tokenWhitespace(" "),
-tokenLine(1), Tokens.END)
-assertEquals(expected, tokenizeAsList(source))
-assertEquals(source, tokenizeAsString(source))
+val expected = List(tokenWhitespace(" "), tokenUnquoted("foo"), tokenWhitespace(" "),
+tokenLine(1))
+tokenizerTest(expected, source)
 }
 @Test
 def tokenizeUnquotedTextKeepsInternalSpaces() {
 val source = " foo bar baz \n"
-val expected = List(Tokens.START, tokenWhitespace(" "), tokenUnquoted("foo"), tokenUnquoted(" "),
-tokenUnquoted("bar"), tokenUnquoted(" "), tokenUnquoted("baz"), tokenWhitespace(" "),
-tokenLine(1), Tokens.END)
-assertEquals(expected, tokenizeAsList(" foo bar baz \n"))
-assertEquals(source, tokenizeAsString(source))
+val expected = List(tokenWhitespace(" "), tokenUnquoted("foo"), tokenUnquoted(" "),
+tokenUnquoted("bar"), tokenUnquoted(" "), tokenUnquoted("baz"), tokenWhitespace(" "),
+tokenLine(1))
+tokenizerTest(expected, source)
 }
 @Test
 def tokenizeMixedUnquotedQuoted() {
 val source = " foo\"bar\"baz \n"
-val expected = List(Tokens.START, tokenWhitespace(" "), tokenUnquoted("foo"),
-tokenString("bar"), tokenUnquoted("baz"), tokenWhitespace(" "),
-tokenLine(1), Tokens.END)
-assertEquals(expected, tokenizeAsList(" foo\"bar\"baz \n"))
-assertEquals(source, tokenizeAsString(source))
+val expected = List(tokenWhitespace(" "), tokenUnquoted("foo"),
+tokenString("bar"), tokenUnquoted("baz"), tokenWhitespace(" "),
+tokenLine(1))
+tokenizerTest(expected, source)
 }
 @Test
@@ -185,10 +171,9 @@ class TokenizerTest extends TestUtils {
 for (t <- tests) {
 describeFailure(t.toString) {
-assertEquals(List(Tokens.START, tokenWhitespace(" "), Tokens.newValue(t.result, t.toString),
-tokenWhitespace(" "), Tokens.END),
-tokenizeAsList(t.escaped))
-assertEquals(t.escaped, tokenizeAsString(t.escaped))
+val expected = List(tokenWhitespace(" "), Tokens.newValue(t.result, t.toString),
+tokenWhitespace(" "))
+tokenizerTest(expected, t.escaped)
 }
 }
 }
@@ -218,41 +203,36 @@ class TokenizerTest extends TestUtils {
 @Test
 def tokenizerEmptyTripleQuoted(): Unit = {
 val source = "\"\"\"\"\"\""
-assertEquals(List(Tokens.START, tokenString(""), Tokens.END),
-tokenizeAsList(source))
-assertEquals(source, tokenizeAsString(source))
+val expected = List(tokenString(""))
+tokenizerTest(expected, source)
 }
 @Test
 def tokenizerTrivialTripleQuoted(): Unit = {
 val source = "\"\"\"bar\"\"\""
-assertEquals(List(Tokens.START, tokenString("bar"), Tokens.END),
-tokenizeAsList(source))
-assertEquals(source, tokenizeAsString(source))
+val expected = List(tokenString("bar"))
+tokenizerTest(expected, source)
 }
 @Test
 def tokenizerNoEscapesInTripleQuoted(): Unit = {
 val source = "\"\"\"\\n\"\"\""
-assertEquals(List(Tokens.START, tokenString("\\n"), Tokens.END),
-tokenizeAsList(source))
-assertEquals(source, tokenizeAsString(source))
+val expected = List(tokenString("\\n"))
+tokenizerTest(expected, source)
 }
 @Test
 def tokenizerTrailingQuotesInTripleQuoted(): Unit = {
 val source = "\"\"\"\"\"\"\"\"\""
-assertEquals(List(Tokens.START, tokenString("\"\"\""), Tokens.END),
-tokenizeAsList(source))
-assertEquals(source, tokenizeAsString(source))
+val expected = List(tokenString("\"\"\""))
+tokenizerTest(expected, source)
 }
 @Test
 def tokenizerNewlineInTripleQuoted(): Unit = {
 val source = "\"\"\"foo\nbar\"\"\""
-assertEquals(List(Tokens.START, tokenString("foo\nbar"), Tokens.END),
-tokenizeAsList(source))
-assertEquals(source, tokenizeAsString(source))
+val expected = List(tokenString("foo\nbar"))
+tokenizerTest(expected, source)
 }
 @Test
@@ -274,9 +254,8 @@ class TokenizerTest extends TestUtils {
 for (t <- tests) {
 describeFailure(t.toString()) {
-assertEquals(List(Tokens.START, t.result, Tokens.END),
-tokenizeAsList(t.s))
-assertEquals(t.s, tokenizeAsString(t.s))
+val expected = List(t.result)
+tokenizerTest(expected, t.s)
 }
 }
 }
@@ -285,20 +264,30 @@ class TokenizerTest extends TestUtils {
 def commentsHandledInVariousContexts() {
 tokenizerTest(List(tokenString("//bar")), "\"//bar\"")
 tokenizerTest(List(tokenString("#bar")), "\"#bar\"")
-tokenizerTest(List(tokenUnquoted("bar"), tokenComment("comment", true)), "bar//comment")
-tokenizerTest(List(tokenUnquoted("bar"), tokenComment("comment", false)), "bar#comment")
-tokenizerTest(List(tokenInt(10), tokenComment("comment", true)), "10//comment")
-tokenizerTest(List(tokenInt(10), tokenComment("comment", false)), "10#comment")
-tokenizerTest(List(tokenDouble(3.14), tokenComment("comment", true)), "3.14//comment")
-tokenizerTest(List(tokenDouble(3.14), tokenComment("comment", false)), "3.14#comment")
+tokenizerTest(List(tokenUnquoted("bar"), tokenCommentDoubleSlash("comment")), "bar//comment")
+tokenizerTest(List(tokenUnquoted("bar"), tokenCommentHash("comment")), "bar#comment")
+tokenizerTest(List(tokenInt(10), tokenCommentDoubleSlash("comment")), "10//comment")
+tokenizerTest(List(tokenInt(10), tokenCommentHash("comment")), "10#comment")
+tokenizerTest(List(tokenDouble(3.14), tokenCommentDoubleSlash("comment")), "3.14//comment")
+tokenizerTest(List(tokenDouble(3.14), tokenCommentHash("comment")), "3.14#comment")
 // be sure we keep the newline
-tokenizerTest(List(tokenInt(10), tokenComment("comment", true), tokenLine(1), tokenInt(12)), "10//comment\n12")
-tokenizerTest(List(tokenInt(10), tokenComment("comment", false), tokenLine(1), tokenInt(12)), "10#comment\n12")
+tokenizerTest(List(tokenInt(10), tokenCommentDoubleSlash("comment"), tokenLine(1), tokenInt(12)), "10//comment\n12")
+tokenizerTest(List(tokenInt(10), tokenCommentHash("comment"), tokenLine(1), tokenInt(12)), "10#comment\n12")
 // be sure we handle multi-line comments
-tokenizerTest(List(tokenComment("comment", true), tokenLine(1), tokenComment("comment2", true)),
-"//comment\n//comment2")
-tokenizerTest(List(tokenComment("comment", false), tokenLine(1), tokenComment("comment2", false)),
-"#comment\n#comment2")
+tokenizerTest(List(tokenCommentDoubleSlash("comment"), tokenLine(1), tokenCommentDoubleSlash("comment2")),
+"//comment\n//comment2")
+tokenizerTest(List(tokenCommentHash("comment"), tokenLine(1), tokenCommentHash("comment2")),
+"#comment\n#comment2")
+tokenizerTest(List(tokenWhitespace(" "), tokenCommentDoubleSlash("comment\r"),
+tokenLine(1), tokenWhitespace(" "), tokenCommentDoubleSlash("comment2 "),
+tokenLine(2), tokenCommentDoubleSlash("comment3 "),
+tokenLine(3), tokenLine(4), tokenCommentDoubleSlash("comment4")),
+" //comment\r\n //comment2 \n//comment3 \n\n//comment4")
+tokenizerTest(List(tokenWhitespace(" "), tokenCommentHash("comment\r"),
+tokenLine(1), tokenWhitespace(" "), tokenCommentHash("comment2 "),
+tokenLine(2), tokenCommentHash("comment3 "),
+tokenLine(3), tokenLine(4), tokenCommentHash("comment4")),
+" #comment\r\n #comment2 \n#comment3 \n\n#comment4")
 }
 @Test