From e53a7d6294e863e4928abf13975c94b94623ed15 Mon Sep 17 00:00:00 2001 From: Havoc Pennington Date: Mon, 7 Nov 2011 12:24:05 -0500 Subject: [PATCH] more typing and hacking; add json parser --- .../config/impl/AbstractConfigObject.java | 3 + .../config/impl/ConfigSubstitution.java | 4 +- src/com/typesafe/config/impl/Parser.java | 187 +++++++++++++++++- src/com/typesafe/config/impl/Token.java | 18 ++ src/com/typesafe/config/impl/TokenType.java | 2 +- src/com/typesafe/config/impl/Tokenizer.java | 57 +++--- src/com/typesafe/config/impl/Tokens.java | 76 +++++-- 7 files changed, 296 insertions(+), 51 deletions(-) create mode 100644 src/com/typesafe/config/impl/Token.java diff --git a/src/com/typesafe/config/impl/AbstractConfigObject.java b/src/com/typesafe/config/impl/AbstractConfigObject.java index 623bb799..caae93cb 100644 --- a/src/com/typesafe/config/impl/AbstractConfigObject.java +++ b/src/com/typesafe/config/impl/AbstractConfigObject.java @@ -229,6 +229,9 @@ abstract class AbstractConfigObject extends AbstractConfigValue implements List l = new ArrayList(); List list = getList(path); for (ConfigValue v : list) { + if (expected != null && transformer != null) { + v = transformer.transform(v, expected); + } if (v.valueType() != expected) throw new ConfigException.WrongType(v.origin(), path, expected.name(), v.valueType().name()); diff --git a/src/com/typesafe/config/impl/ConfigSubstitution.java b/src/com/typesafe/config/impl/ConfigSubstitution.java index 05d8743e..621b3aee 100644 --- a/src/com/typesafe/config/impl/ConfigSubstitution.java +++ b/src/com/typesafe/config/impl/ConfigSubstitution.java @@ -8,10 +8,10 @@ import com.typesafe.config.ConfigValueType; final class ConfigSubstitution extends AbstractConfigValue { private AbstractConfigObject root; - private List tokens; + private List tokens; ConfigSubstitution(ConfigOrigin origin, AbstractConfigObject root, - List tokens) { + List tokens) { super(origin); this.root = root; this.tokens = tokens; diff --git 
a/src/com/typesafe/config/impl/Parser.java b/src/com/typesafe/config/impl/Parser.java index 2a30127c..67bf6119 100644 --- a/src/com/typesafe/config/impl/Parser.java +++ b/src/com/typesafe/config/impl/Parser.java @@ -4,10 +4,17 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; import java.util.Iterator; +import java.util.List; +import java.util.Map; import com.typesafe.config.ConfigException; import com.typesafe.config.ConfigOrigin; +import com.typesafe.config.ConfigValue; +import com.typesafe.config.ConfigValueType; final class Parser { /** @@ -15,7 +22,7 @@ final class Parser { * buffered. Does not close the stream; you have to arrange to do that * yourself. */ - static AbstractConfigObject parse(ConfigOrigin origin, InputStream input) { + static AbstractConfigValue parse(ConfigOrigin origin, InputStream input) { try { return parse(origin, new InputStreamReader(input, "UTF-8")); } catch (UnsupportedEncodingException e) { @@ -24,14 +31,180 @@ final class Parser { } } - static AbstractConfigObject parse(ConfigOrigin origin, - Reader input) { - Iterator tokens = Tokenizer.tokenize(origin, input); + static AbstractConfigValue parse(ConfigOrigin origin, Reader input) { + Iterator tokens = Tokenizer.tokenize(origin, input); return parse(origin, tokens); } - private static AbstractConfigObject parse(ConfigOrigin origin, - Iterator tokens) { - return null; // FIXME + static private final class ParseContext { + private int lineNumber; + private ConfigOrigin baseOrigin; + + ParseContext(ConfigOrigin origin) { + lineNumber = 0; + baseOrigin = origin; + } + + private Token nextTokenIgnoringNewline(Iterator tokens) { /* returns the next token that is not a NEWLINE, recording the line position from every NEWLINE it skips */ + Token t = tokens.next(); + while (Tokens.isNewline(t)) { + lineNumber = Tokens.getLineNumber(t) + 1; /* a NEWLINE token carries the number of the line that just ended, so whatever follows it is on the next line */ + t = tokens.next(); + } + return t; + } + + private ConfigOrigin lineOrigin() { + return new
SimpleConfigOrigin(baseOrigin.description() + ": line " + + lineNumber); + } + + private ConfigException parseError(String message) { + return parseError(message, null); + } + + private ConfigException parseError(String message, Throwable cause) { + return new ConfigException.Parse(lineOrigin(), message, cause); + } + + private AbstractConfigValue parseValue(Token token, + Iterator tokens) { + if (Tokens.isValue(token)) { + return Tokens.getValue(token); + } else if (token == Tokens.OPEN_CURLY) { + return parseObject(tokens); + } else if (token == Tokens.OPEN_SQUARE) { + return parseArray(tokens); + } else { + throw parseError("Expecting a value but got wrong token: " + + token); + } + } + + private AbstractConfigObject parseObject(Iterator tokens) { + // invoked just after the OPEN_CURLY + Map values = new HashMap(); + ConfigOrigin objectOrigin = lineOrigin(); + while (true) { + Token t = nextTokenIgnoringNewline(tokens); + if (Tokens.isValueWithType(t, ConfigValueType.STRING)) { + String key = (String) Tokens.getValue(t).unwrapped(); + Token afterKey = nextTokenIgnoringNewline(tokens); + if (afterKey != Tokens.COLON) { + throw parseError("Key not followed by a colon, followed by token " + + afterKey); + } + Token valueToken = nextTokenIgnoringNewline(tokens); + + // note how we handle duplicate keys: the last one just + // wins. + // FIXME in strict JSON, dups should be an error; while in + // our custom config language, they should be merged if the + // value is an object. 
+ values.put(key, parseValue(valueToken, tokens)); + } else if (t == Tokens.CLOSE_CURLY) { + break; + } else { + throw parseError("Expecting close brace } or a field name, got " + + t); + } + t = nextTokenIgnoringNewline(tokens); + if (t == Tokens.CLOSE_CURLY) { + break; + } else if (t == Tokens.COMMA) { + // continue looping + } else { + throw parseError("Expecting close brace } or a comma, got " + + t); + } + } + return new SimpleConfigObject(objectOrigin, null, values); + } + + private ConfigList parseArray(Iterator tokens) { + // invoked just after the OPEN_SQUARE + ConfigOrigin arrayOrigin = lineOrigin(); + List values = new ArrayList(); + Token t = nextTokenIgnoringNewline(tokens); + + // special-case the first element + if (t == Tokens.CLOSE_SQUARE) { + return new ConfigList(arrayOrigin, + Collections. emptyList()); + } else if (Tokens.isValue(t)) { + values.add(parseValue(t, tokens)); + } else if (t == Tokens.OPEN_CURLY) { + values.add(parseObject(tokens)); + } else if (t == Tokens.OPEN_SQUARE) { + values.add(parseArray(tokens)); + } else { + throw parseError("List should have ] or a first element after the open [, instead had token: " + + t); + } + + // now remaining elements + while (true) { + // just after a value + t = nextTokenIgnoringNewline(tokens); + if (t == Tokens.CLOSE_SQUARE) { + return new ConfigList(arrayOrigin, values); + } else if (t == Tokens.COMMA) { + // OK + } else { + throw parseError("List should have ended with ] or had a comma, instead had token: " + + t); + } + + // now just after a comma + t = nextTokenIgnoringNewline(tokens); + if (Tokens.isValue(t)) { + values.add(parseValue(t, tokens)); + } else if (t == Tokens.OPEN_CURLY) { + values.add(parseObject(tokens)); + } else if (t == Tokens.OPEN_SQUARE) { + values.add(parseArray(tokens)); + } else { + throw parseError("List should have had new element after a comma, instead had token: " + + t); + } + } + } + + AbstractConfigValue parse(Iterator tokens) { + Token t = 
nextTokenIgnoringNewline(tokens); + if (t == Tokens.START) { + // OK + } else { + throw new ConfigException.BugOrBroken( + "token stream did not begin with START, had " + t); + } + + t = nextTokenIgnoringNewline(tokens); + AbstractConfigValue result = null; + if (t == Tokens.OPEN_CURLY) { + result = parseObject(tokens); + } else if (t == Tokens.OPEN_SQUARE) { + result = parseArray(tokens); + } else if (t == Tokens.END) { + throw parseError("Empty document"); + } else { + throw parseError("Document must have an object or array at root, unexpected token: " + + t); + } + + t = nextTokenIgnoringNewline(tokens); + if (t == Tokens.END) { + return result; + } else { + throw parseError("Document has trailing tokens after first object or array: " + + t); + } + } + } + + private static AbstractConfigValue parse(ConfigOrigin origin, + Iterator tokens) { + ParseContext context = new ParseContext(origin); + return context.parse(tokens); } } diff --git a/src/com/typesafe/config/impl/Token.java b/src/com/typesafe/config/impl/Token.java new file mode 100644 index 00000000..9992ee12 --- /dev/null +++ b/src/com/typesafe/config/impl/Token.java @@ -0,0 +1,18 @@ +package com.typesafe.config.impl; + +class Token { + private TokenType tokenType; + + Token(TokenType tokenType) { + this.tokenType = tokenType; + } + + public TokenType tokenType() { + return tokenType; + } + + @Override + public String toString() { + return tokenType.name(); + } +} diff --git a/src/com/typesafe/config/impl/TokenType.java b/src/com/typesafe/config/impl/TokenType.java index 3b946aad..af545b1c 100644 --- a/src/com/typesafe/config/impl/TokenType.java +++ b/src/com/typesafe/config/impl/TokenType.java @@ -1,5 +1,5 @@ package com.typesafe.config.impl; enum TokenType { - START, END, COMMA, COLON, OPEN_CURLY, CLOSE_CURLY, OPEN_SQUARE, CLOSE_SQUARE, VALUE; + START, END, COMMA, COLON, OPEN_CURLY, CLOSE_CURLY, OPEN_SQUARE, CLOSE_SQUARE, VALUE, NEWLINE; } diff --git a/src/com/typesafe/config/impl/Tokenizer.java 
b/src/com/typesafe/config/impl/Tokenizer.java index 8096960a..1506f262 100644 --- a/src/com/typesafe/config/impl/Tokenizer.java +++ b/src/com/typesafe/config/impl/Tokenizer.java @@ -8,24 +8,23 @@ import java.util.Queue; import com.typesafe.config.ConfigException; import com.typesafe.config.ConfigOrigin; -import com.typesafe.config.impl.Tokens.Token; final class Tokenizer { /** * Tokenizes a Reader. Does not close the reader; you have to arrange to do * that after you're done with the returned iterator. */ - static Iterator tokenize(ConfigOrigin origin, Reader input) { + static Iterator tokenize(ConfigOrigin origin, Reader input) { return new TokenIterator(origin, input); } - private static class TokenIterator implements Iterator { + private static class TokenIterator implements Iterator { private ConfigOrigin origin; private Reader input; private int oneCharBuffer; private int lineNumber; - private Queue tokens; + private Queue tokens; private int nextChar() { if (oneCharBuffer >= 0) { @@ -56,9 +55,9 @@ final class Tokenizer { if (c == -1) { return -1; + } else if (c == '\n') { + return c; } else if (Character.isWhitespace(c)) { - if (c == '\n') - lineNumber += 1; continue; } else { return c; @@ -66,13 +65,12 @@ final class Tokenizer { } } - private void parseError(String message) { - parseError(message, null); + private ConfigException parseError(String message) { + return parseError(message, null); } - private void parseError(String message, Throwable cause) { - throw new ConfigException.Parse(origin, - lineNumber + ": " + message, cause); + private ConfigException parseError(String message, Throwable cause) { + return new ConfigException.Parse(lineOrigin(), message, cause); } private void checkNextOrThrow(String expectedBefore, String expectedNow) { @@ -82,12 +80,12 @@ final class Tokenizer { int actual = nextChar(); if (actual == -1) - parseError(String.format( + throw parseError(String.format( "Expecting '%s%s' but input data ended", expectedBefore, 
expectedNow)); if (actual != expected) - parseError(String + throw parseError(String .format("Expecting '%s%s' but got char '%c' rather than '%c'", expectedBefore, expectedNow, actual, expected)); @@ -101,25 +99,25 @@ final class Tokenizer { + lineNumber); } - private Tokens.Token pullTrue() { + private Token pullTrue() { // "t" has been already seen checkNextOrThrow("t", "rue"); return Tokens.newBoolean(lineOrigin(), true); } - private Tokens.Token pullFalse() { + private Token pullFalse() { // "f" has been already seen checkNextOrThrow("f", "alse"); return Tokens.newBoolean(lineOrigin(), false); } - private Tokens.Token pullNull() { + private Token pullNull() { // "n" has been already seen checkNextOrThrow("n", "ull"); return Tokens.newNull(lineOrigin()); } - private Tokens.Token pullNumber(int firstChar) { + private Token pullNumber(int firstChar) { StringBuilder sb = new StringBuilder(); sb.append((char) firstChar); boolean containedDecimalOrE = false; @@ -144,15 +142,14 @@ final class Tokenizer { return Tokens.newLong(lineOrigin(), Long.parseLong(s)); } } catch (NumberFormatException e) { - parseError("Invalid number", e); - throw new ConfigException.BugOrBroken("not reached"); + throw parseError("Invalid number", e); } } private void pullEscapeSequence(StringBuilder sb) { int escaped = nextChar(); if (escaped == -1) - parseError("End of input but backslash in string had nothing after it"); + throw parseError("End of input but backslash in string had nothing after it"); switch (escaped) { case '"': @@ -185,14 +182,14 @@ final class Tokenizer { for (int i = 0; i < 4; ++i) { int c = nextChar(); if (c == -1) - parseError("End of input but expecting 4 hex digits for \\uXXXX escape"); + throw parseError("End of input but expecting 4 hex digits for \\uXXXX escape"); a[i] = (char) c; } String digits = new String(a); try { sb.appendCodePoint(Integer.parseInt(digits, 16)); } catch (NumberFormatException e) { - parseError( + throw parseError( String.format( "Malformed 
hex digits after \\u escape in string: '%s'", digits), e); @@ -200,20 +197,20 @@ final class Tokenizer { } break; default: - parseError(String + throw parseError(String .format("backslash followed by '%c', this is not a valid escape sequence", escaped)); } } - private Tokens.Token pullQuotedString() { + private Token pullQuotedString() { // the open quote has already been consumed StringBuilder sb = new StringBuilder(); int c = '\0'; // value doesn't get used do { c = nextChar(); if (c == -1) - parseError("End of input but string quote was still open"); + throw parseError("End of input but string quote was still open"); if (c == '\\') { pullEscapeSequence(sb); @@ -230,6 +227,10 @@ final class Tokenizer { int c = nextCharAfterWhitespace(); if (c == -1) { tokens.add(Tokens.END); + } else if (c == '\n') { + // newline tokens have the just-ended line number + tokens.add(Tokens.newLine(lineNumber)); + lineNumber += 1; } else { Token t = null; switch (c) { @@ -268,7 +269,7 @@ final class Tokenizer { if ("-0123456789".indexOf(c) >= 0) { t = pullNumber(c); } else { - parseError(String + throw parseError(String .format("Character '%c' is not the start of any valid token", c)); } @@ -285,7 +286,7 @@ final class Tokenizer { this.input = input; oneCharBuffer = -1; lineNumber = 0; - tokens = new LinkedList(); + tokens = new LinkedList(); tokens.add(Tokens.START); } @@ -296,7 +297,7 @@ final class Tokenizer { @Override public Token next() { - Tokens.Token t = tokens.remove(); + Token t = tokens.remove(); if (t != Tokens.END) { queueNextToken(); if (tokens.isEmpty()) diff --git a/src/com/typesafe/config/impl/Tokens.java b/src/com/typesafe/config/impl/Tokens.java index 65aef46a..12b482e6 100644 --- a/src/com/typesafe/config/impl/Tokens.java +++ b/src/com/typesafe/config/impl/Tokens.java @@ -1,21 +1,12 @@ package com.typesafe.config.impl; +import com.typesafe.config.ConfigException; +import com.typesafe.config.ConfigObject; import com.typesafe.config.ConfigOrigin; +import 
com.typesafe.config.ConfigValueType; final class Tokens { - static class Token { - private TokenType tokenType; - - Token(TokenType tokenType) { - this.tokenType = tokenType; - } - - public TokenType tokenType() { - return tokenType; - } - } - - static class Value extends Token { + static private class Value extends Token { private AbstractConfigValue value; @@ -27,12 +18,67 @@ final class Tokens { AbstractConfigValue value() { return value; } + + @Override + public String toString() { + String s = tokenType().name() + "(" + value.valueType().name() + + ")"; + if (value instanceof ConfigObject || value instanceof ConfigList) { + return s; + } else { + return s + "='" + value().unwrapped() + "'"; + } + } + } + + static private class Line extends Token { + private int lineNumber; + + Line(int lineNumber) { + super(TokenType.NEWLINE); + this.lineNumber = lineNumber; + } + + int lineNumber() { + return lineNumber; + } + + @Override + public String toString() { + return "NEWLINE@" + lineNumber; + } } static boolean isValue(Token token) { return token instanceof Value; } + static AbstractConfigValue getValue(Token token) { + if (token instanceof Value) { + return ((Value) token).value(); + } else { + throw new ConfigException.BugOrBroken( + "tried to get value of non-value token"); + } + } + + static boolean isValueWithType(Token t, ConfigValueType valueType) { + return isValue(t) && getValue(t).valueType() == valueType; + } + + static boolean isNewline(Token token) { + return token instanceof Line; + } + + static int getLineNumber(Token token) { + if (token instanceof Line) { + return ((Line) token).lineNumber(); + } else { + throw new ConfigException.BugOrBroken( + "tried to get line number from non-newline"); + } + } + static Token START = new Token(TokenType.START); static Token END = new Token(TokenType.END); static Token COMMA = new Token(TokenType.COMMA); @@ -42,6 +88,10 @@ final class Tokens { static Token OPEN_SQUARE = new Token(TokenType.OPEN_SQUARE); static 
Token CLOSE_SQUARE = new Token(TokenType.CLOSE_SQUARE); + static Token newLine(int lineNumberJustEnded) { + return new Line(lineNumberJustEnded); + } + static Token newValue(AbstractConfigValue value) { return new Value(value); }