From f5edf529a5bd52ada69a9a3136912740d9b0f1b3 Mon Sep 17 00:00:00 2001
From: Havoc Pennington
Date: Sun, 6 Nov 2011 23:14:42 -0500
Subject: [PATCH] more hacking, mostly on json tokenizer

---
Review notes (below the "---", so not part of the commit message):
generic type parameters restored in the tokenizer, line numbers now start
at 1, signed \uXXXX digits are rejected, Parser keeps the exception cause.
The blob ids on the "index" lines are the pre-review ones.

 src/com/typesafe/config/ConfigException.java  |  47 +++
 .../config/impl/ConfigSubstitution.java       |  35 ++
 .../config/impl/DefaultTransformer.java       |   5 +-
 src/com/typesafe/config/impl/Parser.java      |  42 +++
 .../typesafe/config/impl/RawValueType.java    |  31 ++
 src/com/typesafe/config/impl/TokenType.java   |   6 +
 src/com/typesafe/config/impl/Tokenizer.java   | 339 ++++++++++++++++++
 src/com/typesafe/config/impl/Tokens.java      |  77 ++++
 8 files changed, 581 insertions(+), 1 deletion(-)
 create mode 100644 src/com/typesafe/config/impl/ConfigSubstitution.java
 create mode 100644 src/com/typesafe/config/impl/Parser.java
 create mode 100644 src/com/typesafe/config/impl/RawValueType.java
 create mode 100644 src/com/typesafe/config/impl/TokenType.java
 create mode 100644 src/com/typesafe/config/impl/Tokenizer.java
 create mode 100644 src/com/typesafe/config/impl/Tokens.java

diff --git a/src/com/typesafe/config/ConfigException.java b/src/com/typesafe/config/ConfigException.java
index d9cf3823..24dcd58b 100644
--- a/src/com/typesafe/config/ConfigException.java
+++ b/src/com/typesafe/config/ConfigException.java
@@ -122,4 +122,51 @@ public class ConfigException extends RuntimeException {
             this(path, message, null);
         }
     }
+
+    /**
+     * Exception indicating that there's a bug in something or the runtime
+     * environment is broken. This exception should never be handled; instead,
+     * something should be fixed to keep the exception from occurring.
+     */
+    public static class BugOrBroken extends ConfigException {
+        private static final long serialVersionUID = 1L;
+
+        public BugOrBroken(String message, Throwable cause) {
+            super(message, cause);
+        }
+
+        public BugOrBroken(String message) {
+            this(message, null);
+        }
+    }
+
+    /**
+     * Exception indicating that there was an IO error while reading input.
+     */
+    public static class IO extends ConfigException {
+        private static final long serialVersionUID = 1L;
+
+        public IO(ConfigOrigin origin, String message, Throwable cause) {
+            super(origin, message, cause);
+        }
+
+        public IO(ConfigOrigin origin, String message) {
+            this(origin, message, null);
+        }
+    }
+
+    /**
+     * Exception indicating that the input could not be tokenized or parsed.
+     */
+    public static class Parse extends ConfigException {
+        private static final long serialVersionUID = 1L;
+
+        public Parse(ConfigOrigin origin, String message, Throwable cause) {
+            super(origin, message, cause);
+        }
+
+        public Parse(ConfigOrigin origin, String message) {
+            this(origin, message, null);
+        }
+    }
 }
diff --git a/src/com/typesafe/config/impl/ConfigSubstitution.java b/src/com/typesafe/config/impl/ConfigSubstitution.java
new file mode 100644
index 00000000..05d8743e
--- /dev/null
+++ b/src/com/typesafe/config/impl/ConfigSubstitution.java
@@ -0,0 +1,35 @@
+package com.typesafe.config.impl;
+
+import java.util.List;
+
+import com.typesafe.config.ConfigOrigin;
+import com.typesafe.config.ConfigValueType;
+
+/**
+ * Work in progress: stores the root object and a list of tokens, but
+ * valueType() and unwrapped() are stubs that return null for now.
+ */
+final class ConfigSubstitution extends AbstractConfigValue {
+
+    private final AbstractConfigObject root;
+    private final List tokens;
+
+    ConfigSubstitution(ConfigOrigin origin, AbstractConfigObject root,
+            List tokens) {
+        super(origin);
+        this.root = root;
+        this.tokens = tokens;
+    }
+
+    @Override
+    public ConfigValueType valueType() {
+        return null; // FIXME
+    }
+
+    @Override
+    public Object unwrapped() {
+        // TODO not implemented yet; always returns null
+        return null;
+    }
+
+}
diff --git a/src/com/typesafe/config/impl/DefaultTransformer.java b/src/com/typesafe/config/impl/DefaultTransformer.java
index 98a34513..cf2070a9 100644
--- a/src/com/typesafe/config/impl/DefaultTransformer.java
+++ b/src/com/typesafe/config/impl/DefaultTransformer.java
@@ -24,19 +24,22 @@ class DefaultTransformer implements ConfigTransformer {
                 } catch (NumberFormatException e) {
                     // oh well.
                 }
+                break;
             case NULL:
                 if (s.equals("null"))
                     return new ConfigNull(value.origin());
+                break;
             case BOOLEAN:
                 if (s.equals("true") || s.equals("yes")) {
                     return new ConfigBoolean(value.origin(), true);
                 } else if (s.equals("false") || s.equals("no")) {
                     return new ConfigBoolean(value.origin(), false);
                 }
+                break;
             }
         } else if (requested == ConfigValueType.STRING) {
             switch (value.valueType()) {
-            case NUMBER:
+            case NUMBER: // FALL THROUGH
             case BOOLEAN:
                 return new ConfigString(value.origin(), value.unwrapped()
                         .toString());
diff --git a/src/com/typesafe/config/impl/Parser.java b/src/com/typesafe/config/impl/Parser.java
new file mode 100644
index 00000000..2a30127c
--- /dev/null
+++ b/src/com/typesafe/config/impl/Parser.java
@@ -0,0 +1,42 @@
+package com.typesafe.config.impl;
+
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.UnsupportedEncodingException;
+import java.util.Iterator;
+
+import com.typesafe.config.ConfigException;
+import com.typesafe.config.ConfigOrigin;
+
+final class Parser {
+    /**
+     * Parses an input stream, which must be in UTF-8 encoding and should be
+     * buffered. Does not close the stream; you have to arrange to do that
+     * yourself.
+     */
+    static AbstractConfigObject parse(ConfigOrigin origin, InputStream input) {
+        try {
+            return parse(origin, new InputStreamReader(input, "UTF-8"));
+        } catch (UnsupportedEncodingException e) {
+            throw new ConfigException.BugOrBroken(
+                    "Java runtime does not support UTF-8", e);
+        }
+    }
+
+    /**
+     * Tokenizes the reader and parses the resulting token stream. Does not
+     * close the reader.
+     */
+    static AbstractConfigObject parse(ConfigOrigin origin,
+            Reader input) {
+        Iterator<Tokens.Token> tokens = Tokenizer.tokenize(origin, input);
+        return parse(origin, tokens);
+    }
+
+    /** Parses a token stream. Not implemented yet; always returns null. */
+    private static AbstractConfigObject parse(ConfigOrigin origin,
+            Iterator<Tokens.Token> tokens) {
+        return null; // FIXME
+    }
+}
diff --git a/src/com/typesafe/config/impl/RawValueType.java b/src/com/typesafe/config/impl/RawValueType.java
new file mode 100644
index 00000000..ae5ac2e5
--- /dev/null
+++ b/src/com/typesafe/config/impl/RawValueType.java
@@ -0,0 +1,31 @@
+package com.typesafe.config.impl;
+
+import com.typesafe.config.ConfigValueType;
+
+/**
+ * Raw value types, each paired with its "cooked" ConfigValueType.
+ * SUBSTITUTION and INCLUDE have no cooked type, so theirs is null.
+ */
+enum RawValueType {
+    OBJECT(ConfigValueType.OBJECT),
+
+    LIST(ConfigValueType.LIST),
+
+    NUMBER(ConfigValueType.NUMBER),
+
+    BOOLEAN(ConfigValueType.BOOLEAN),
+
+    NULL(ConfigValueType.NULL),
+
+    STRING(ConfigValueType.STRING),
+
+    SUBSTITUTION(null),
+
+    INCLUDE(null);
+
+    final ConfigValueType cooked;
+
+    RawValueType(ConfigValueType cooked) {
+        this.cooked = cooked;
+    }
+}
diff --git a/src/com/typesafe/config/impl/TokenType.java b/src/com/typesafe/config/impl/TokenType.java
new file mode 100644
index 00000000..3b946aad
--- /dev/null
+++ b/src/com/typesafe/config/impl/TokenType.java
@@ -0,0 +1,6 @@
+package com.typesafe.config.impl;
+
+/** The kinds of token that a Tokens.Token can carry. */
+enum TokenType {
+    START, END, COMMA, COLON, OPEN_CURLY, CLOSE_CURLY, OPEN_SQUARE, CLOSE_SQUARE, VALUE;
+}
diff --git a/src/com/typesafe/config/impl/Tokenizer.java b/src/com/typesafe/config/impl/Tokenizer.java
new file mode 100644
index 00000000..8096960a
--- /dev/null
+++ b/src/com/typesafe/config/impl/Tokenizer.java
@@ -0,0 +1,339 @@
+package com.typesafe.config.impl;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.Queue;
+
+import com.typesafe.config.ConfigException;
+import com.typesafe.config.ConfigOrigin;
+import com.typesafe.config.impl.Tokens.Token;
+
+final class Tokenizer {
+    /**
+     * Tokenizes a Reader. Does not close the reader; you have to arrange to do
+     * that after you're done with the returned iterator.
+     */
+    static Iterator<Token> tokenize(ConfigOrigin origin, Reader input) {
+        return new TokenIterator(origin, input);
+    }
+
+    /**
+     * Pull-based iterator over the tokens of the input. The first token is
+     * always Tokens.START and the last one is always Tokens.END.
+     */
+    private static class TokenIterator implements Iterator<Token> {
+
+        private final ConfigOrigin origin;
+        private final Reader input;
+        // pushed-back character, or -1 when nothing is pushed back
+        private int oneCharBuffer;
+        // 1-based number of the line being read, for origins and errors
+        private int lineNumber;
+        private final Queue<Token> tokens;
+
+        /** Returns the pushed-back character if any, else reads one (-1 at end). */
+        private int nextChar() {
+            if (oneCharBuffer >= 0) {
+                int c = oneCharBuffer;
+                oneCharBuffer = -1;
+                return c;
+            } else {
+                try {
+                    return input.read();
+                } catch (IOException e) {
+                    throw new ConfigException.IO(origin, "read error: "
+                            + e.getMessage(), e);
+                }
+            }
+        }
+
+        /** Pushes one character back so that the next nextChar() returns it. */
+        private void putBack(int c) {
+            if (oneCharBuffer >= 0) {
+                throw new ConfigException.BugOrBroken(
+                        "bug: attempt to putBack() twice in a row");
+            }
+            oneCharBuffer = c;
+        }
+
+        /** Skips whitespace, counting newlines; returns the next char or -1. */
+        private int nextCharAfterWhitespace() {
+            for (;;) {
+                int c = nextChar();
+
+                if (c == -1) {
+                    return -1;
+                } else if (Character.isWhitespace(c)) {
+                    if (c == '\n')
+                        lineNumber += 1;
+                    continue;
+                } else {
+                    return c;
+                }
+            }
+        }
+
+        private void parseError(String message) {
+            parseError(message, null);
+        }
+
+        /** Always throws ConfigException.Parse, prefixed with the line number. */
+        private void parseError(String message, Throwable cause) {
+            throw new ConfigException.Parse(origin,
+                    lineNumber + ": " + message, cause);
+        }
+
+        /**
+         * Consumes expectedNow from the input, raising a parse error on any
+         * mismatch; expectedBefore is only used to build the error message.
+         */
+        private void checkNextOrThrow(String expectedBefore, String expectedNow) {
+            int i = 0;
+            while (i < expectedNow.length()) {
+                int expected = expectedNow.charAt(i);
+                int actual = nextChar();
+
+                if (actual == -1)
+                    parseError(String.format(
+                            "Expecting '%s%s' but input data ended",
+                            expectedBefore, expectedNow));
+
+                if (actual != expected)
+                    parseError(String
+                            .format("Expecting '%s%s' but got char '%c' rather than '%c'",
+                                    expectedBefore, expectedNow, actual,
+                                    expected));
+
+                ++i;
+            }
+        }
+
+        /** Builds an origin naming the current line, for new value tokens. */
+        private ConfigOrigin lineOrigin() {
+            return new SimpleConfigOrigin(origin.description() + ": line "
+                    + lineNumber);
+        }
+
+        private Tokens.Token pullTrue() {
+            // "t" has been already seen
+            checkNextOrThrow("t", "rue");
+            return Tokens.newBoolean(lineOrigin(), true);
+        }
+
+        private Tokens.Token pullFalse() {
+            // "f" has been already seen
+            checkNextOrThrow("f", "alse");
+            return Tokens.newBoolean(lineOrigin(), false);
+        }
+
+        private Tokens.Token pullNull() {
+            // "n" has been already seen
+            checkNextOrThrow("n", "ull");
+            return Tokens.newNull(lineOrigin());
+        }
+
+        /**
+         * Reads the rest of a number whose first character was already consumed.
+         * Produces a double if it contains '.', 'e' or 'E', otherwise a long.
+         */
+        private Tokens.Token pullNumber(int firstChar) {
+            StringBuilder sb = new StringBuilder();
+            sb.append((char) firstChar);
+            boolean containedDecimalOrE = false;
+            int c = nextChar();
+            while (c != -1 && "0123456789eE+-.".indexOf(c) >= 0) {
+                if (c == '.' || c == 'e' || c == 'E')
+                    containedDecimalOrE = true;
+                sb.append((char) c);
+                c = nextChar();
+            }
+            // the last character we looked at wasn't part of the number, put it
+            // back (putting back -1 at end of input leaves the buffer empty)
+            putBack(c);
+            String s = sb.toString();
+            try {
+                if (containedDecimalOrE) {
+                    // force floating point representation
+                    return Tokens
+                            .newDouble(lineOrigin(), Double.parseDouble(s));
+                } else {
+                    // this should throw if the integer is too large for Long
+                    return Tokens.newLong(lineOrigin(), Long.parseLong(s));
+                }
+            } catch (NumberFormatException e) {
+                parseError("Invalid number", e);
+                throw new ConfigException.BugOrBroken("not reached");
+            }
+        }
+
+        /** Decodes the escape that follows a backslash and appends it to sb. */
+        private void pullEscapeSequence(StringBuilder sb) {
+            int escaped = nextChar();
+            if (escaped == -1)
+                parseError("End of input but backslash in string had nothing after it");
+
+            switch (escaped) {
+            case '"':
+                sb.append('"');
+                break;
+            case '\\':
+                sb.append('\\');
+                break;
+            case '/':
+                sb.append('/');
+                break;
+            case 'b':
+                sb.append('\b');
+                break;
+            case 'f':
+                sb.append('\f');
+                break;
+            case 'n':
+                sb.append('\n');
+                break;
+            case 'r':
+                sb.append('\r');
+                break;
+            case 't':
+                sb.append('\t');
+                break;
+            case 'u': {
+                // kind of absurdly slow, but screw it for now
+                char[] a = new char[4];
+                for (int i = 0; i < 4; ++i) {
+                    int c = nextChar();
+                    if (c == -1)
+                        parseError("End of input but expecting 4 hex digits for \\uXXXX escape");
+                    a[i] = (char) c;
+                }
+                String digits = new String(a);
+                try {
+                    // parseInt() alone would let a leading sign through
+                    if (a[0] == '+' || a[0] == '-')
+                        throw new NumberFormatException(digits);
+                    sb.appendCodePoint(Integer.parseInt(digits, 16));
+                } catch (NumberFormatException e) {
+                    parseError(
+                            String.format(
+                                    "Malformed hex digits after \\u escape in string: '%s'",
+                                    digits), e);
+                }
+            }
+                break;
+            default:
+                parseError(String
+                        .format("backslash followed by '%c', this is not a valid escape sequence",
+                                escaped));
+            }
+        }
+
+        private Tokens.Token pullQuotedString() {
+            // the open quote has already been consumed
+            StringBuilder sb = new StringBuilder();
+            int c = '\0'; // value doesn't get used
+            do {
+                c = nextChar();
+                if (c == -1)
+                    parseError("End of input but string quote was still open");
+
+                if (c == '\\') {
+                    pullEscapeSequence(sb);
+                } else if (c == '"') {
+                    // end the loop, done!
+                } else {
+                    sb.append((char) c);
+                }
+            } while (c != '"');
+            return Tokens.newString(lineOrigin(), sb.toString());
+        }
+
+        /** Reads one token from the input and appends it to the queue. */
+        private void queueNextToken() {
+            int c = nextCharAfterWhitespace();
+            if (c == -1) {
+                tokens.add(Tokens.END);
+            } else {
+                Token t = null;
+                switch (c) {
+                case '"':
+                    t = pullQuotedString();
+                    break;
+                case ':':
+                    t = Tokens.COLON;
+                    break;
+                case ',':
+                    t = Tokens.COMMA;
+                    break;
+                case '{':
+                    t = Tokens.OPEN_CURLY;
+                    break;
+                case '}':
+                    t = Tokens.CLOSE_CURLY;
+                    break;
+                case '[':
+                    t = Tokens.OPEN_SQUARE;
+                    break;
+                case ']':
+                    t = Tokens.CLOSE_SQUARE;
+                    break;
+                case 't':
+                    t = pullTrue();
+                    break;
+                case 'f':
+                    t = pullFalse();
+                    break;
+                case 'n':
+                    t = pullNull();
+                    break;
+                }
+                if (t == null) {
+                    if ("-0123456789".indexOf(c) >= 0) {
+                        t = pullNumber(c);
+                    } else {
+                        parseError(String
+                                .format("Character '%c' is not the start of any valid token",
+                                        c));
+                    }
+                }
+                if (t == null)
+                    throw new ConfigException.BugOrBroken(
+                            "bug: failed to generate next token");
+                tokens.add(t);
+            }
+        }
+
+        TokenIterator(ConfigOrigin origin, Reader input) {
+            this.origin = origin;
+            this.input = input;
+            oneCharBuffer = -1;
+            lineNumber = 1;
+            tokens = new LinkedList<Token>();
+            tokens.add(Tokens.START);
+        }
+
+        @Override
+        public boolean hasNext() {
+            return !tokens.isEmpty();
+        }
+
+        @Override
+        public Token next() {
+            Tokens.Token t = tokens.remove();
+            if (t != Tokens.END) {
+                queueNextToken();
+                if (tokens.isEmpty())
+                    throw new ConfigException.BugOrBroken(
+                            "bug: tokens queue should not be empty here");
+            }
+            return t;
+        }
+
+        @Override
+        public void remove() {
+            throw new UnsupportedOperationException(
+                    "Does not make sense to remove items from token stream");
+        }
+    }
+}
diff --git a/src/com/typesafe/config/impl/Tokens.java b/src/com/typesafe/config/impl/Tokens.java
new file mode 100644
index 00000000..65aef46a
--- /dev/null
+++ b/src/com/typesafe/config/impl/Tokens.java
@@ -0,0 +1,77 @@
+package com.typesafe.config.impl;
+
+import com.typesafe.config.ConfigOrigin;
+
+/**
+ * Holder for the token classes, the shared singleton tokens and the factory
+ * methods for value tokens.
+ */
+final class Tokens {
+    static class Token {
+        private final TokenType tokenType;
+
+        Token(TokenType tokenType) {
+            this.tokenType = tokenType;
+        }
+
+        public TokenType tokenType() {
+            return tokenType;
+        }
+    }
+
+    static class Value extends Token {
+
+        private final AbstractConfigValue value;
+
+        Value(AbstractConfigValue value) {
+            super(TokenType.VALUE);
+            this.value = value;
+        }
+
+        AbstractConfigValue value() {
+            return value;
+        }
+    }
+
+    static boolean isValue(Token token) {
+        return token instanceof Value;
+    }
+
+    // tokens without a payload are shared; Tokenizer compares END by identity
+    static final Token START = new Token(TokenType.START);
+    static final Token END = new Token(TokenType.END);
+    static final Token COMMA = new Token(TokenType.COMMA);
+    static final Token COLON = new Token(TokenType.COLON);
+    static final Token OPEN_CURLY = new Token(TokenType.OPEN_CURLY);
+    static final Token CLOSE_CURLY = new Token(TokenType.CLOSE_CURLY);
+    static final Token OPEN_SQUARE = new Token(TokenType.OPEN_SQUARE);
+    static final Token CLOSE_SQUARE = new Token(TokenType.CLOSE_SQUARE);
+
+    static Token newValue(AbstractConfigValue value) {
+        return new Value(value);
+    }
+
+    static Token newString(ConfigOrigin origin, String value) {
+        return newValue(new ConfigString(origin, value));
+    }
+
+    static Token newInt(ConfigOrigin origin, int value) {
+        return newValue(new ConfigInt(origin, value));
+    }
+
+    static Token newDouble(ConfigOrigin origin, double value) {
+        return newValue(new ConfigDouble(origin, value));
+    }
+
+    static Token newLong(ConfigOrigin origin, long value) {
+        return newValue(new ConfigLong(origin, value));
+    }
+
+    static Token newNull(ConfigOrigin origin) {
+        return newValue(new ConfigNull(origin));
+    }
+
+    static Token newBoolean(ConfigOrigin origin, boolean value) {
+        return newValue(new ConfigBoolean(origin, value));
+    }
+}