more hacking, mostly on json tokenizer

This commit is contained in:
Havoc Pennington 2011-11-06 23:14:42 -05:00
parent 6b54720ddd
commit f5edf529a5
8 changed files with 533 additions and 1 deletions

View File

@ -122,4 +122,46 @@ public class ConfigException extends RuntimeException {
this(path, message, null);
}
}
/**
 * Signals either a bug in something or a broken runtime environment. Code
 * should never handle this exception; the right response is to fix whatever
 * causes it so that it is not thrown in the first place.
 */
public static class BugOrBroken extends ConfigException {
    private static final long serialVersionUID = 1L;

    /**
     * Creates the exception with only a message; the cause is passed on as
     * null.
     */
    public BugOrBroken(String message) {
        this(message, null);
    }

    /**
     * Creates the exception with a message and the underlying cause.
     */
    public BugOrBroken(String message, Throwable cause) {
        super(message, cause);
    }
}
/**
 * Exception for I/O failures (for example a failed read of the input),
 * tagged with the origin the input came from.
 */
public static class IO extends ConfigException {
    private static final long serialVersionUID = 1L;

    /**
     * Creates the exception with an origin and message; the cause is passed
     * on as null.
     */
    public IO(ConfigOrigin origin, String message) {
        this(origin, message, null);
    }

    /**
     * Creates the exception with an origin, a message and the underlying
     * cause.
     */
    public IO(ConfigOrigin origin, String message, Throwable cause) {
        super(origin, message, cause);
    }
}
/**
 * Exception for input that could not be parsed, tagged with the origin the
 * input came from.
 */
public static class Parse extends ConfigException {
    private static final long serialVersionUID = 1L;

    /**
     * Creates the exception with an origin and message; the cause is passed
     * on as null.
     */
    public Parse(ConfigOrigin origin, String message) {
        this(origin, message, null);
    }

    /**
     * Creates the exception with an origin, a message and the underlying
     * cause.
     */
    public Parse(ConfigOrigin origin, String message, Throwable cause) {
        super(origin, message, cause);
    }
}
}

View File

@ -0,0 +1,31 @@
package com.typesafe.config.impl;
import java.util.List;
import com.typesafe.config.ConfigOrigin;
import com.typesafe.config.ConfigValueType;
/**
 * Config value that stands for a substitution. It records the root object
 * and the list of tokens it was constructed with.
 *
 * This class is still a stub: {@link #valueType()} and {@link #unwrapped()}
 * both return null for now.
 */
final class ConfigSubstitution extends AbstractConfigValue {
    // Both fields are set once in the constructor and never reassigned.
    private final AbstractConfigObject root;
    private final List<Tokens.Token> tokens;

    /**
     * @param origin origin passed on to the superclass
     * @param root   root object stored with this substitution
     * @param tokens tokens stored with this substitution
     */
    ConfigSubstitution(ConfigOrigin origin, AbstractConfigObject root,
            List<Tokens.Token> tokens) {
        super(origin);
        this.root = root;
        this.tokens = tokens;
    }

    /**
     * Not implemented yet.
     *
     * @return always null for now
     */
    @Override
    public ConfigValueType valueType() {
        return null; // FIXME
    }

    /**
     * Not implemented yet.
     *
     * @return always null for now
     */
    @Override
    public Object unwrapped() {
        // TODO Auto-generated method stub
        return null;
    }
}

View File

@ -24,19 +24,22 @@ class DefaultTransformer implements ConfigTransformer {
} catch (NumberFormatException e) {
// oh well.
}
break;
case NULL:
if (s.equals("null"))
return new ConfigNull(value.origin());
break;
case BOOLEAN:
if (s.equals("true") || s.equals("yes")) {
return new ConfigBoolean(value.origin(), true);
} else if (s.equals("false") || s.equals("no")) {
return new ConfigBoolean(value.origin(), false);
}
break;
}
} else if (requested == ConfigValueType.STRING) {
switch (value.valueType()) {
case NUMBER:
case NUMBER: // FALL THROUGH
case BOOLEAN:
return new ConfigString(value.origin(), value.unwrapped()
.toString());

View File

@ -0,0 +1,37 @@
package com.typesafe.config.impl;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import com.typesafe.config.ConfigException;
import com.typesafe.config.ConfigOrigin;
/**
 * Entry points for turning character or byte input into a config object.
 * The input is tokenized with {@link Tokenizer}; building the object from
 * the tokens is not implemented yet.
 */
final class Parser {
    /**
     * Parses an input stream, which must be in UTF-8 encoding and should be
     * buffered. Does not close the stream; you have to arrange to do that
     * yourself.
     *
     * @param origin origin passed on to the tokenizer
     * @param input  stream to decode as UTF-8 and parse
     * @return the result of parsing the decoded characters
     * @throws ConfigException.BugOrBroken
     *             if the runtime does not know the UTF-8 charset
     */
    static AbstractConfigObject parse(ConfigOrigin origin, InputStream input) {
        try {
            return parse(origin, new InputStreamReader(input, "UTF-8"));
        } catch (UnsupportedEncodingException e) {
            // keep the original exception as the cause so the stack trace
            // shows where the runtime rejected the charset
            throw new ConfigException.BugOrBroken(
                    "Java runtime does not support UTF-8", e);
        }
    }

    /**
     * Tokenizes the reader and parses the resulting tokens. Nothing here
     * closes the reader.
     *
     * @param origin origin passed on to the tokenizer
     * @param input  reader to tokenize
     * @return the result of parsing the token stream
     */
    static AbstractConfigObject parse(ConfigOrigin origin,
            Reader input) {
        Iterator<Tokens.Token> tokens = Tokenizer.tokenize(origin, input);
        return parse(origin, tokens);
    }

    /**
     * Builds the config object from a token stream. Not implemented yet.
     *
     * @return always null for now
     */
    private static AbstractConfigObject parse(ConfigOrigin origin,
            Iterator<Tokens.Token> tokens) {
        return null; // FIXME
    }
}

View File

@ -0,0 +1,27 @@
package com.typesafe.config.impl;
import com.typesafe.config.ConfigValueType;
/**
 * Value types including ones that have no {@link ConfigValueType}
 * counterpart. Each constant carries its matching ConfigValueType in
 * {@link #cooked}; SUBSTITUTION and INCLUDE carry null there.
 */
enum RawValueType {
    OBJECT(ConfigValueType.OBJECT),

    LIST(ConfigValueType.LIST),

    NUMBER(ConfigValueType.NUMBER),

    BOOLEAN(ConfigValueType.BOOLEAN),

    NULL(ConfigValueType.NULL),

    STRING(ConfigValueType.STRING),

    SUBSTITUTION(null),

    INCLUDE(null);

    /**
     * The matching ConfigValueType, or null when there is none. Final so
     * that the shared enum constants cannot be altered after creation.
     */
    final ConfigValueType cooked;

    RawValueType(ConfigValueType cooked) {
        this.cooked = cooked;
    }
}

View File

@ -0,0 +1,5 @@
package com.typesafe.config.impl;
/**
 * The kinds of token a token stream can contain. The declaration order is
 * kept exactly as it was, since ordinals follow it.
 */
enum TokenType {
    /** Marker for the beginning of the stream. */
    START,
    /** Marker for the end of the stream. */
    END,
    COMMA,
    COLON,
    OPEN_CURLY,
    CLOSE_CURLY,
    OPEN_SQUARE,
    CLOSE_SQUARE,
    /** A token that carries a value. */
    VALUE
}

View File

@ -0,0 +1,315 @@
package com.typesafe.config.impl;
import java.io.IOException;
import java.io.Reader;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Queue;
import com.typesafe.config.ConfigException;
import com.typesafe.config.ConfigOrigin;
import com.typesafe.config.impl.Tokens.Token;
final class Tokenizer {
/**
* Tokenizes a Reader. Does not close the reader; you have to arrange to do
* that after you're done with the returned iterator.
*/
static Iterator<Tokens.Token> tokenize(ConfigOrigin origin, Reader input) {
return new TokenIterator(origin, input);
}
private static class TokenIterator implements Iterator<Tokens.Token> {
private ConfigOrigin origin;
private Reader input;
private int oneCharBuffer;
private int lineNumber;
private Queue<Tokens.Token> tokens;
private int nextChar() {
if (oneCharBuffer >= 0) {
int c = oneCharBuffer;
oneCharBuffer = -1;
return c;
} else {
try {
return input.read();
} catch (IOException e) {
throw new ConfigException.IO(origin, "read error: "
+ e.getMessage(), e);
}
}
}
private void putBack(int c) {
if (oneCharBuffer >= 0) {
throw new ConfigException.BugOrBroken(
"bug: attempt to putBack() twice in a row");
}
oneCharBuffer = c;
}
private int nextCharAfterWhitespace() {
for (;;) {
int c = nextChar();
if (c == -1) {
return -1;
} else if (Character.isWhitespace(c)) {
if (c == '\n')
lineNumber += 1;
continue;
} else {
return c;
}
}
}
private void parseError(String message) {
parseError(message, null);
}
private void parseError(String message, Throwable cause) {
throw new ConfigException.Parse(origin,
lineNumber + ": " + message, cause);
}
private void checkNextOrThrow(String expectedBefore, String expectedNow) {
int i = 0;
while (i < expectedNow.length()) {
int expected = expectedNow.charAt(i);
int actual = nextChar();
if (actual == -1)
parseError(String.format(
"Expecting '%s%s' but input data ended",
expectedBefore, expectedNow));
if (actual != expected)
parseError(String
.format("Expecting '%s%s' but got char '%c' rather than '%c'",
expectedBefore, expectedNow, actual,
expected));
++i;
}
}
private ConfigOrigin lineOrigin() {
return new SimpleConfigOrigin(origin.description() + ": line "
+ lineNumber);
}
private Tokens.Token pullTrue() {
// "t" has been already seen
checkNextOrThrow("t", "rue");
return Tokens.newBoolean(lineOrigin(), true);
}
private Tokens.Token pullFalse() {
// "f" has been already seen
checkNextOrThrow("f", "alse");
return Tokens.newBoolean(lineOrigin(), false);
}
private Tokens.Token pullNull() {
// "n" has been already seen
checkNextOrThrow("n", "ull");
return Tokens.newNull(lineOrigin());
}
private Tokens.Token pullNumber(int firstChar) {
StringBuilder sb = new StringBuilder();
sb.append((char) firstChar);
boolean containedDecimalOrE = false;
int c = nextChar();
while (c != -1 && "0123456789eE+-.".indexOf(c) >= 0) {
if (c == '.' || c == 'e' || c == 'E')
containedDecimalOrE = true;
sb.append((char) c);
c = nextChar();
}
// the last character we looked at wasn't part of the number, put it
// back
putBack(c);
String s = sb.toString();
try {
if (containedDecimalOrE) {
// force floating point representation
return Tokens
.newDouble(lineOrigin(), Double.parseDouble(s));
} else {
// this should throw if the integer is too large for Long
return Tokens.newLong(lineOrigin(), Long.parseLong(s));
}
} catch (NumberFormatException e) {
parseError("Invalid number", e);
throw new ConfigException.BugOrBroken("not reached");
}
}
private void pullEscapeSequence(StringBuilder sb) {
int escaped = nextChar();
if (escaped == -1)
parseError("End of input but backslash in string had nothing after it");
switch (escaped) {
case '"':
sb.append('"');
break;
case '\\':
sb.append('\\');
break;
case '/':
sb.append('/');
break;
case 'b':
sb.append('\b');
break;
case 'f':
sb.append('\f');
break;
case 'n':
sb.append('\n');
break;
case 'r':
sb.append('\r');
break;
case 't':
sb.append('\t');
break;
case 'u': {
// kind of absurdly slow, but screw it for now
char[] a = new char[4];
for (int i = 0; i < 4; ++i) {
int c = nextChar();
if (c == -1)
parseError("End of input but expecting 4 hex digits for \\uXXXX escape");
a[i] = (char) c;
}
String digits = new String(a);
try {
sb.appendCodePoint(Integer.parseInt(digits, 16));
} catch (NumberFormatException e) {
parseError(
String.format(
"Malformed hex digits after \\u escape in string: '%s'",
digits), e);
}
}
break;
default:
parseError(String
.format("backslash followed by '%c', this is not a valid escape sequence",
escaped));
}
}
private Tokens.Token pullQuotedString() {
// the open quote has already been consumed
StringBuilder sb = new StringBuilder();
int c = '\0'; // value doesn't get used
do {
c = nextChar();
if (c == -1)
parseError("End of input but string quote was still open");
if (c == '\\') {
pullEscapeSequence(sb);
} else if (c == '"') {
// end the loop, done!
} else {
sb.append((char) c);
}
} while (c != '"');
return Tokens.newString(lineOrigin(), sb.toString());
}
private void queueNextToken() {
int c = nextCharAfterWhitespace();
if (c == -1) {
tokens.add(Tokens.END);
} else {
Token t = null;
switch (c) {
case '"':
t = pullQuotedString();
break;
case ':':
t = Tokens.COLON;
break;
case ',':
t = Tokens.COMMA;
break;
case '{':
t = Tokens.OPEN_CURLY;
break;
case '}':
t = Tokens.CLOSE_CURLY;
break;
case '[':
t = Tokens.OPEN_SQUARE;
break;
case ']':
t = Tokens.CLOSE_SQUARE;
break;
case 't':
t = pullTrue();
break;
case 'f':
t = pullFalse();
break;
case 'n':
t = pullNull();
break;
}
if (t == null) {
if ("-0123456789".indexOf(c) >= 0) {
t = pullNumber(c);
} else {
parseError(String
.format("Character '%c' is not the start of any valid token",
c));
}
}
if (t == null)
throw new ConfigException.BugOrBroken(
"bug: failed to generate next token");
tokens.add(t);
}
}
TokenIterator(ConfigOrigin origin, Reader input) {
this.origin = origin;
this.input = input;
oneCharBuffer = -1;
lineNumber = 0;
tokens = new LinkedList<Tokens.Token>();
tokens.add(Tokens.START);
}
@Override
public boolean hasNext() {
return !tokens.isEmpty();
}
@Override
public Token next() {
Tokens.Token t = tokens.remove();
if (t != Tokens.END) {
queueNextToken();
if (tokens.isEmpty())
throw new ConfigException.BugOrBroken(
"bug: tokens queue should not be empty here");
}
return t;
}
@Override
public void remove() {
throw new UnsupportedOperationException(
"Does not make sense to remove items from token stream");
}
}
}

View File

@ -0,0 +1,72 @@
package com.typesafe.config.impl;
import com.typesafe.config.ConfigOrigin;
/**
 * Token classes, the shared punctuation and start/end tokens, and factory
 * methods for tokens that carry a value.
 */
final class Tokens {
    /** A token; it knows only its type. */
    static class Token {
        private final TokenType tokenType;

        Token(TokenType tokenType) {
            this.tokenType = tokenType;
        }

        public TokenType tokenType() {
            return tokenType;
        }
    }

    /** A token of type VALUE that wraps a config value. */
    static class Value extends Token {
        private final AbstractConfigValue value;

        Value(AbstractConfigValue value) {
            super(TokenType.VALUE);
            this.value = value;
        }

        AbstractConfigValue value() {
            return value;
        }
    }

    /** Returns true if the token is a {@link Value}. */
    static boolean isValue(Token token) {
        return token instanceof Value;
    }

    // Shared instances for tokens that carry no data. They are final so that
    // identity comparisons against them (for example t != Tokens.END) cannot
    // be broken by reassignment.
    static final Token START = new Token(TokenType.START);
    static final Token END = new Token(TokenType.END);
    static final Token COMMA = new Token(TokenType.COMMA);
    static final Token COLON = new Token(TokenType.COLON);
    static final Token OPEN_CURLY = new Token(TokenType.OPEN_CURLY);
    static final Token CLOSE_CURLY = new Token(TokenType.CLOSE_CURLY);
    static final Token OPEN_SQUARE = new Token(TokenType.OPEN_SQUARE);
    static final Token CLOSE_SQUARE = new Token(TokenType.CLOSE_SQUARE);

    /** Wraps an existing config value in a value token. */
    static Token newValue(AbstractConfigValue value) {
        return new Value(value);
    }

    /** Creates a value token holding a ConfigString. */
    static Token newString(ConfigOrigin origin, String value) {
        return newValue(new ConfigString(origin, value));
    }

    /** Creates a value token holding a ConfigInt. */
    static Token newInt(ConfigOrigin origin, int value) {
        return newValue(new ConfigInt(origin, value));
    }

    /** Creates a value token holding a ConfigDouble. */
    static Token newDouble(ConfigOrigin origin, double value) {
        return newValue(new ConfigDouble(origin, value));
    }

    /** Creates a value token holding a ConfigLong. */
    static Token newLong(ConfigOrigin origin, long value) {
        return newValue(new ConfigLong(origin, value));
    }

    /** Creates a value token holding a ConfigNull. */
    static Token newNull(ConfigOrigin origin) {
        return newValue(new ConfigNull(origin));
    }

    /** Creates a value token holding a ConfigBoolean. */
    static Token newBoolean(ConfigOrigin origin, boolean value) {
        return newValue(new ConfigBoolean(origin, value));
    }
}