Extract Path parsing into new class

Extract the logic to parse a Path out of the Parser and into
a new PathParser class.
This commit is contained in:
Preben Ingvaldsen 2015-03-11 13:59:57 -07:00
parent 0a804deff5
commit b19e38f29b
6 changed files with 221 additions and 205 deletions

View File

@ -382,7 +382,7 @@ final class Parser {
private static SubstitutionExpression tokenToSubstitutionExpression(Token valueToken) { private static SubstitutionExpression tokenToSubstitutionExpression(Token valueToken) {
List<Token> expression = Tokens.getSubstitutionPathExpression(valueToken); List<Token> expression = Tokens.getSubstitutionPathExpression(valueToken);
Path path = parsePathExpression(expression.iterator(), valueToken.origin()); Path path = PathParser.parsePathExpression(expression.iterator(), valueToken.origin());
boolean optional = Tokens.getSubstitutionOptional(valueToken); boolean optional = Tokens.getSubstitutionOptional(valueToken);
return new SubstitutionExpression(path, optional); return new SubstitutionExpression(path, optional);
@ -604,7 +604,7 @@ final class Parser {
} }
putBack(t); // put back the token we ended with putBack(t); // put back the token we ended with
return parsePathExpression(expression.iterator(), lineOrigin()); return PathParser.parsePathExpression(expression.iterator(), lineOrigin());
} }
} }
@ -1016,202 +1016,4 @@ final class Parser {
} }
} }
} }
// Mutable holder for one path element while a path expression is being
// assembled: the text collected so far plus a flag saying whether the
// element is allowed to end up empty.
static class Element {
    // true when a quoted empty string "" contributed to this element,
    // which makes an empty element legal
    boolean canBeEmpty;
    // text accumulated for this element so far
    StringBuilder sb;

    Element(String initial, boolean canBeEmpty) {
        this.sb = new StringBuilder(initial);
        this.canBeEmpty = canBeEmpty;
    }

    @Override
    public String toString() {
        StringBuilder rendered = new StringBuilder("Element(");
        rendered.append(sb.toString());
        rendered.append(",");
        rendered.append(canBeEmpty);
        rendered.append(")");
        return rendered.toString();
    }
}
// Appends newText to the path elements collected in "buf".
//
// buf       - elements built so far; the last entry is the one currently
//             being filled, and new entries are appended as separators
//             are found. Must not be empty.
// wasQuoted - true if newText came from a quoted string, in which case
//             periods are literal text and never split the element.
// newText   - the text to add.
//
// Unquoted text is split on every '.'; the part before each period
// completes the current element and a fresh empty element is started.
// This is done with a loop over indexOf() rather than by recursing on
// the remaining substring, so the stack depth and the amount of copying
// do not grow with the number of periods in newText.
private static void addPathText(List<Element> buf, boolean wasQuoted,
        String newText) {
    if (wasQuoted) {
        // add to current path element, periods included
        Element current = buf.get(buf.size() - 1);
        current.sb.append(newText);
        // any empty quoted string means this element can
        // now be empty.
        if (current.sb.length() == 0)
            current.canBeEmpty = true;
        return;
    }

    int start = 0;
    int dot = newText.indexOf('.', start);
    while (dot >= 0) {
        // current element plus the text up to the period is an element
        buf.get(buf.size() - 1).sb.append(newText, start, dot);
        // then start a new element
        buf.add(new Element("", false));
        start = dot + 1;
        dot = newText.indexOf('.', start);
    }
    // whatever follows the last period (possibly nothing) belongs to
    // the element that is now current
    buf.get(buf.size() - 1).sb.append(newText, start, newText.length());
}
// Convenience overload for callers that have no original text to report
// in error messages; delegates to the three-argument version with null.
private static Path parsePathExpression(Iterator<Token> expression,
        ConfigOrigin origin) {
    final String noOriginalText = null;
    return parsePathExpression(expression, origin, noOriginalText);
}
// Turns a sequence of tokens into a Path.
//
// expression   - the tokens making up the path expression
// origin       - origin reported in any BadPath exception
// originalText - text reported in any BadPath exception; may be null if
//                not available
//
// Throws ConfigException.BadPath when there are no tokens, when a token
// is not allowed in a path, or when an element ends up empty without
// having been written as a quoted "".
private static Path parsePathExpression(Iterator<Token> expression,
        ConfigOrigin origin, String originalText) {
    if (!expression.hasNext()) {
        throw new ConfigException.BadPath(origin, originalText,
                "Expecting a field name or path here, but got nothing");
    }

    // every token read is remembered, in order, for the PathBuilder
    ArrayList<Token> pathTokens = new ArrayList<Token>();
    // one entry per path element; parsing starts inside an empty one
    List<Element> elements = new ArrayList<Element>();
    elements.add(new Element("", false));

    while (expression.hasNext()) {
        Token token = expression.next();
        pathTokens.add(token);

        // IgnoredWhitespace tokens contribute no path text
        if (Tokens.isIgnoredWhitespace(token))
            continue;

        if (Tokens.isValueWithType(token, ConfigValueType.STRING)) {
            // a quoted string: periods inside it are not separators
            addPathText(elements, true, Tokens.getValue(token).transformToString());
            continue;
        }

        if (token == Tokens.END) {
            // nothing to add. Not expected when parsing a file, since
            // that parses a token list rather than the main token
            // iterator; expected when parsing a path from the API.
            continue;
        }

        // from here on, periods in the text count as separators
        String unquoted;
        if (Tokens.isValue(token)) {
            // a non-string value such as a number may contain a
            // period, and that period _does_ separate elements: we
            // want "foo 3.0bar" to be treated as a string even though
            // the tokenizer saw a number in it.
            unquoted = Tokens.getValue(token).transformToString();
        } else if (Tokens.isUnquotedText(token)) {
            unquoted = Tokens.getUnquotedText(token);
        } else {
            throw new ConfigException.BadPath(
                    origin,
                    originalText,
                    "Token not allowed in path expression: "
                            + token
                            + " (you can double-quote this token if you really want it here)");
        }
        addPathText(elements, false, unquoted);
    }

    PathBuilder builder = new PathBuilder(pathTokens);
    for (Element element : elements) {
        boolean illegallyEmpty = element.sb.length() == 0 && !element.canBeEmpty;
        if (illegallyEmpty) {
            throw new ConfigException.BadPath(
                    origin,
                    originalText,
                    "path has a leading, trailing, or two adjacent period '.' (use quoted \"\" empty string if you want an empty element)");
        }
        builder.appendKey(element.sb.toString());
    }
    return builder.result();
}
// origin used for tokenizing and error reporting when a path is given
// as a plain string
static ConfigOrigin apiOrigin = SimpleConfigOrigin.newSimple("path parameter");

// Parses a path given as a string. The fast parser is tried first; if it
// declines (returns null) the string is tokenized with CONF syntax and
// run through the full path-expression parser.
static Path parsePath(String path) {
    Path fast = speculativeFastParsePath(path);
    if (fast == null) {
        StringReader source = new StringReader(path);
        try {
            Iterator<Token> stream = Tokenizer.tokenize(apiOrigin, source,
                    ConfigSyntax.CONF);
            // the first token is START; discard it
            stream.next();
            return parsePathExpression(stream, apiOrigin, path);
        } finally {
            source.close();
        }
    }
    return fast;
}
// Decides whether a string must go through the full parser.
// Returns false only when s is non-empty, consists solely of ASCII
// letters, '_', '-' and '.', and every '.'-separated element is
// non-empty and does not begin with '-'. Anything else returns true.
private static boolean looksUnsafeForFastParser(String s) {
    final int length = s.length();
    if (length == 0)
        return true;

    // true at the start of the string and right after each '.'
    boolean atElementStart = true;
    for (int pos = 0; pos < length; pos++) {
        final char ch = s.charAt(pos);
        final boolean letterOrUnderscore =
                (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_';
        if (letterOrUnderscore) {
            atElementStart = false;
        } else if (ch == '.' || ch == '-') {
            // a leading '.', a ".." pair, or an element that begins
            // with '-' is left to the full parser
            if (atElementStart)
                return true;
            atElementStart = (ch == '.');
        } else {
            return true;
        }
    }
    // still at an element start here means the string ended with '.'
    return atElementStart;
}
// Builds a Path from the first "end" characters of s, splitting on '.'.
//
// tail - passed as the second Path constructor argument for the
//        rightmost element; may be null
// s    - the full path string; every element created here is given a
//        token list holding one unquoted-text token made from all of s
// end  - exclusive end index of the part of s to convert
//
// The string is consumed from right to left in a loop, each new Path
// taking the previously built one as its second constructor argument.
// A loop is used instead of one recursive call per element so that
// stack use does not depend on how many elements the path has.
private static Path fastPathBuild(Path tail, String s, int end) {
    Path result = tail;
    int elementEnd = end;
    while (true) {
        // lastIndexOf takes last index it should look at, so
        // elementEnd - 1 and not elementEnd
        int splitAt = s.lastIndexOf('.', elementEnd - 1);
        ArrayList<Token> tokens = new ArrayList<Token>();
        tokens.add(Tokens.newUnquotedText(null, s));
        // this works even if splitAt is -1; then we start the substring at 0
        result = new Path(s.substring(splitAt + 1, elementEnd), result, tokens);
        if (splitAt < 0)
            return result;
        elementEnd = splitAt;
    }
}
// Shortcut for simple inputs such as "foo" or "foo.bar": the input is
// trimmed and, if it passes the safety check, built directly without
// tokenizing. Returns null when the full parser has to be used.
private static Path speculativeFastParsePath(String path) {
    String trimmed = ConfigImplUtil.unicodeTrim(path);
    boolean needsFullParser = looksUnsafeForFastParser(trimmed);
    return needsFullParser ? null : fastPathBuild(null, trimmed, trimmed.length());
}
} }

View File

@ -245,6 +245,6 @@ final class Path {
} }
static Path newPath(String path) { static Path newPath(String path) {
return Parser.parsePath(path); return PathParser.parsePath(path);
} }
} }

View File

@ -0,0 +1,214 @@
/**
* Copyright (C) 2015 Typesafe Inc. <http://typesafe.com>
*/
package com.typesafe.config.impl;
import com.typesafe.config.ConfigException;
import com.typesafe.config.ConfigOrigin;
import com.typesafe.config.ConfigSyntax;
import com.typesafe.config.ConfigValueType;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
// Parses path expressions, either from an already tokenized stream or
// directly from a string, into Path objects.
final class PathParser {
    // Mutable holder for one path element while a path expression is
    // being assembled.
    static class Element {
        // text accumulated for this element so far
        StringBuilder sb;
        // an element can be empty if it has a quoted empty string "" in it
        boolean canBeEmpty;

        Element(String initial, boolean canBeEmpty) {
            this.canBeEmpty = canBeEmpty;
            this.sb = new StringBuilder(initial);
        }

        @Override
        public String toString() {
            return "Element(" + sb.toString() + "," + canBeEmpty + ")";
        }
    }

    // origin used for tokenizing and error reporting when a path is
    // given as a plain string
    static ConfigOrigin apiOrigin = SimpleConfigOrigin.newSimple("path parameter");

    // Parses a path given as a string. The fast parser is tried first;
    // if it declines (returns null) the string is tokenized with CONF
    // syntax and run through the full path-expression parser.
    static Path parsePath(String path) {
        Path speculated = speculativeFastParsePath(path);
        if (speculated != null)
            return speculated;

        StringReader reader = new StringReader(path);

        try {
            Iterator<Token> tokens = Tokenizer.tokenize(apiOrigin, reader,
                    ConfigSyntax.CONF);
            tokens.next(); // drop START
            return parsePathExpression(tokens, apiOrigin, path);
        } finally {
            reader.close();
        }
    }

    // Convenience overload for callers that have no original text to
    // report in error messages.
    protected static Path parsePathExpression(Iterator<Token> expression,
            ConfigOrigin origin) {
        return parsePathExpression(expression, origin, null);
    }

    // Turns a sequence of tokens into a Path.
    //
    // expression   - the tokens making up the path expression
    // origin       - origin reported in any BadPath exception
    // originalText - text reported in any BadPath exception; may be
    //                null if not available
    //
    // Throws ConfigException.BadPath when there are no tokens, when a
    // token is not allowed in a path, or when an element ends up empty
    // without having been written as a quoted "".
    protected static Path parsePathExpression(Iterator<Token> expression,
            ConfigOrigin origin, String originalText) {
        // each builder in "buf" is an element in the path.
        ArrayList<Token> pathTokens = new ArrayList<Token>();
        List<Element> buf = new ArrayList<Element>();
        buf.add(new Element("", false));

        if (!expression.hasNext()) {
            throw new ConfigException.BadPath(origin, originalText,
                    "Expecting a field name or path here, but got nothing");
        }

        while (expression.hasNext()) {
            Token t = expression.next();
            // every token, including ignored ones, is kept for the
            // PathBuilder
            pathTokens.add(t);

            // Ignore all IgnoredWhitespace tokens
            if (Tokens.isIgnoredWhitespace(t))
                continue;

            if (Tokens.isValueWithType(t, ConfigValueType.STRING)) {
                AbstractConfigValue v = Tokens.getValue(t);
                // this is a quoted string; so any periods
                // in here don't count as path separators
                String s = v.transformToString();

                addPathText(buf, true, s);
            } else if (t == Tokens.END) {
                // ignore this; when parsing a file, it should not happen
                // since we're parsing a token list rather than the main
                // token iterator, and when parsing a path expression from the
                // API, it's expected to have an END.
            } else {
                // any periods outside of a quoted string count as
                // separators
                String text;
                if (Tokens.isValue(t)) {
                    // appending a number here may add
                    // a period, but we _do_ count those as path
                    // separators, because we basically want
                    // "foo 3.0bar" to parse as a string even
                    // though there's a number in it. The fact that
                    // we tokenize non-string values is largely an
                    // implementation detail.
                    AbstractConfigValue v = Tokens.getValue(t);
                    text = v.transformToString();
                } else if (Tokens.isUnquotedText(t)) {
                    text = Tokens.getUnquotedText(t);
                } else {
                    throw new ConfigException.BadPath(
                            origin,
                            originalText,
                            "Token not allowed in path expression: "
                                    + t
                                    + " (you can double-quote this token if you really want it here)");
                }

                addPathText(buf, false, text);
            }
        }

        PathBuilder pb = new PathBuilder(pathTokens);
        for (Element e : buf) {
            if (e.sb.length() == 0 && !e.canBeEmpty) {
                throw new ConfigException.BadPath(
                        origin,
                        originalText,
                        "path has a leading, trailing, or two adjacent period '.' (use quoted \"\" empty string if you want an empty element)");
            } else {
                pb.appendKey(e.sb.toString());
            }
        }

        return pb.result();
    }

    // Appends newText to the path elements collected in "buf".
    //
    // buf       - elements built so far; the last entry is the one
    //             currently being filled. Must not be empty.
    // wasQuoted - true if newText came from a quoted string, in which
    //             case periods are literal text and never split.
    // newText   - the text to add.
    //
    // Unquoted text is split on every '.'; the part before each period
    // completes the current element and a fresh empty element is
    // started. This is done with a loop over indexOf() rather than by
    // recursing on the remaining substring, so the stack depth and the
    // amount of copying do not grow with the number of periods.
    private static void addPathText(List<Element> buf, boolean wasQuoted,
            String newText) {
        if (wasQuoted) {
            // add to current path element, periods included
            Element current = buf.get(buf.size() - 1);
            current.sb.append(newText);
            // any empty quoted string means this element can
            // now be empty.
            if (current.sb.length() == 0)
                current.canBeEmpty = true;
            return;
        }

        int start = 0;
        int dot = newText.indexOf('.', start);
        while (dot >= 0) {
            // current element plus the text up to the period is an element
            buf.get(buf.size() - 1).sb.append(newText, start, dot);
            // then start a new element
            buf.add(new Element("", false));
            start = dot + 1;
            dot = newText.indexOf('.', start);
        }
        // whatever follows the last period (possibly nothing) belongs
        // to the element that is now current
        buf.get(buf.size() - 1).sb.append(newText, start, newText.length());
    }

    // the idea is to see if the string has any chars or features
    // that might require the full parser to deal with.
    // Returns false only for a non-empty string of ASCII letters, '_',
    // '-' and '.', in which no element is empty or begins with '-'.
    private static boolean looksUnsafeForFastParser(String s) {
        boolean lastWasDot = true; // start of path is also a "dot"
        int len = s.length();
        if (s.isEmpty())
            return true;
        if (s.charAt(0) == '.')
            return true;
        if (s.charAt(len - 1) == '.')
            return true;

        for (int i = 0; i < len; ++i) {
            char c = s.charAt(i);
            if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_') {
                lastWasDot = false;
                continue;
            } else if (c == '.') {
                if (lastWasDot)
                    return true; // ".." means we need to throw an error
                lastWasDot = true;
            } else if (c == '-') {
                // '-' as the first character of an element is left to
                // the full parser
                if (lastWasDot)
                    return true;
                continue;
            } else {
                return true;
            }
        }

        if (lastWasDot)
            return true;

        return false;
    }

    // Builds a Path from the first "end" characters of s, splitting on
    // '.'.
    //
    // tail - passed as the second Path constructor argument for the
    //        rightmost element; may be null
    // s    - the full path string; every element created here is given
    //        a token list holding one unquoted-text token made from all
    //        of s
    // end  - exclusive end index of the part of s to convert
    //
    // The string is consumed from right to left in a loop, each new
    // Path taking the previously built one as its second constructor
    // argument. A loop is used instead of one recursive call per
    // element so that stack use does not depend on how many elements
    // the path has.
    private static Path fastPathBuild(Path tail, String s, int end) {
        Path result = tail;
        int elementEnd = end;
        while (true) {
            // lastIndexOf takes last index it should look at, so
            // elementEnd - 1 and not elementEnd
            int splitAt = s.lastIndexOf('.', elementEnd - 1);
            ArrayList<Token> tokens = new ArrayList<Token>();
            tokens.add(Tokens.newUnquotedText(null, s));
            // this works even if splitAt is -1; then we start the substring at 0
            result = new Path(s.substring(splitAt + 1, elementEnd), result, tokens);
            if (splitAt < 0)
                return result;
            elementEnd = splitAt;
        }
    }

    // do something much faster than the full parser if
    // we just have something like "foo" or "foo.bar"
    // Returns null when the trimmed input needs the full parser.
    private static Path speculativeFastParsePath(String path) {
        String s = ConfigImplUtil.unicodeTrim(path);
        if (looksUnsafeForFastParser(s))
            return null;

        return fastPathBuild(null, s, s.length());
    }
}

View File

@ -90,7 +90,7 @@ class ConfParserTest extends TestUtils {
// also parse with the standalone path parser and be sure the // also parse with the standalone path parser and be sure the
// outcome is the same. // outcome is the same.
try { try {
val shouldBeSame = Parser.parsePath(s) val shouldBeSame = PathParser.parsePath(s)
assertEquals(result, shouldBeSame) assertEquals(result, shouldBeSame)
} catch { } catch {
case e: ConfigException => case e: ConfigException =>

View File

@ -73,8 +73,8 @@ class PathTest extends TestUtils {
for (t <- tests) { for (t <- tests) {
assertEquals(t.expected, t.path.render()) assertEquals(t.expected, t.path.render())
assertEquals(t.path, Parser.parsePath(t.expected)) assertEquals(t.path, PathParser.parsePath(t.expected))
assertEquals(t.path, Parser.parsePath(t.path.render())) assertEquals(t.path, PathParser.parsePath(t.path.render()))
} }
} }

View File

@ -668,7 +668,7 @@ abstract trait TestUtils {
} }
def configNodeKey(path: String) = { def configNodeKey(path: String) = {
val parsedPath = Parser.parsePath(path) val parsedPath = PathParser.parsePath(path)
new ConfigNodeKey(parsedPath) new ConfigNodeKey(parsedPath)
} }