From b19e38f29ba88ae5b8d316ea6309a3ecbf50df1f Mon Sep 17 00:00:00 2001 From: Preben Ingvaldsen Date: Wed, 11 Mar 2015 13:59:57 -0700 Subject: [PATCH] Extract Path parsing into new class Extract the logic to parse a Path out of the Parser and into a new PathParser class. --- .../java/com/typesafe/config/impl/Parser.java | 202 +---------------- .../java/com/typesafe/config/impl/Path.java | 2 +- .../com/typesafe/config/impl/PathParser.java | 214 ++++++++++++++++++ .../typesafe/config/impl/ConfParserTest.scala | 2 +- .../com/typesafe/config/impl/PathTest.scala | 4 +- .../com/typesafe/config/impl/TestUtils.scala | 2 +- 6 files changed, 221 insertions(+), 205 deletions(-) create mode 100644 config/src/main/java/com/typesafe/config/impl/PathParser.java diff --git a/config/src/main/java/com/typesafe/config/impl/Parser.java b/config/src/main/java/com/typesafe/config/impl/Parser.java index e5b9362f..58afc897 100644 --- a/config/src/main/java/com/typesafe/config/impl/Parser.java +++ b/config/src/main/java/com/typesafe/config/impl/Parser.java @@ -382,7 +382,7 @@ final class Parser { private static SubstitutionExpression tokenToSubstitutionExpression(Token valueToken) { List expression = Tokens.getSubstitutionPathExpression(valueToken); - Path path = parsePathExpression(expression.iterator(), valueToken.origin()); + Path path = PathParser.parsePathExpression(expression.iterator(), valueToken.origin()); boolean optional = Tokens.getSubstitutionOptional(valueToken); return new SubstitutionExpression(path, optional); @@ -604,7 +604,7 @@ final class Parser { } putBack(t); // put back the token we ended with - return parsePathExpression(expression.iterator(), lineOrigin()); + return PathParser.parsePathExpression(expression.iterator(), lineOrigin()); } } @@ -1016,202 +1016,4 @@ final class Parser { } } } - - static class Element { - StringBuilder sb; - // an element can be empty if it has a quoted empty string "" in it - boolean canBeEmpty; - - Element(String initial, boolean canBeEmpty) { - this.canBeEmpty = canBeEmpty; - this.sb = new StringBuilder(initial); - } - - @Override - public String toString() { - return "Element(" + sb.toString() + "," + canBeEmpty + ")"; - } - } - - private static void addPathText(List buf, boolean wasQuoted, - String newText) { - int i = wasQuoted ? -1 : newText.indexOf('.'); - Element current = buf.get(buf.size() - 1); - if (i < 0) { - // add to current path element - current.sb.append(newText); - // any empty quoted string means this element can - // now be empty. - if (wasQuoted && current.sb.length() == 0) - current.canBeEmpty = true; - } else { - // "buf" plus up to the period is an element - current.sb.append(newText.substring(0, i)); - // then start a new element - buf.add(new Element("", false)); - // recurse to consume remainder of newText - addPathText(buf, false, newText.substring(i + 1)); - } - } - - private static Path parsePathExpression(Iterator expression, - ConfigOrigin origin) { - return parsePathExpression(expression, origin, null); - } - - // originalText may be null if not available - private static Path parsePathExpression(Iterator expression, - ConfigOrigin origin, String originalText) { - // each builder in "buf" is an element in the path. - ArrayList pathTokens = new ArrayList(); - List buf = new ArrayList(); - buf.add(new Element("", false)); - - if (!expression.hasNext()) { - throw new ConfigException.BadPath(origin, originalText, - "Expecting a field name or path here, but got nothing"); - } - - while (expression.hasNext()) { - Token t = expression.next(); - pathTokens.add(t); - - // Ignore all IgnoredWhitespace tokens - if (Tokens.isIgnoredWhitespace(t)) - continue; - - if (Tokens.isValueWithType(t, ConfigValueType.STRING)) { - AbstractConfigValue v = Tokens.getValue(t); - // this is a quoted string; so any periods - // in here don't count as path separators - String s = v.transformToString(); - - addPathText(buf, true, s); - } else if (t == Tokens.END) { - // ignore this; when parsing a file, it should not happen - // since we're parsing a token list rather than the main - // token iterator, and when parsing a path expression from the - // API, it's expected to have an END. - } else { - // any periods outside of a quoted string count as - // separators - String text; - if (Tokens.isValue(t)) { - // appending a number here may add - // a period, but we _do_ count those as path - // separators, because we basically want - // "foo 3.0bar" to parse as a string even - // though there's a number in it. The fact that - // we tokenize non-string values is largely an - // implementation detail. - AbstractConfigValue v = Tokens.getValue(t); - text = v.transformToString(); - } else if (Tokens.isUnquotedText(t)) { - text = Tokens.getUnquotedText(t); - } else { - throw new ConfigException.BadPath( - origin, - originalText, - "Token not allowed in path expression: " - + t - + " (you can double-quote this token if you really want it here)"); - } - - addPathText(buf, false, text); - } - } - - PathBuilder pb = new PathBuilder(pathTokens); - for (Element e : buf) { - if (e.sb.length() == 0 && !e.canBeEmpty) { - throw new ConfigException.BadPath( - origin, - originalText, - "path has a leading, trailing, or two adjacent period '.' (use quoted \"\" empty string if you want an empty element)"); - } else { - pb.appendKey(e.sb.toString()); - } - } - - return pb.result(); - } - - static ConfigOrigin apiOrigin = SimpleConfigOrigin.newSimple("path parameter"); - - static Path parsePath(String path) { - Path speculated = speculativeFastParsePath(path); - if (speculated != null) - return speculated; - - StringReader reader = new StringReader(path); - - try { - Iterator tokens = Tokenizer.tokenize(apiOrigin, reader, - ConfigSyntax.CONF); - tokens.next(); // drop START - return parsePathExpression(tokens, apiOrigin, path); - } finally { - reader.close(); - } - } - - // the idea is to see if the string has any chars or features - // that might require the full parser to deal with. - private static boolean looksUnsafeForFastParser(String s) { - boolean lastWasDot = true; // start of path is also a "dot" - int len = s.length(); - if (s.isEmpty()) - return true; - if (s.charAt(0) == '.') - return true; - if (s.charAt(len - 1) == '.') - return true; - - for (int i = 0; i < len; ++i) { - char c = s.charAt(i); - if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_') { - lastWasDot = false; - continue; - } else if (c == '.') { - if (lastWasDot) - return true; // ".." means we need to throw an error - lastWasDot = true; - } else if (c == '-') { - if (lastWasDot) - return true; - continue; - } else { - return true; - } - } - - if (lastWasDot) - return true; - - return false; - } - - private static Path fastPathBuild(Path tail, String s, int end) { - // lastIndexOf takes last index it should look at, end - 1 not end - int splitAt = s.lastIndexOf('.', end - 1); - ArrayList tokens = new ArrayList(); - tokens.add(Tokens.newUnquotedText(null, s)); - // this works even if splitAt is -1; then we start the substring at 0 - Path withOneMoreElement = new Path(s.substring(splitAt + 1, end), tail, tokens); - if (splitAt < 0) { - return withOneMoreElement; - } else { - return fastPathBuild(withOneMoreElement, s, splitAt); - } - } - - // do something much faster than the full parser if - // we just have something like "foo" or "foo.bar" - private static Path speculativeFastParsePath(String path) { - String s = ConfigImplUtil.unicodeTrim(path); - if (looksUnsafeForFastParser(s)) - return null; - - return fastPathBuild(null, s, s.length()); - } } diff --git a/config/src/main/java/com/typesafe/config/impl/Path.java b/config/src/main/java/com/typesafe/config/impl/Path.java index 5cb041a1..eb4ed012 100644 --- a/config/src/main/java/com/typesafe/config/impl/Path.java +++ b/config/src/main/java/com/typesafe/config/impl/Path.java @@ -245,6 +245,6 @@ final class Path { } static Path newPath(String path) { - return Parser.parsePath(path); + return PathParser.parsePath(path); } } diff --git a/config/src/main/java/com/typesafe/config/impl/PathParser.java b/config/src/main/java/com/typesafe/config/impl/PathParser.java new file mode 100644 index 00000000..69065628 --- /dev/null +++ b/config/src/main/java/com/typesafe/config/impl/PathParser.java @@ -0,0 +1,214 @@ +/** + * Copyright (C) 2015 Typesafe Inc. + */ +package com.typesafe.config.impl; + +import com.typesafe.config.ConfigException; +import com.typesafe.config.ConfigOrigin; +import com.typesafe.config.ConfigSyntax; +import com.typesafe.config.ConfigValueType; + +import java.io.StringReader; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +final class PathParser { + static class Element { + StringBuilder sb; + // an element can be empty if it has a quoted empty string "" in it + boolean canBeEmpty; + + Element(String initial, boolean canBeEmpty) { + this.canBeEmpty = canBeEmpty; + this.sb = new StringBuilder(initial); + } + + @Override + public String toString() { + return "Element(" + sb.toString() + "," + canBeEmpty + ")"; + } + } + + static ConfigOrigin apiOrigin = SimpleConfigOrigin.newSimple("path parameter"); + + static Path parsePath(String path) { + Path speculated = speculativeFastParsePath(path); + if (speculated != null) + return speculated; + + StringReader reader = new StringReader(path); + + try { + Iterator tokens = Tokenizer.tokenize(apiOrigin, reader, + ConfigSyntax.CONF); + tokens.next(); // drop START + return parsePathExpression(tokens, apiOrigin, path); + } finally { + reader.close(); + } + } + + protected static Path parsePathExpression(Iterator expression, + ConfigOrigin origin) { + return parsePathExpression(expression, origin, null); + } + + // originalText may be null if not available + protected static Path parsePathExpression(Iterator expression, + ConfigOrigin origin, String originalText) { + // each builder in "buf" is an element in the path. + ArrayList pathTokens = new ArrayList(); + List buf = new ArrayList(); + buf.add(new Element("", false)); + + if (!expression.hasNext()) { + throw new ConfigException.BadPath(origin, originalText, + "Expecting a field name or path here, but got nothing"); + } + + while (expression.hasNext()) { + Token t = expression.next(); + pathTokens.add(t); + + // Ignore all IgnoredWhitespace tokens + if (Tokens.isIgnoredWhitespace(t)) + continue; + + if (Tokens.isValueWithType(t, ConfigValueType.STRING)) { + AbstractConfigValue v = Tokens.getValue(t); + // this is a quoted string; so any periods + // in here don't count as path separators + String s = v.transformToString(); + + addPathText(buf, true, s); + } else if (t == Tokens.END) { + // ignore this; when parsing a file, it should not happen + // since we're parsing a token list rather than the main + // token iterator, and when parsing a path expression from the + // API, it's expected to have an END. + } else { + // any periods outside of a quoted string count as + // separators + String text; + if (Tokens.isValue(t)) { + // appending a number here may add + // a period, but we _do_ count those as path + // separators, because we basically want + // "foo 3.0bar" to parse as a string even + // though there's a number in it. The fact that + // we tokenize non-string values is largely an + // implementation detail. + AbstractConfigValue v = Tokens.getValue(t); + text = v.transformToString(); + } else if (Tokens.isUnquotedText(t)) { + text = Tokens.getUnquotedText(t); + } else { + throw new ConfigException.BadPath( + origin, + originalText, + "Token not allowed in path expression: " + + t + + " (you can double-quote this token if you really want it here)"); + } + + addPathText(buf, false, text); + } + } + + PathBuilder pb = new PathBuilder(pathTokens); + for (Element e : buf) { + if (e.sb.length() == 0 && !e.canBeEmpty) { + throw new ConfigException.BadPath( + origin, + originalText, + "path has a leading, trailing, or two adjacent period '.' (use quoted \"\" empty string if you want an empty element)"); + } else { + pb.appendKey(e.sb.toString()); + } + } + + return pb.result(); + } + + private static void addPathText(List buf, boolean wasQuoted, + String newText) { + int i = wasQuoted ? -1 : newText.indexOf('.'); + Element current = buf.get(buf.size() - 1); + if (i < 0) { + // add to current path element + current.sb.append(newText); + // any empty quoted string means this element can + // now be empty. + if (wasQuoted && current.sb.length() == 0) + current.canBeEmpty = true; + } else { + // "buf" plus up to the period is an element + current.sb.append(newText.substring(0, i)); + // then start a new element + buf.add(new Element("", false)); + // recurse to consume remainder of newText + addPathText(buf, false, newText.substring(i + 1)); + } + } + + // the idea is to see if the string has any chars or features + // that might require the full parser to deal with. + private static boolean looksUnsafeForFastParser(String s) { + boolean lastWasDot = true; // start of path is also a "dot" + int len = s.length(); + if (s.isEmpty()) + return true; + if (s.charAt(0) == '.') + return true; + if (s.charAt(len - 1) == '.') + return true; + + for (int i = 0; i < len; ++i) { + char c = s.charAt(i); + if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_') { + lastWasDot = false; + continue; + } else if (c == '.') { + if (lastWasDot) + return true; // ".." means we need to throw an error + lastWasDot = true; + } else if (c == '-') { + if (lastWasDot) + return true; + continue; + } else { + return true; + } + } + + if (lastWasDot) + return true; + + return false; + } + + private static Path fastPathBuild(Path tail, String s, int end) { + // lastIndexOf takes last index it should look at, end - 1 not end + int splitAt = s.lastIndexOf('.', end - 1); + ArrayList tokens = new ArrayList(); + tokens.add(Tokens.newUnquotedText(null, s)); + // this works even if splitAt is -1; then we start the substring at 0 + Path withOneMoreElement = new Path(s.substring(splitAt + 1, end), tail, tokens); + if (splitAt < 0) { + return withOneMoreElement; + } else { + return fastPathBuild(withOneMoreElement, s, splitAt); + } + } + + // do something much faster than the full parser if + // we just have something like "foo" or "foo.bar" + private static Path speculativeFastParsePath(String path) { + String s = ConfigImplUtil.unicodeTrim(path); + if (looksUnsafeForFastParser(s)) + return null; + + return fastPathBuild(null, s, s.length()); + } +} diff --git a/config/src/test/scala/com/typesafe/config/impl/ConfParserTest.scala b/config/src/test/scala/com/typesafe/config/impl/ConfParserTest.scala index ecfe64bc..c68c1fd3 100644 --- a/config/src/test/scala/com/typesafe/config/impl/ConfParserTest.scala +++ b/config/src/test/scala/com/typesafe/config/impl/ConfParserTest.scala @@ -90,7 +90,7 @@ class ConfParserTest extends TestUtils { // also parse with the standalone path parser and be sure the // outcome is the same. try { - val shouldBeSame = Parser.parsePath(s) + val shouldBeSame = PathParser.parsePath(s) assertEquals(result, shouldBeSame) } catch { case e: ConfigException => diff --git a/config/src/test/scala/com/typesafe/config/impl/PathTest.scala b/config/src/test/scala/com/typesafe/config/impl/PathTest.scala index 3ffb285a..20a4d626 100644 --- a/config/src/test/scala/com/typesafe/config/impl/PathTest.scala +++ b/config/src/test/scala/com/typesafe/config/impl/PathTest.scala @@ -73,8 +73,8 @@ class PathTest extends TestUtils { for (t <- tests) { assertEquals(t.expected, t.path.render()) - assertEquals(t.path, Parser.parsePath(t.expected)) - assertEquals(t.path, Parser.parsePath(t.path.render())) + assertEquals(t.path, PathParser.parsePath(t.expected)) + assertEquals(t.path, PathParser.parsePath(t.path.render())) } } diff --git a/config/src/test/scala/com/typesafe/config/impl/TestUtils.scala b/config/src/test/scala/com/typesafe/config/impl/TestUtils.scala index 3d7cfcb4..85eec7e3 100644 --- a/config/src/test/scala/com/typesafe/config/impl/TestUtils.scala +++ b/config/src/test/scala/com/typesafe/config/impl/TestUtils.scala @@ -668,7 +668,7 @@ abstract trait TestUtils { } def configNodeKey(path: String) = { - val parsedPath = Parser.parsePath(path) + val parsedPath = PathParser.parsePath(path) new ConfigNodeKey(parsedPath) }