Extract Path parsing into new class

Extract the logic to parse a Path out of the Parser and into a new PathParser class.
2025-01-15 23:01:05 +08:00 · 2015-03-11 13:59:57 -07:00 · 2015-03-11 13:59:57 -07:00 · b19e38f29b
commit b19e38f29b
parent 0a804deff5
6 changed files with 221 additions and 205 deletions
--- a/config/src/main/java/com/typesafe/config/impl/Parser.java
+++ b/config/src/main/java/com/typesafe/config/impl/Parser.java
@ -382,7 +382,7 @@ final class Parser {
        private static SubstitutionExpression tokenToSubstitutionExpression(Token valueToken) {
            List<Token> expression = Tokens.getSubstitutionPathExpression(valueToken);
-            Path path = parsePathExpression(expression.iterator(), valueToken.origin());
+            Path path = PathParser.parsePathExpression(expression.iterator(), valueToken.origin());
            boolean optional = Tokens.getSubstitutionOptional(valueToken);
            return new SubstitutionExpression(path, optional);
@ -604,7 +604,7 @@ final class Parser {
                }
                putBack(t); // put back the token we ended with
-                return parsePathExpression(expression.iterator(), lineOrigin());
+                return PathParser.parsePathExpression(expression.iterator(), lineOrigin());
            }
        }
@ -1016,202 +1016,4 @@ final class Parser {
            }
        }
    }
    /**
     * Mutable accumulator for the text of one path element while a path
     * expression is being parsed.
     */
    static class Element {
        // text collected so far for this element
        StringBuilder sb;
        // set when a quoted empty string "" was seen for this element, which
        // makes an empty element acceptable
        boolean canBeEmpty;
        Element(String initial, boolean canBeEmpty) {
            this.sb = new StringBuilder(initial);
            this.canBeEmpty = canBeEmpty;
        }
        @Override
        public String toString() {
            StringBuilder rendered = new StringBuilder("Element(");
            rendered.append(sb.toString());
            rendered.append(",");
            rendered.append(canBeEmpty);
            rendered.append(")");
            return rendered.toString();
        }
    }
    /**
     * Appends a piece of path text to the list of elements being built.
     * Unquoted text is split at every '.': the text before a period
     * completes the element in progress and the period opens a new one.
     * Quoted text is appended to the current element verbatim.
     *
     * @param buf elements collected so far; the last entry is the element
     *            in progress
     * @param wasQuoted whether newText came from a quoted string
     * @param newText the text to append
     */
    private static void addPathText(List<Element> buf, boolean wasQuoted,
            String newText) {
        String rest = newText;
        // periods inside a quoted string never act as separators
        int dot = wasQuoted ? -1 : rest.indexOf('.');
        while (dot >= 0) {
            // everything up to the period belongs to the current element
            buf.get(buf.size() - 1).sb.append(rest.substring(0, dot));
            // the period starts a fresh element
            buf.add(new Element("", false));
            rest = rest.substring(dot + 1);
            dot = rest.indexOf('.');
        }
        Element last = buf.get(buf.size() - 1);
        last.sb.append(rest);
        // a quoted string that leaves the element empty means the element
        // is allowed to be empty
        if (wasQuoted && last.sb.length() == 0) {
            last.canBeEmpty = true;
        }
    }
    /**
     * Parses a path expression for which the original text is not
     * available; delegates to the three-argument overload with a null
     * originalText.
     */
    private static Path parsePathExpression(Iterator<Token> expression,
            ConfigOrigin origin) {
        return parsePathExpression(expression, origin, null);
    }
    /**
     * Turns a sequence of tokens into a Path.
     *
     * @param expression the tokens making up the path expression
     * @param origin origin passed to any BadPath exception that is thrown
     * @param originalText text passed to any BadPath exception that is
     *            thrown; may be null if not available
     * @throws ConfigException.BadPath if there are no tokens, if a token is
     *             neither a value, unquoted text, ignored whitespace nor
     *             END, or if an element ends up empty without having
     *             contained a quoted empty string
     */
    private static Path parsePathExpression(Iterator<Token> expression,
            ConfigOrigin origin, String originalText) {
        if (!expression.hasNext()) {
            throw new ConfigException.BadPath(origin, originalText,
                    "Expecting a field name or path here, but got nothing");
        }
        // every token consumed is remembered and handed to the PathBuilder
        ArrayList<Token> consumed = new ArrayList<Token>();
        // each entry in "elements" collects the text of one path element
        List<Element> elements = new ArrayList<Element>();
        elements.add(new Element("", false));
        while (expression.hasNext()) {
            Token token = expression.next();
            consumed.add(token);
            // IgnoredWhitespace tokens contribute nothing to the path
            if (Tokens.isIgnoredWhitespace(token)) {
                continue;
            }
            if (Tokens.isValueWithType(token, ConfigValueType.STRING)) {
                // a quoted string: periods in it are not path separators
                AbstractConfigValue quoted = Tokens.getValue(token);
                addPathText(elements, true, quoted.transformToString());
                continue;
            }
            if (token == Tokens.END) {
                // nothing to do. When parsing a file this should not occur
                // because a token list is parsed rather than the main token
                // iterator; a path expression coming from the API is
                // expected to end with END.
                continue;
            }
            // from here on, periods count as separators
            String unquoted;
            if (Tokens.isValue(token)) {
                // a number's text may contain a period, and that period
                // deliberately counts as a separator: "foo 3.0bar" should
                // parse as a string even though it contains a number.
                // Tokenizing non-string values is largely an implementation
                // detail.
                AbstractConfigValue value = Tokens.getValue(token);
                unquoted = value.transformToString();
            } else if (Tokens.isUnquotedText(token)) {
                unquoted = Tokens.getUnquotedText(token);
            } else {
                throw new ConfigException.BadPath(
                        origin,
                        originalText,
                        "Token not allowed in path expression: "
                                + token
                                + " (you can double-quote this token if you really want it here)");
            }
            addPathText(elements, false, unquoted);
        }
        PathBuilder builder = new PathBuilder(consumed);
        for (Element element : elements) {
            boolean emptyNotAllowed = !element.canBeEmpty && element.sb.length() == 0;
            if (emptyNotAllowed) {
                throw new ConfigException.BadPath(
                        origin,
                        originalText,
                        "path has a leading, trailing, or two adjacent period '.' (use quoted \"\" empty string if you want an empty element)");
            }
            builder.appendKey(element.sb.toString());
        }
        return builder.result();
    }
    static ConfigOrigin apiOrigin = SimpleConfigOrigin.newSimple("path parameter");
    /**
     * Parses a path given as a plain string. The speculative fast parser is
     * tried first; when it declines (returns null) the string is tokenized
     * with CONF syntax and parsed as a full path expression.
     */
    static Path parsePath(String path) {
        Path fast = speculativeFastParsePath(path);
        if (fast == null) {
            StringReader input = new StringReader(path);
            try {
                Iterator<Token> stream = Tokenizer.tokenize(apiOrigin, input,
                        ConfigSyntax.CONF);
                stream.next(); // discard START
                return parsePathExpression(stream, apiOrigin, path);
            } finally {
                input.close();
            }
        }
        return fast;
    }
    /**
     * Decides whether a string has any characters or features that might
     * need the full parser. Only ASCII letters, '_', '-' and '.' are
     * accepted, and only when no element is empty or starts with '-'.
     *
     * @return true if the fast parser must not be used for s
     */
    private static boolean looksUnsafeForFastParser(String s) {
        final int length = s.length();
        if (length == 0 || s.charAt(0) == '.' || s.charAt(length - 1) == '.') {
            return true;
        }
        // true at the very beginning and directly after each '.'
        boolean atElementStart = true;
        for (int pos = 0; pos < length; pos++) {
            final char ch = s.charAt(pos);
            boolean letterOrUnderscore = ('a' <= ch && ch <= 'z')
                    || ('A' <= ch && ch <= 'Z') || ch == '_';
            if (letterOrUnderscore) {
                atElementStart = false;
            } else if (ch == '.' || ch == '-') {
                // ".." needs the full parser so it can report the error, and
                // an element starting with '-' is left to the full parser too
                if (atElementStart) {
                    return true;
                }
                atElementStart = (ch == '.');
            } else {
                return true;
            }
        }
        return atElementStart;
    }
    /**
     * Builds a Path from a dot-separated string, working from right to
     * left: each pass takes the element that ends at the current boundary
     * and constructs a new Path from it and the Path built so far.
     *
     * @param tail the Path passed to the first Path constructed (may be
     *            null)
     * @param s the complete path string
     * @param end exclusive end index of the part of s still to be processed
     */
    private static Path fastPathBuild(Path tail, String s, int end) {
        Path built = tail;
        int boundary = end;
        while (true) {
            // lastIndexOf takes the last index it should look at, so
            // boundary - 1 rather than boundary
            int dot = s.lastIndexOf('.', boundary - 1);
            // every element gets its own one-token list holding all of s
            ArrayList<Token> tokens = new ArrayList<Token>();
            tokens.add(Tokens.newUnquotedText(null, s));
            // when dot is -1 the substring simply starts at index 0
            built = new Path(s.substring(dot + 1, boundary), built, tokens);
            if (dot < 0) {
                return built;
            }
            boundary = dot;
        }
    }
    /**
     * Shortcut for inputs such as "foo" or "foo.bar", which do not need the
     * full parser.
     *
     * @return the parsed Path, or null if the trimmed input looks unsafe for
     *         the fast parser
     */
    private static Path speculativeFastParsePath(String path) {
        String trimmed = ConfigImplUtil.unicodeTrim(path);
        return looksUnsafeForFastParser(trimmed)
                ? null
                : fastPathBuild(null, trimmed, trimmed.length());
    }
 }
--- a/config/src/main/java/com/typesafe/config/impl/Path.java
+++ b/config/src/main/java/com/typesafe/config/impl/Path.java
@ -245,6 +245,6 @@ final class Path {
    }
    static Path newPath(String path) {
-        return Parser.parsePath(path);
+        return PathParser.parsePath(path);
    }
 }
--- a/config/src/main/java/com/typesafe/config/impl/PathParser.java
+++ b/config/src/main/java/com/typesafe/config/impl/PathParser.java
@ -0,0 +1,214 @@
 /**
 *   Copyright (C) 2015 Typesafe Inc. <http://typesafe.com>
 */
 package com.typesafe.config.impl;
 import com.typesafe.config.ConfigException;
 import com.typesafe.config.ConfigOrigin;
 import com.typesafe.config.ConfigSyntax;
 import com.typesafe.config.ConfigValueType;
 import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 /**
  * Static helpers that turn a path expression, given either as a sequence
  * of tokens or as a plain string, into a {@link Path}.
  */
 final class PathParser {
    /**
     * Mutable accumulator for the text of one path element while a path
     * expression is being parsed.
     */
    static class Element {
        // text collected so far for this element
        final StringBuilder sb;
        // an element can be empty if it has a quoted empty string "" in it
        boolean canBeEmpty;
        Element(String initial, boolean canBeEmpty) {
            this.canBeEmpty = canBeEmpty;
            this.sb = new StringBuilder(initial);
        }
        @Override
        public String toString() {
            return "Element(" + sb.toString() + "," + canBeEmpty + ")";
        }
    }
    // origin handed to the tokenizer and to error reports when a path comes
    // in as a plain string
    static final ConfigOrigin apiOrigin = SimpleConfigOrigin.newSimple("path parameter");
    // this class only holds static helpers and is never instantiated
    private PathParser() {
    }
    /**
     * Parses a path given as a plain string. The speculative fast parser is
     * tried first; when it declines (returns null) the string is tokenized
     * with CONF syntax and parsed as a full path expression.
     *
     * @param path the path expression text
     * @return the parsed path
     * @throws ConfigException.BadPath under the conditions described for
     *             {@link #parsePathExpression(Iterator, ConfigOrigin, String)}
     */
    static Path parsePath(String path) {
        Path speculated = speculativeFastParsePath(path);
        if (speculated != null)
            return speculated;
        StringReader reader = new StringReader(path);
        try {
            Iterator<Token> tokens = Tokenizer.tokenize(apiOrigin, reader,
                    ConfigSyntax.CONF);
            tokens.next(); // drop START
            return parsePathExpression(tokens, apiOrigin, path);
        } finally {
            reader.close();
        }
    }
    /**
     * Parses a path expression for which the original text is not
     * available; delegates to the three-argument overload with a null
     * originalText.
     */
    static Path parsePathExpression(Iterator<Token> expression,
                                    ConfigOrigin origin) {
        return parsePathExpression(expression, origin, null);
    }
    /**
     * Turns a sequence of tokens into a Path.
     *
     * @param expression the tokens making up the path expression
     * @param origin origin passed to any BadPath exception that is thrown
     * @param originalText text passed to any BadPath exception that is
     *            thrown; may be null if not available
     * @return the parsed path
     * @throws ConfigException.BadPath if there are no tokens, if a token is
     *             neither a value, unquoted text, ignored whitespace nor
     *             END, or if an element ends up empty without having
     *             contained a quoted empty string
     */
    static Path parsePathExpression(Iterator<Token> expression,
                                    ConfigOrigin origin, String originalText) {
        // each builder in "buf" is an element in the path.
        ArrayList<Token> pathTokens = new ArrayList<Token>();
        List<Element> buf = new ArrayList<Element>();
        buf.add(new Element("", false));
        if (!expression.hasNext()) {
            throw new ConfigException.BadPath(origin, originalText,
                    "Expecting a field name or path here, but got nothing");
        }
        while (expression.hasNext()) {
            Token t = expression.next();
            // every token, including ignored ones, is kept for the PathBuilder
            pathTokens.add(t);
            // Ignore all IgnoredWhitespace tokens
            if (Tokens.isIgnoredWhitespace(t))
                continue;
            if (Tokens.isValueWithType(t, ConfigValueType.STRING)) {
                AbstractConfigValue v = Tokens.getValue(t);
                // this is a quoted string; so any periods
                // in here don't count as path separators
                String s = v.transformToString();
                addPathText(buf, true, s);
            } else if (t == Tokens.END) {
                // ignore this; when parsing a file, it should not happen
                // since we're parsing a token list rather than the main
                // token iterator, and when parsing a path expression from the
                // API, it's expected to have an END.
            } else {
                // any periods outside of a quoted string count as
                // separators
                String text;
                if (Tokens.isValue(t)) {
                    // appending a number here may add
                    // a period, but we _do_ count those as path
                    // separators, because we basically want
                    // "foo 3.0bar" to parse as a string even
                    // though there's a number in it. The fact that
                    // we tokenize non-string values is largely an
                    // implementation detail.
                    AbstractConfigValue v = Tokens.getValue(t);
                    text = v.transformToString();
                } else if (Tokens.isUnquotedText(t)) {
                    text = Tokens.getUnquotedText(t);
                } else {
                    throw new ConfigException.BadPath(
                            origin,
                            originalText,
                            "Token not allowed in path expression: "
                                    + t
                                    + " (you can double-quote this token if you really want it here)");
                }
                addPathText(buf, false, text);
            }
        }
        PathBuilder pb = new PathBuilder(pathTokens);
        for (Element e : buf) {
            if (e.sb.length() == 0 && !e.canBeEmpty) {
                throw new ConfigException.BadPath(
                        origin,
                        originalText,
                        "path has a leading, trailing, or two adjacent period '.' (use quoted \"\" empty string if you want an empty element)");
            } else {
                pb.appendKey(e.sb.toString());
            }
        }
        return pb.result();
    }
    /**
     * Appends a piece of path text to the list of elements being built.
     * Unquoted text is split at every '.': the text before a period
     * completes the element in progress and the period opens a new one.
     * Quoted text is appended to the current element verbatim.
     *
     * @param buf elements collected so far; the last entry is the element
     *            in progress
     * @param wasQuoted whether newText came from a quoted string
     * @param newText the text to append
     */
    private static void addPathText(List<Element> buf, boolean wasQuoted,
                                    String newText) {
        // iterative so that stack depth does not grow with the number of
        // periods in newText
        String rest = newText;
        int i = wasQuoted ? -1 : rest.indexOf('.');
        while (i >= 0) {
            // "buf" plus up to the period is an element
            buf.get(buf.size() - 1).sb.append(rest.substring(0, i));
            // then start a new element
            buf.add(new Element("", false));
            // continue with the remainder of the text
            rest = rest.substring(i + 1);
            i = rest.indexOf('.');
        }
        // add what is left to the current path element
        Element current = buf.get(buf.size() - 1);
        current.sb.append(rest);
        // any empty quoted string means this element can
        // now be empty.
        if (wasQuoted && current.sb.length() == 0)
            current.canBeEmpty = true;
    }
    /**
     * Decides whether a string has any characters or features that might
     * need the full parser. Only ASCII letters, '_', '-' and '.' are
     * accepted, and only when no element is empty or starts with '-'.
     *
     * @return true if the fast parser must not be used for s
     */
    private static boolean looksUnsafeForFastParser(String s) {
        boolean lastWasDot = true; // start of path is also a "dot"
        int len = s.length();
        if (s.isEmpty())
            return true;
        if (s.charAt(0) == '.')
            return true;
        if (s.charAt(len - 1) == '.')
            return true;
        for (int i = 0; i < len; ++i) {
            char c = s.charAt(i);
            if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_') {
                lastWasDot = false;
            } else if (c == '.') {
                if (lastWasDot)
                    return true; // ".." means we need to throw an error
                lastWasDot = true;
            } else if (c == '-') {
                // an element starting with '-' goes to the full parser
                if (lastWasDot)
                    return true;
            } else {
                return true;
            }
        }
        return lastWasDot;
    }
    /**
     * Builds a Path from a dot-separated string, working from right to
     * left: each pass takes the element that ends at the current boundary
     * and constructs a new Path from it and the Path built so far.
     *
     * @param tail the Path passed to the first Path constructed (may be
     *            null)
     * @param s the complete path string
     * @param end exclusive end index of the part of s still to be processed
     */
    private static Path fastPathBuild(Path tail, String s, int end) {
        // iterative so that stack depth does not grow with the number of
        // path elements
        Path result = tail;
        int boundary = end;
        while (true) {
            // lastIndexOf takes last index it should look at, boundary - 1
            // not boundary
            int splitAt = s.lastIndexOf('.', boundary - 1);
            // every element gets its own one-token list holding all of s
            ArrayList<Token> tokens = new ArrayList<Token>();
            tokens.add(Tokens.newUnquotedText(null, s));
            // this works even if splitAt is -1; then we start the substring at 0
            result = new Path(s.substring(splitAt + 1, boundary), result, tokens);
            if (splitAt < 0) {
                return result;
            }
            boundary = splitAt;
        }
    }
    /**
     * Shortcut for inputs such as "foo" or "foo.bar", which do not need the
     * full parser.
     *
     * @return the parsed Path, or null if the trimmed input looks unsafe for
     *         the fast parser
     */
    private static Path speculativeFastParsePath(String path) {
        String s = ConfigImplUtil.unicodeTrim(path);
        if (looksUnsafeForFastParser(s))
            return null;
        return fastPathBuild(null, s, s.length());
    }
 }
--- a/config/src/test/scala/com/typesafe/config/impl/ConfParserTest.scala
+++ b/config/src/test/scala/com/typesafe/config/impl/ConfParserTest.scala
@ -90,7 +90,7 @@ class ConfParserTest extends TestUtils {
        // also parse with the standalone path parser and be sure the
        // outcome is the same.
        try {
-            val shouldBeSame = Parser.parsePath(s)
+            val shouldBeSame = PathParser.parsePath(s)
            assertEquals(result, shouldBeSame)
        } catch {
            case e: ConfigException =>
--- a/config/src/test/scala/com/typesafe/config/impl/PathTest.scala
+++ b/config/src/test/scala/com/typesafe/config/impl/PathTest.scala
@ -73,8 +73,8 @@ class PathTest extends TestUtils {
        for (t <- tests) {
            assertEquals(t.expected, t.path.render())
-            assertEquals(t.path, Parser.parsePath(t.expected))
+            assertEquals(t.path, PathParser.parsePath(t.expected))
-            assertEquals(t.path, Parser.parsePath(t.path.render()))
+            assertEquals(t.path, PathParser.parsePath(t.path.render()))
        }
    }
--- a/config/src/test/scala/com/typesafe/config/impl/TestUtils.scala
+++ b/config/src/test/scala/com/typesafe/config/impl/TestUtils.scala
@ -668,7 +668,7 @@ abstract trait TestUtils {
    }
    def configNodeKey(path: String) = {
-        val parsedPath = Parser.parsePath(path)
+        val parsedPath = PathParser.parsePath(path)
        new ConfigNodeKey(parsedPath)
    }