diff --git a/src/main/java/com/typesafe/config/Config.java b/src/main/java/com/typesafe/config/Config.java index 64ad7e2b..ce83ec62 100644 --- a/src/main/java/com/typesafe/config/Config.java +++ b/src/main/java/com/typesafe/config/Config.java @@ -3,6 +3,7 @@ package com.typesafe.config; import java.util.concurrent.TimeUnit; import com.typesafe.config.impl.ConfigImpl; +import com.typesafe.config.impl.ConfigUtil; /** * This class holds some global static methods for the config package. @@ -68,10 +69,11 @@ public final class Config { */ public static long parseDuration(String input, ConfigOrigin originForException, String pathForException) { - String s = input.trim(); + String s = ConfigUtil.unicodeTrim(input); String originalUnitString = getUnits(s); String unitString = originalUnitString; - String numberString = s.substring(0, s.length() - unitString.length()).trim(); + String numberString = ConfigUtil.unicodeTrim(s.substring(0, s.length() + - unitString.length())); TimeUnit units = null; // this would be caught later anyway, but the error message @@ -150,7 +152,7 @@ public final class Config { */ public static long parseMemorySize(String input, ConfigOrigin originForException, String pathForException) { - String s = input.trim(); + String s = ConfigUtil.unicodeTrim(input); String unitStringMaybePlural = getUnits(s); String unitString; if (unitStringMaybePlural.endsWith("s")) @@ -159,9 +161,8 @@ public final class Config { else unitString = unitStringMaybePlural; String unitStringLower = unitString.toLowerCase(); - String numberString = s.substring(0, - s.length() - unitStringMaybePlural.length()) - .trim(); + String numberString = ConfigUtil.unicodeTrim(s.substring(0, s.length() + - unitStringMaybePlural.length())); // this would be caught later anyway, but the error message // is more helpful if we check it here. diff --git a/src/main/java/com/typesafe/config/impl/ConfigUtil.java b/src/main/java/com/typesafe/config/impl/ConfigUtil.java index 39318fd6..65ae09af 100644 --- a/src/main/java/com/typesafe/config/impl/ConfigUtil.java +++ b/src/main/java/com/typesafe/config/impl/ConfigUtil.java @@ -1,7 +1,8 @@ package com.typesafe.config.impl; -final class ConfigUtil { +/** This is public just for the "config" package to use, don't touch it */ +final public class ConfigUtil { static boolean equalsHandlingNull(Object a, Object b) { if (a == null && b != null) return false; @@ -50,4 +51,68 @@ final class ConfigUtil { sb.append('"'); return sb.toString(); } + + static boolean isWhitespace(int codepoint) { + switch (codepoint) { + // try to hit the most common ASCII ones first, then the nonbreaking + // spaces that Java brokenly leaves out of isWhitespace. + case ' ': + case '\n': + case '\u00A0': + case '\u2007': + case '\u202F': + return true; + default: + return Character.isWhitespace(codepoint); + } + } + + /** This is public just for the "config" package to use, don't touch it! */ + public static String unicodeTrim(String s) { + // this is dumb because it looks like there aren't any whitespace + // characters that need surrogate encoding. But, points for + // pedantic correctness! It's future-proof or something. + // String.trim() actually is broken, since there are plenty of + // non-ASCII whitespace characters. + final int length = s.length(); + if (length == 0) + return s; + + int start = 0; + while (true) { + char c = s.charAt(start); + if (c == ' ' || c == '\n') { + start += 1; + } else { + int cp = s.codePointAt(start); + if (isWhitespace(cp)) + start += Character.charCount(cp); + else + break; + } + } + + int end = length; + while (true) { + char c = s.charAt(end - 1); + if (c == ' ' || c == '\n') { + --end; + } else { + int cp; + int delta; + if (Character.isLowSurrogate(c)) { + cp = s.codePointAt(end - 2); + delta = 2; + } else { + cp = s.codePointAt(end - 1); + delta = 1; + } + if (isWhitespace(cp)) + end -= delta; + else + break; + } + } + return s.substring(start, end); + } } diff --git a/src/main/java/com/typesafe/config/impl/Parser.java b/src/main/java/com/typesafe/config/impl/Parser.java index f1d182cb..5fc453ea 100644 --- a/src/main/java/com/typesafe/config/impl/Parser.java +++ b/src/main/java/com/typesafe/config/impl/Parser.java @@ -384,7 +384,7 @@ final class Parser { for (int i = 0; i < s.length(); ++i) { char c = s.charAt(i); - if (!Character.isWhitespace(c)) + if (!ConfigUtil.isWhitespace(c)) return false; } return true; @@ -796,7 +796,7 @@ final class Parser { // do something much faster than the full parser if // we just have something like "foo" or "foo.bar" private static Path speculativeFastParsePath(String path) { - String s = path.trim(); + String s = ConfigUtil.unicodeTrim(path); if (hasUnsafeChars(s)) return null; if (s.startsWith(".") || s.endsWith(".") || s.contains("..")) diff --git a/src/main/java/com/typesafe/config/impl/Tokenizer.java b/src/main/java/com/typesafe/config/impl/Tokenizer.java index 131d90a0..d7895090 100644 --- a/src/main/java/com/typesafe/config/impl/Tokenizer.java +++ b/src/main/java/com/typesafe/config/impl/Tokenizer.java @@ -125,13 +125,11 @@ final class Tokenizer { } static boolean isWhitespace(int c) { - // hoping this optimizes slightly by catching the most common ' ' - // case up front. - return c == ' ' || c == '\n' || Character.isWhitespace(c); + return ConfigUtil.isWhitespace(c); } static boolean isWhitespaceNotNewline(int c) { - return c == ' ' || (c != '\n' && Character.isWhitespace(c)); + return c != '\n' && ConfigUtil.isWhitespace(c); } private int slurpComment() { diff --git a/src/test/scala/com/typesafe/config/impl/UtilTest.scala b/src/test/scala/com/typesafe/config/impl/UtilTest.scala new file mode 100644 index 00000000..2aec06d9 --- /dev/null +++ b/src/test/scala/com/typesafe/config/impl/UtilTest.scala @@ -0,0 +1,55 @@ +package com.typesafe.config.impl + +import org.junit.Assert._ +import org.junit._ + +class UtilTest extends TestUtils { + private lazy val supplementaryChars = { + val sb = new java.lang.StringBuilder() + val codepoints = Seq( + 0x2070E, 0x20731, 0x20779, 0x20C53, 0x20C78, + 0x20C96, 0x20CCF, 0x20CD5, 0x20D15, 0x20D7C) + for (c <- codepoints) { + sb.appendCodePoint(c) + } + assertTrue(sb.length() > codepoints.length) + sb.toString() + } + + @Test + def unicodeTrimSupplementaryChars() { + assertEquals("", ConfigUtil.unicodeTrim("")) + assertEquals("a", ConfigUtil.unicodeTrim("a")) + assertEquals("abc", ConfigUtil.unicodeTrim("abc")) + assertEquals(supplementaryChars, ConfigUtil.unicodeTrim(supplementaryChars)) + + val s = " \u00A0 \n " + supplementaryChars + " \n \u00A0 " + val asciiTrimmed = s.trim() + val unitrimmed = ConfigUtil.unicodeTrim(s) + + assertFalse(asciiTrimmed.equals(unitrimmed)) + assertEquals(supplementaryChars, unitrimmed) + } + + @Test + def definitionOfWhitespace() { + assertTrue(ConfigUtil.isWhitespace(' ')) + assertTrue(ConfigUtil.isWhitespace('\n')) + // these three are nonbreaking spaces + assertTrue(ConfigUtil.isWhitespace('\u00A0')) + assertTrue(ConfigUtil.isWhitespace('\u2007')) + assertTrue(ConfigUtil.isWhitespace('\u202F')) + // vertical tab, a weird one + assertTrue(ConfigUtil.isWhitespace('\u000B')) + // file separator, another weird one + assertTrue(ConfigUtil.isWhitespace('\u001C')) + } + + @Test + def equalsThatHandlesNull() { + assertTrue(ConfigUtil.equalsHandlingNull(null, null)) + assertFalse(ConfigUtil.equalsHandlingNull(new Object(), null)) + assertFalse(ConfigUtil.equalsHandlingNull(null, new Object())) + assertTrue(ConfigUtil.equalsHandlingNull("", "")) + } +}