mirror of
https://github.com/lightbend/config.git
synced 2025-03-23 07:40:25 +08:00
make whitespace handling Unicode-correct.
This is maybe a little above and beyond what will ever matter in practice.
This commit is contained in:
parent
b0fdc6456c
commit
7b0e5a1471
@ -3,6 +3,7 @@ package com.typesafe.config;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import com.typesafe.config.impl.ConfigImpl;
|
||||
import com.typesafe.config.impl.ConfigUtil;
|
||||
|
||||
/**
|
||||
* This class holds some global static methods for the config package.
|
||||
@ -68,10 +69,11 @@ public final class Config {
|
||||
*/
|
||||
public static long parseDuration(String input,
|
||||
ConfigOrigin originForException, String pathForException) {
|
||||
String s = input.trim();
|
||||
String s = ConfigUtil.unicodeTrim(input);
|
||||
String originalUnitString = getUnits(s);
|
||||
String unitString = originalUnitString;
|
||||
String numberString = s.substring(0, s.length() - unitString.length()).trim();
|
||||
String numberString = ConfigUtil.unicodeTrim(s.substring(0, s.length()
|
||||
- unitString.length()));
|
||||
TimeUnit units = null;
|
||||
|
||||
// this would be caught later anyway, but the error message
|
||||
@ -150,7 +152,7 @@ public final class Config {
|
||||
*/
|
||||
public static long parseMemorySize(String input,
|
||||
ConfigOrigin originForException, String pathForException) {
|
||||
String s = input.trim();
|
||||
String s = ConfigUtil.unicodeTrim(input);
|
||||
String unitStringMaybePlural = getUnits(s);
|
||||
String unitString;
|
||||
if (unitStringMaybePlural.endsWith("s"))
|
||||
@ -159,9 +161,8 @@ public final class Config {
|
||||
else
|
||||
unitString = unitStringMaybePlural;
|
||||
String unitStringLower = unitString.toLowerCase();
|
||||
String numberString = s.substring(0,
|
||||
s.length() - unitStringMaybePlural.length())
|
||||
.trim();
|
||||
String numberString = ConfigUtil.unicodeTrim(s.substring(0, s.length()
|
||||
- unitStringMaybePlural.length()));
|
||||
|
||||
// this would be caught later anyway, but the error message
|
||||
// is more helpful if we check it here.
|
||||
|
@ -1,7 +1,8 @@
|
||||
package com.typesafe.config.impl;
|
||||
|
||||
|
||||
final class ConfigUtil {
|
||||
/** This is public just for the "config" package to use, don't touch it */
|
||||
final public class ConfigUtil {
|
||||
static boolean equalsHandlingNull(Object a, Object b) {
|
||||
if (a == null && b != null)
|
||||
return false;
|
||||
@ -50,4 +51,68 @@ final class ConfigUtil {
|
||||
sb.append('"');
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
static boolean isWhitespace(int codepoint) {
|
||||
switch (codepoint) {
|
||||
// try to hit the most common ASCII ones first, then the nonbreaking
|
||||
// spaces that Java brokenly leaves out of isWhitespace.
|
||||
case ' ':
|
||||
case '\n':
|
||||
case '\u00A0':
|
||||
case '\u2007':
|
||||
case '\u202F':
|
||||
return true;
|
||||
default:
|
||||
return Character.isWhitespace(codepoint);
|
||||
}
|
||||
}
|
||||
|
||||
/** This is public just for the "config" package to use, don't touch it! */
|
||||
public static String unicodeTrim(String s) {
|
||||
// this is dumb because it looks like there aren't any whitespace
|
||||
// characters that need surrogate encoding. But, points for
|
||||
// pedantic correctness! It's future-proof or something.
|
||||
// String.trim() actually is broken, since there are plenty of
|
||||
// non-ASCII whitespace characters.
|
||||
final int length = s.length();
|
||||
if (length == 0)
|
||||
return s;
|
||||
|
||||
int start = 0;
|
||||
while (true) {
|
||||
char c = s.charAt(start);
|
||||
if (c == ' ' || c == '\n') {
|
||||
start += 1;
|
||||
} else {
|
||||
int cp = s.codePointAt(start);
|
||||
if (isWhitespace(cp))
|
||||
start += Character.charCount(cp);
|
||||
else
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int end = length;
|
||||
while (true) {
|
||||
char c = s.charAt(end - 1);
|
||||
if (c == ' ' || c == '\n') {
|
||||
--end;
|
||||
} else {
|
||||
int cp;
|
||||
int delta;
|
||||
if (Character.isLowSurrogate(c)) {
|
||||
cp = s.codePointAt(end - 2);
|
||||
delta = 2;
|
||||
} else {
|
||||
cp = s.codePointAt(end - 1);
|
||||
delta = 1;
|
||||
}
|
||||
if (isWhitespace(cp))
|
||||
end -= delta;
|
||||
else
|
||||
break;
|
||||
}
|
||||
}
|
||||
return s.substring(start, end);
|
||||
}
|
||||
}
|
||||
|
@ -384,7 +384,7 @@ final class Parser {
|
||||
|
||||
for (int i = 0; i < s.length(); ++i) {
|
||||
char c = s.charAt(i);
|
||||
if (!Character.isWhitespace(c))
|
||||
if (!ConfigUtil.isWhitespace(c))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@ -796,7 +796,7 @@ final class Parser {
|
||||
// do something much faster than the full parser if
|
||||
// we just have something like "foo" or "foo.bar"
|
||||
private static Path speculativeFastParsePath(String path) {
|
||||
String s = path.trim();
|
||||
String s = ConfigUtil.unicodeTrim(path);
|
||||
if (hasUnsafeChars(s))
|
||||
return null;
|
||||
if (s.startsWith(".") || s.endsWith(".") || s.contains(".."))
|
||||
|
@ -125,13 +125,11 @@ final class Tokenizer {
|
||||
}
|
||||
|
||||
static boolean isWhitespace(int c) {
|
||||
// hoping this optimizes slightly by catching the most common ' '
|
||||
// case up front.
|
||||
return c == ' ' || c == '\n' || Character.isWhitespace(c);
|
||||
return ConfigUtil.isWhitespace(c);
|
||||
}
|
||||
|
||||
static boolean isWhitespaceNotNewline(int c) {
|
||||
return c == ' ' || (c != '\n' && Character.isWhitespace(c));
|
||||
return c != '\n' && ConfigUtil.isWhitespace(c);
|
||||
}
|
||||
|
||||
private int slurpComment() {
|
||||
|
55
src/test/scala/com/typesafe/config/impl/UtilTest.scala
Normal file
55
src/test/scala/com/typesafe/config/impl/UtilTest.scala
Normal file
@ -0,0 +1,55 @@
|
||||
package com.typesafe.config.impl
|
||||
|
||||
import org.junit.Assert._
|
||||
import org.junit._
|
||||
|
||||
class UtilTest extends TestUtils {
|
||||
private lazy val supplementaryChars = {
|
||||
val sb = new java.lang.StringBuilder()
|
||||
val codepoints = Seq(
|
||||
0x2070E, 0x20731, 0x20779, 0x20C53, 0x20C78,
|
||||
0x20C96, 0x20CCF, 0x20CD5, 0x20D15, 0x20D7C)
|
||||
for (c <- codepoints) {
|
||||
sb.appendCodePoint(c)
|
||||
}
|
||||
assertTrue(sb.length() > codepoints.length)
|
||||
sb.toString()
|
||||
}
|
||||
|
||||
@Test
|
||||
def unicodeTrimSupplementaryChars() {
|
||||
assertEquals("", ConfigUtil.unicodeTrim(""))
|
||||
assertEquals("a", ConfigUtil.unicodeTrim("a"))
|
||||
assertEquals("abc", ConfigUtil.unicodeTrim("abc"))
|
||||
assertEquals(supplementaryChars, ConfigUtil.unicodeTrim(supplementaryChars))
|
||||
|
||||
val s = " \u00A0 \n " + supplementaryChars + " \n \u00A0 "
|
||||
val asciiTrimmed = s.trim()
|
||||
val unitrimmed = ConfigUtil.unicodeTrim(s)
|
||||
|
||||
assertFalse(asciiTrimmed.equals(unitrimmed))
|
||||
assertEquals(supplementaryChars, unitrimmed)
|
||||
}
|
||||
|
||||
@Test
|
||||
def definitionOfWhitespace() {
|
||||
assertTrue(ConfigUtil.isWhitespace(' '))
|
||||
assertTrue(ConfigUtil.isWhitespace('\n'))
|
||||
// these three are nonbreaking spaces
|
||||
assertTrue(ConfigUtil.isWhitespace('\u00A0'))
|
||||
assertTrue(ConfigUtil.isWhitespace('\u2007'))
|
||||
assertTrue(ConfigUtil.isWhitespace('\u202F'))
|
||||
// vertical tab, a weird one
|
||||
assertTrue(ConfigUtil.isWhitespace('\u000B'))
|
||||
// file separator, another weird one
|
||||
assertTrue(ConfigUtil.isWhitespace('\u001C'))
|
||||
}
|
||||
|
||||
@Test
|
||||
def equalsThatHandlesNull() {
|
||||
assertTrue(ConfigUtil.equalsHandlingNull(null, null))
|
||||
assertFalse(ConfigUtil.equalsHandlingNull(new Object(), null))
|
||||
assertFalse(ConfigUtil.equalsHandlingNull(null, new Object()))
|
||||
assertTrue(ConfigUtil.equalsHandlingNull("", ""))
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user