Treat BOM as whitespace, fixes #75

This commit is contained in:
Havoc Pennington 2013-06-21 10:59:38 -04:00
parent 369e9ff1e2
commit 7231e14886
4 changed files with 36 additions and 1 deletions

View File

@ -116,13 +116,14 @@ defined as follows:
- any Unicode space separator (Zs category), line separator (Zl
category), or paragraph separator (Zp category), including
nonbreaking spaces (such as 0x00A0, 0x2007, and 0x202F).
The BOM (0xFEFF) must also be treated as whitespace.
- tab (`\t` 0x0009), newline (`\n` 0x000A), vertical tab (`\v`
0x000B), form feed (`\f` 0x000C), carriage return (`\r`
0x000D), file separator (0x001C), group separator (0x001D),
record separator (0x001E), unit separator (0x001F).
In Java, the `isWhitespace()` method covers these characters with
the exception of nonbreaking spaces.
the exception of nonbreaking spaces and the BOM.
While all Unicode separators should be treated as whitespace, in
this spec "newline" refers only and specifically to ASCII newline

View File

@ -105,6 +105,10 @@ final public class ConfigImplUtil {
case '\u00A0':
case '\u2007':
case '\u202F':
// this one is the BOM, see
// http://www.unicode.org/faq/utf_bom.html#BOM
// we just accept it as a zero-width nonbreaking space.
case '\uFEFF':
return true;
default:
return Character.isWhitespace(codepoint);

View File

@ -0,0 +1,2 @@
#
foo = bar

View File

@ -614,4 +614,32 @@ class ConfParserTest extends TestUtils {
assertTrue("including basename URL doesn't load anything", conf.isEmpty())
}
@Test
def acceptBOMStartingFile() {
    // The bom.conf resource begins with a byte order mark; the parser is
    // expected to skip it so the "foo" setting is still readable.
    val parsed = ConfigFactory.parseResources("bom.conf")
    val actual = parsed.getString("foo")
    assertEquals("bar", actual)
}
@Test
def acceptBOMStartOfStringConfig() {
    // A leading BOM in an in-memory config string counts as plain whitespace,
    // so the key that follows it must parse as if the BOM were absent.
    val parsed = ConfigFactory.parseString("\uFEFFfoo=bar")
    val actual = parsed.getString("foo")
    assertEquals("bar", actual)
}
@Test
def acceptBOMInStringValue() {
    // Inside a quoted string the BOM is content, not separator whitespace:
    // both BOM characters must come back unchanged in the value.
    val parsed = ConfigFactory.parseString("foo=\"\uFEFF\uFEFF\"")
    val actual = parsed.getString("foo")
    assertEquals("\uFEFF\uFEFF", actual)
}
@Test
def acceptBOMWhitespace() {
    // Around an unquoted value the BOM behaves like any other whitespace,
    // so it is dropped from both ends and only "bar" remains.
    val parsed = ConfigFactory.parseString("foo= \uFEFFbar\uFEFF")
    val actual = parsed.getString("foo")
    assertEquals("bar", actual)
}
}