diff --git a/HOCON.md b/HOCON.md index 13bf87d0..ccb0e351 100644 --- a/HOCON.md +++ b/HOCON.md @@ -116,13 +116,14 @@ defined as follows: - any Unicode space separator (Zs category), line separator (Zl category), or paragraph separator (Zp category), including nonbreaking spaces (such as 0x00A0, 0x2007, and 0x202F). + The BOM (0xFEFF) must also be treated as whitespace. - tab (`\t` 0x0009), newline ('\n' 0x000A), vertical tab ('\v' 0x000B)`, form feed (`\f' 0x000C), carriage return ('\r' 0x000D), file separator (0x001C), group separator (0x001D), record separator (0x001E), unit separator (0x001F). In Java, the `isWhitespace()` method covers these characters with -the exception of nonbreaking spaces. +the exception of nonbreaking spaces and the BOM. While all Unicode separators should be treated as whitespace, in this spec "newline" refers only and specifically to ASCII newline diff --git a/config/src/main/java/com/typesafe/config/impl/ConfigImplUtil.java b/config/src/main/java/com/typesafe/config/impl/ConfigImplUtil.java index 31e1c409..5ea65537 100644 --- a/config/src/main/java/com/typesafe/config/impl/ConfigImplUtil.java +++ b/config/src/main/java/com/typesafe/config/impl/ConfigImplUtil.java @@ -105,6 +105,10 @@ final public class ConfigImplUtil { case '\u00A0': case '\u2007': case '\u202F': + // this one is the BOM, see + // http://www.unicode.org/faq/utf_bom.html#BOM + // we just accept it as a zero-width nonbreaking space. + case '\uFEFF': return true; default: return Character.isWhitespace(codepoint); diff --git a/config/src/test/resources/bom.conf b/config/src/test/resources/bom.conf new file mode 100644 index 00000000..206fcda8 --- /dev/null +++ b/config/src/test/resources/bom.conf @@ -0,0 +1,2 @@ +# +foo = bar diff --git a/config/src/test/scala/com/typesafe/config/impl/ConfParserTest.scala b/config/src/test/scala/com/typesafe/config/impl/ConfParserTest.scala index 8c8c6f12..c2bd638f 100644 --- a/config/src/test/scala/com/typesafe/config/impl/ConfParserTest.scala +++ b/config/src/test/scala/com/typesafe/config/impl/ConfParserTest.scala @@ -614,4 +614,32 @@ class ConfParserTest extends TestUtils { assertTrue("including basename URL doesn't load anything", conf.isEmpty()) } + + @Test + def acceptBOMStartingFile() { + // BOM at start of file should be ignored + val conf = ConfigFactory.parseResources("bom.conf") + assertEquals("bar", conf.getString("foo")) + } + + @Test + def acceptBOMStartOfStringConfig() { + // BOM at start of file is just whitespace, so ignored + val conf = ConfigFactory.parseString("\uFEFFfoo=bar") + assertEquals("bar", conf.getString("foo")) + } + + @Test + def acceptBOMInStringValue() { + // BOM inside quotes should be preserved, just as other whitespace would be + val conf = ConfigFactory.parseString("foo=\"\uFEFF\uFEFF\"") + assertEquals("\uFEFF\uFEFF", conf.getString("foo")) + } + + @Test + def acceptBOMWhitespace() { + // BOM here should be treated like other whitespace (ignored, since no quotes) + val conf = ConfigFactory.parseString("foo= \uFEFFbar\uFEFF") + assertEquals("bar", conf.getString("foo")) + } }