mirror of
https://github.com/lightbend/config.git
synced 2025-02-21 08:40:51 +08:00
Treat BOM as whitespace, fixes #75
This commit is contained in:
parent
369e9ff1e2
commit
7231e14886
3
HOCON.md
3
HOCON.md
@ -116,13 +116,14 @@ defined as follows:
|
|||||||
- any Unicode space separator (Zs category), line separator (Zl
|
- any Unicode space separator (Zs category), line separator (Zl
|
||||||
category), or paragraph separator (Zp category), including
|
category), or paragraph separator (Zp category), including
|
||||||
nonbreaking spaces (such as 0x00A0, 0x2007, and 0x202F).
|
nonbreaking spaces (such as 0x00A0, 0x2007, and 0x202F).
|
||||||
|
The BOM (0xFEFF) must also be treated as whitespace.
|
||||||
- tab (`\t` 0x0009), newline (`\n` 0x000A), vertical tab (`\v`
|
- tab (`\t` 0x0009), newline (`\n` 0x000A), vertical tab (`\v`
|
||||||
0x000B), form feed (`\f` 0x000C), carriage return (`\r`
|
0x000B), form feed (`\f` 0x000C), carriage return (`\r`
|
||||||
0x000D), file separator (0x001C), group separator (0x001D),
|
0x000D), file separator (0x001C), group separator (0x001D),
|
||||||
record separator (0x001E), unit separator (0x001F).
|
record separator (0x001E), unit separator (0x001F).
|
||||||
|
|
||||||
In Java, the `isWhitespace()` method covers these characters with
|
In Java, the `isWhitespace()` method covers these characters with
|
||||||
the exception of nonbreaking spaces.
|
the exception of nonbreaking spaces and the BOM.
|
||||||
|
|
||||||
While all Unicode separators should be treated as whitespace, in
|
While all Unicode separators should be treated as whitespace, in
|
||||||
this spec "newline" refers only and specifically to ASCII newline
|
this spec "newline" refers only and specifically to ASCII newline
|
||||||
|
@ -105,6 +105,10 @@ final public class ConfigImplUtil {
|
|||||||
case '\u00A0':
|
case '\u00A0':
|
||||||
case '\u2007':
|
case '\u2007':
|
||||||
case '\u202F':
|
case '\u202F':
|
||||||
|
// this one is the BOM, see
|
||||||
|
// http://www.unicode.org/faq/utf_bom.html#BOM
|
||||||
|
// we just accept it as a zero-width nonbreaking space.
|
||||||
|
case '\uFEFF':
|
||||||
return true;
|
return true;
|
||||||
default:
|
default:
|
||||||
return Character.isWhitespace(codepoint);
|
return Character.isWhitespace(codepoint);
|
||||||
|
2
config/src/test/resources/bom.conf
Normal file
2
config/src/test/resources/bom.conf
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
#
|
||||||
|
foo = bar
|
@ -614,4 +614,32 @@ class ConfParserTest extends TestUtils {
|
|||||||
|
|
||||||
assertTrue("including basename URL doesn't load anything", conf.isEmpty())
|
assertTrue("including basename URL doesn't load anything", conf.isEmpty())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
def acceptBOMStartingFile() {
|
||||||
|
// BOM at start of file should be ignored
|
||||||
|
val conf = ConfigFactory.parseResources("bom.conf")
|
||||||
|
assertEquals("bar", conf.getString("foo"))
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
def acceptBOMStartOfStringConfig() {
|
||||||
|
// BOM at start of file is just whitespace, so ignored
|
||||||
|
val conf = ConfigFactory.parseString("\uFEFFfoo=bar")
|
||||||
|
assertEquals("bar", conf.getString("foo"))
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
def acceptBOMInStringValue() {
|
||||||
|
// BOM inside quotes should be preserved, just as other whitespace would be
|
||||||
|
val conf = ConfigFactory.parseString("foo=\"\uFEFF\uFEFF\"")
|
||||||
|
assertEquals("\uFEFF\uFEFF", conf.getString("foo"))
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
def acceptBOMWhitespace() {
|
||||||
|
// BOM here should be treated like other whitespace (ignored, since no quotes)
|
||||||
|
val conf = ConfigFactory.parseString("foo= \uFEFFbar\uFEFF")
|
||||||
|
assertEquals("bar", conf.getString("foo"))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user