From 00f8b3d507d58d8981aa867879ae2798ff89b0a2 Mon Sep 17 00:00:00 2001 From: Havoc Pennington Date: Fri, 25 Nov 2011 12:08:07 -0500 Subject: [PATCH] Prohibit additional chars in unquoted strings. The general idea here is to permit future syntax extensions; as a rule, new syntax can only be added by using one of these characters that can't be a string. Backtick ` in particular is in the current ECMAScript drafts for multiline strings, so supporting multiline strings with that would be a very natural thing to do. The other characters added here (^, !, ?, @, *, &) are more or less arbitrary. &, ! and * have special meaning in YAML so might be natural ones to use for similar purposes. ? would have some natural "let this be missing" meaning, @ is often used in template-like things, and ^ could be used for some unforeseen need. --- HOCON.md | 12 ++++++------ .../java/com/typesafe/config/impl/Tokenizer.java | 2 +- .../scala/com/typesafe/config/impl/TestUtils.scala | 7 +++++++ 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/HOCON.md b/HOCON.md index c8d13f07..472d0ed7 100644 --- a/HOCON.md +++ b/HOCON.md @@ -188,8 +188,8 @@ A sequence of characters outside of a quoted string is a string value if: - it does not contain "forbidden characters" '$', '"', '{', '}', - '[', ']', ':', '=', ',', '+', '#', '\' (backslash), or - whitespace. + '[', ']', ':', '=', ',', '+', '#', '`', '^', '?', '!', '@', + '*', '&', '\' (backslash), or whitespace. - it does not contain the two-character string "//" (which starts a comment) - its initial characters do not parse as `true`, `false`, `null`, @@ -225,6 +225,10 @@ newline). This rule is from the JSON spec. However, unquoted strings have no restriction on control characters, other than the ones listed as "forbidden characters" above. +Some of the "forbidden characters" are forbidden because they +already have meaning in JSON or HOCON, others are essentially +reserved keywords to allow future extensions to this spec. + ### Value concatenation The value of an object field or an array element may consist of @@ -1018,7 +1022,3 @@ Environment variables are interpreted as follows: - environment variables always become a string value, though if an app asks for another type automatic type conversion would kick in - -## Open issues - - - should a few more special characters be banned from unquoted strings, to allow future extensions? diff --git a/src/main/java/com/typesafe/config/impl/Tokenizer.java b/src/main/java/com/typesafe/config/impl/Tokenizer.java index 147917bf..fa2c11e8 100644 --- a/src/main/java/com/typesafe/config/impl/Tokenizer.java +++ b/src/main/java/com/typesafe/config/impl/Tokenizer.java @@ -228,7 +228,7 @@ final class Tokenizer { // chars JSON allows to be part of a number static final String numberChars = "0123456789eE+-."; // chars that stop an unquoted string - static final String notInUnquotedText = "$\"{}[]:=,\\+#"; + static final String notInUnquotedText = "$\"{}[]:=,+#`^?!@*&\\"; // The rules here are intended to maximize convenience while // avoiding confusion with real valid JSON. Basically anything diff --git a/src/test/scala/com/typesafe/config/impl/TestUtils.scala b/src/test/scala/com/typesafe/config/impl/TestUtils.scala index 3a580e1c..543fcb93 100644 --- a/src/test/scala/com/typesafe/config/impl/TestUtils.scala +++ b/src/test/scala/com/typesafe/config/impl/TestUtils.scala @@ -156,6 +156,13 @@ abstract trait TestUtils { "[ = ]", // = is not a valid token in unquoted text "[ + ]", "[ # ]", + "[ ` ]", + "[ ^ ]", + "[ ? ]", + "[ ! ]", + "[ @ ]", + "[ * ]", + "[ & ]", "[ \\ ]", ParseTest(true, "[ \"foo\nbar\" ]"), // unescaped newline in quoted string, lift doesn't care "[ # comment ]",