Change getMemorySizeInBytes to getBytes and support mebi/gibi/etc.

This commit is contained in:
Havoc Pennington 2011-11-25 11:56:59 -05:00
parent 1d62477338
commit 515a71ba66
5 changed files with 189 additions and 91 deletions

View File

@ -752,8 +752,8 @@ must be lowercase. Exactly these strings are supported:
### Size in bytes format
Implementations may wish to support a `getMemorySizeInBytes()`
returning a size in bytes.
Implementations may wish to support a `getBytes()` returning a
size in bytes.
This can use the general "units format" described above; bare
numbers are taken to be in bytes already, while strings are
@ -763,21 +763,49 @@ The one-letter unit strings may be uppercase (note: duration units
are always lowercase, so this convention is specific to size
units).
Exactly these strings are supported:
There is an unfortunate nightmare with size-in-bytes units, that
they may be in powers of two or powers of ten. The approach
defined by standards bodies appears to differ from common usage,
such that following the standard leads to people being confused.
Worse, common usage varies based on whether people are talking
about RAM or disk sizes, and various existing operating systems
and apps do all kinds of different things. See
http://en.wikipedia.org/wiki/Binary_prefix#Deviation_between_powers_of_1024_and_powers_of_1000
for examples. It appears impossible to sort this out without
causing confusion for someone sometime.
For single bytes, exactly these strings are supported:
- `B`, `b`, `byte`, `bytes`
- `K`, `k`, `kilobyte`, `kilobytes`
- `M`, `m`, `megabyte`, `megabytes`
- `G`, `g`, `gigabyte`, `gigabytes`
- `T`, `t`, `terabyte`, `terabytes`
Values are interpreted as for memory (powers of two scale) not as
for hard drives (powers of ten scale).
For powers of ten, exactly these strings are supported:
(A generic `getBytes()`, as opposed to `getMemorySizeInBytes()`,
might wish to support both the SI power of ten units and the IEC
power of two units. But until an implementation needs that, no
such thing is documented here.)
- `kB`, `kilobyte`, `kilobytes`
- `MB`, `megabyte`, `megabytes`
- `GB`, `gigabyte`, `gigabytes`
- `TB`, `terabyte`, `terabytes`
- `PB`, `petabyte`, `petabytes`
- `EB`, `exabyte`, `exabytes`
- `ZB`, `zettabyte`, `zettabytes`
- `YB`, `yottabyte`, `yottabytes`
For powers of two, exactly these strings are supported:
- `K`, `k`, `Ki`, `KiB`, `kibibyte`, `kibibytes`
- `M`, `m`, `Mi`, `MiB`, `mebibyte`, `mebibytes`
- `G`, `g`, `Gi`, `GiB`, `gibibyte`, `gibibytes`
- `T`, `t`, `Ti`, `TiB`, `tebibyte`, `tebibytes`
- `P`, `p`, `Pi`, `PiB`, `pebibyte`, `pebibytes`
- `E`, `e`, `Ei`, `EiB`, `exbibyte`, `exbibytes`
- `Z`, `z`, `Zi`, `ZiB`, `zebibyte`, `zebibytes`
- `Y`, `y`, `Yi`, `YiB`, `yobibyte`, `yobibytes`
It's very unclear which units the single-character abbreviations
("128K") should go with; some precedents such as `java -Xmx 2G`
and the GNU tools such as `ls` map these to powers of two, so this
spec copies that. You can certainly find examples of mapping these
to powers of ten, though. If you don't like ambiguity, don't use
the single-letter abbreviations.
### Config object merging and file merging

View File

@ -289,25 +289,23 @@ public interface Config extends ConfigMergeable {
ConfigValue getValue(String path);
/**
* Gets a value as a size in bytes (parses special strings like "128M"). The
* size units are interpreted as for memory, not as for disk space, so they
* are in powers of two. If the value is already a number, then it's left
* alone; if it's a string, it's parsed understanding unit suffixes such as
* "128K", as documented in the <a
* href="https://github.com/havocp/config/blob/master/HOCON.md">the
* Gets a value as a size in bytes (parses special strings like "128M"). If
* the value is already a number, then it's left alone; if it's a string,
* it's parsed understanding unit suffixes such as "128K", as documented in
* <a href="https://github.com/havocp/config/blob/master/HOCON.md">the
* spec</a>.
*
* @param path
* path expression
* @return the memory size value at the requested path, in bytes
* @return the value at the requested path, in bytes
* @throws ConfigException.Missing
* if value is absent or null
* @throws ConfigException.WrongType
* if value is not convertible to Long or String
* @throws ConfigException.BadValue
* if value cannot be parsed as a memory size
* if value cannot be parsed as a size in bytes
*/
Long getMemorySizeInBytes(String path);
Long getBytes(String path);
/**
* Get value as a duration in milliseconds. If the value is already a

View File

@ -4,7 +4,9 @@
package com.typesafe.config.impl;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import com.typesafe.config.Config;
@ -183,13 +185,13 @@ class SimpleConfig implements Config {
}
@Override
public Long getMemorySizeInBytes(String path) {
public Long getBytes(String path) {
Long size = null;
try {
size = getLong(path);
} catch (ConfigException.WrongType e) {
ConfigValue v = find(path, ConfigValueType.STRING, path);
size = parseMemorySizeInBytes((String) v.unwrapped(),
size = parseBytes((String) v.unwrapped(),
v.origin(), path);
}
return size;
@ -333,7 +335,7 @@ class SimpleConfig implements Config {
l.add(((Number) v.unwrapped()).longValue());
} else if (v.valueType() == ConfigValueType.STRING) {
String s = (String) v.unwrapped();
Long n = parseMemorySizeInBytes(s, v.origin(), path);
Long n = parseBytes(s, v.origin(), path);
l.add(n);
} else {
throw new ConfigException.WrongType(v.origin(), path,
@ -496,23 +498,87 @@ class SimpleConfig implements Config {
}
private static enum MemoryUnit {
BYTES(1), KILOBYTES(1024), MEGABYTES(1024 * 1024), GIGABYTES(
1024 * 1024 * 1024), TERABYTES(1024 * 1024 * 1024 * 1024);
BYTES("", 1024, 0),
int bytes;
KILOBYTES("kilo", 1000, 1),
MEGABYTES("mega", 1000, 2),
GIGABYTES("giga", 1000, 3),
TERABYTES("tera", 1000, 4),
PETABYTES("peta", 1000, 5),
EXABYTES("exa", 1000, 6),
ZETTABYTES("zetta", 1000, 7),
YOTTABYTES("yotta", 1000, 8),
MemoryUnit(int bytes) {
KIBIBYTES("kibi", 1024, 1),
MEBIBYTES("mebi", 1024, 2),
GIBIBYTES("gibi", 1024, 3),
TEBIBYTES("tebi", 1024, 4),
PEBIBYTES("pebi", 1024, 5),
EXBIBYTES("exbi", 1024, 6),
ZEBIBYTES("zebi", 1024, 7),
YOBIBYTES("yobi", 1024, 8);
final String prefix;
final int powerOf;
final int power;
final long bytes;
/**
 * Creates a unit whose size in bytes is {@code powerOf} raised to
 * {@code power}.
 *
 * NOTE(review): the scale is accumulated with unchecked long
 * multiplication, so 1000^7, 1000^8, 1024^7 and 1024^8 exceed
 * Long.MAX_VALUE and wrap around silently.
 *
 * @param prefix
 *            text placed before "byte"/"bytes" in the long unit name
 *            (empty for plain bytes)
 * @param powerOf
 *            base of the scale, 1000 or 1024
 * @param power
 *            exponent applied to the base
 */
MemoryUnit(String prefix, int powerOf, int power) {
    this.prefix = prefix;
    this.powerOf = powerOf;
    this.power = power;

    // multiply the base in once per exponent step
    long scale = 1;
    for (int step = 0; step < power; step++) {
        scale *= powerOf;
    }
    this.bytes = scale;
}
/**
 * Builds the table that maps every accepted unit spelling to its unit.
 *
 * Every unit gets its long names ("<prefix>byte" and "<prefix>bytes").
 * Plain bytes additionally accept "b", "B" and the empty string. Power
 * of 1024 units accept the bare first letter in either case plus the
 * "Xi" and "XiB" forms; power of 1000 units accept "kB" for the first
 * power and "XB" (uppercase letter) for the rest.
 *
 * @return a new map from unit string to unit
 * @throws RuntimeException
 *             if a unit has a base other than 1000 or 1024
 */
private static Map<String, MemoryUnit> makeUnitsMap() {
    Map<String, MemoryUnit> table = new HashMap<String, MemoryUnit>();
    for (MemoryUnit u : MemoryUnit.values()) {
        // long-form names, singular and plural
        table.put(u.prefix + "byte", u);
        table.put(u.prefix + "bytes", u);

        if (u.prefix.length() == 0) {
            table.put("b", u);
            table.put("B", u);
            table.put("", u); // no unit specified means bytes
            continue;
        }

        String lower = u.prefix.substring(0, 1);
        String upper = lower.toUpperCase();
        switch (u.powerOf) {
        case 1024:
            table.put(lower, u); // 512m
            table.put(upper, u); // 512M
            table.put(upper + "i", u); // 512Mi
            table.put(upper + "iB", u); // 512MiB
            break;
        case 1000:
            // only the first power keeps a lowercase letter: 512kB vs 512MB
            table.put((u.power == 1 ? lower : upper) + "B", u);
            break;
        default:
            throw new RuntimeException("broken MemoryUnit enum");
        }
    }
    return table;
}
// Lookup table from each accepted unit spelling to its unit. Built once
// when the enum is initialized; final so it can never be reassigned.
private static final Map<String, MemoryUnit> unitsMap = makeUnitsMap();

/**
 * Looks up the unit for an exact (case-sensitive) unit string.
 *
 * @param unit
 *            the unit text to look up
 * @return the matching unit, or null if the string is not a known unit
 */
static MemoryUnit parseUnit(String unit) {
    return unitsMap.get(unit);
}
}
/**
* Parses a memory-size string. If no units are specified in the string, it
* is assumed to be in bytes. The returned value is in bytes. The purpose of
* this function is to implement the memory-size-related methods in the
* ConfigObject interface. The units parsed are interpreted as powers of
* two, that is, the convention for memory rather than the convention for
* disk space.
* Parses a size-in-bytes string. If no units are specified in the string,
* it is assumed to be in bytes. The returned value is in bytes. The purpose
* of this function is to implement the size-in-bytes-related methods in the
* Config interface.
*
* @param input
* the string to parse
@ -524,19 +590,12 @@ class SimpleConfig implements Config {
* @throws ConfigException
* if string is invalid
*/
public static long parseMemorySizeInBytes(String input,
ConfigOrigin originForException, String pathForException) {
public static long parseBytes(String input, ConfigOrigin originForException,
String pathForException) {
String s = ConfigUtil.unicodeTrim(input);
String unitStringMaybePlural = getUnits(s);
String unitString;
if (unitStringMaybePlural.endsWith("s"))
unitString = unitStringMaybePlural.substring(0,
unitStringMaybePlural.length() - 1);
else
unitString = unitStringMaybePlural;
String unitStringLower = unitString.toLowerCase();
String numberString = ConfigUtil.unicodeTrim(s.substring(0, s.length()
- unitStringMaybePlural.length()));
String unitString = getUnits(s);
String numberString = ConfigUtil.unicodeTrim(s.substring(0,
s.length() - unitString.length()));
// this would be caught later anyway, but the error message
// is more helpful if we check it here.
@ -545,40 +604,25 @@ class SimpleConfig implements Config {
pathForException, "No number in size-in-bytes value '"
+ input + "'");
MemoryUnit units = null;
MemoryUnit units = MemoryUnit.parseUnit(unitString);
// the short abbreviations are case-insensitive but you can't write the
// long form words in all caps.
if (unitString.equals("") || unitStringLower.equals("b")
|| unitString.equals("byte")) {
units = MemoryUnit.BYTES;
} else if (unitStringLower.equals("k") || unitString.equals("kilobyte")) {
units = MemoryUnit.KILOBYTES;
} else if (unitStringLower.equals("m") || unitString.equals("megabyte")) {
units = MemoryUnit.MEGABYTES;
} else if (unitStringLower.equals("g") || unitString.equals("gigabyte")) {
units = MemoryUnit.GIGABYTES;
} else if (unitStringLower.equals("t") || unitString.equals("terabyte")) {
units = MemoryUnit.TERABYTES;
} else {
throw new ConfigException.BadValue(originForException,
pathForException, "Could not parse size unit '"
+ unitStringMaybePlural + "' (try b, k, m, g, t)");
if (units == null) {
throw new ConfigException.BadValue(originForException, pathForException,
"Could not parse size-in-bytes unit '" + unitString
+ "' (try k, K, kB, KiB, kilobytes, kibibytes)");
}
try {
// if the string is purely digits, parse as an integer to avoid
// possible precision loss;
// otherwise as a double.
// possible precision loss; otherwise as a double.
if (numberString.matches("[0-9]+")) {
return Long.parseLong(numberString) * units.bytes;
} else {
return (long) (Double.parseDouble(numberString) * units.bytes);
}
} catch (NumberFormatException e) {
throw new ConfigException.BadValue(originForException,
pathForException, "Could not parse memory size number '"
+ numberString + "'");
throw new ConfigException.BadValue(originForException, pathForException,
"Could not parse size-in-bytes number '" + numberString + "'");
}
}
}

View File

@ -602,7 +602,7 @@ class ConfigTest extends TestUtils {
}
intercept[ConfigException.Null] {
conf.getMemorySizeInBytes("nulls.null")
conf.getBytes("nulls.null")
}
// should throw WrongType if key is wrong type and not convertible
@ -631,7 +631,7 @@ class ConfigTest extends TestUtils {
}
intercept[ConfigException.WrongType] {
conf.getMemorySizeInBytes("ints")
conf.getBytes("ints")
}
// should throw BadPath on various bad paths
@ -658,7 +658,7 @@ class ConfigTest extends TestUtils {
}
intercept[ConfigException.BadValue] {
conf.getMemorySizeInBytes("strings.a")
conf.getBytes("strings.a")
}
}
@ -718,11 +718,11 @@ class ConfigTest extends TestUtils {
assertEquals(500L, conf.getMilliseconds("durations.halfSecond"))
// should get size in bytes
assertEquals(1024 * 1024L, conf.getMemorySizeInBytes("memsizes.meg"))
assertEquals(1024 * 1024L, conf.getMemorySizeInBytes("memsizes.megAsNumber"))
assertEquals(1024 * 1024L, conf.getBytes("memsizes.meg"))
assertEquals(1024 * 1024L, conf.getBytes("memsizes.megAsNumber"))
assertEquals(Seq(1024 * 1024L, 1024 * 1024L, 1024L * 1024L),
conf.getMemorySizeInBytesList("memsizes.megsList").asScala)
assertEquals(512 * 1024L, conf.getMemorySizeInBytes("memsizes.halfMeg"))
assertEquals(512 * 1024L, conf.getBytes("memsizes.halfMeg"))
}
@Test

View File

@ -42,36 +42,64 @@ class UnitParserTest extends TestUtils {
@Test
def parseMemorySizeInBytes() {
val oneMegs = List("1048576", "1048576b", "1048576bytes", "1048576byte",
def parseMem(s: String) = SimpleConfig.parseBytes(s, fakeOrigin(), "test")
val oneMebis = List("1048576", "1048576b", "1048576bytes", "1048576byte",
"1048576 b", "1048576 bytes",
" 1048576 b ", " 1048576 bytes ",
"1048576B",
"1024k", "1024K", "1024 kilobytes", "1024 kilobyte",
"1m", "1M", "1 M", "1 megabytes", "1 megabyte",
"0.0009765625g", "0.0009765625G", "0.0009765625 gigabytes", "0.0009765625 gigabyte")
"1024k", "1024K", "1024Ki", "1024KiB", "1024 kibibytes", "1024 kibibyte",
"1m", "1M", "1 M", "1Mi", "1MiB", "1 mebibytes", "1 mebibyte",
"0.0009765625g", "0.0009765625G", "0.0009765625Gi", "0.0009765625GiB", "0.0009765625 gibibytes", "0.0009765625 gibibyte")
def parseMem(s: String) = SimpleConfig.parseMemorySizeInBytes(s, fakeOrigin(), "test")
for (s <- oneMegs) {
for (s <- oneMebis) {
val result = parseMem(s)
assertEquals(1024 * 1024, result)
}
assertEquals(1024 * 1024 * 1024 * 1024, parseMem("1t"))
assertEquals(1024 * 1024 * 1024 * 1024, parseMem(" 1 T "))
assertEquals(1024 * 1024 * 1024 * 1024, parseMem("1 terabyte"))
assertEquals(1024 * 1024 * 1024 * 1024, parseMem(" 1 terabytes "))
val oneMegas = List("1000000", "1000000b", "1000000bytes", "1000000byte",
"1000000 b", "1000000 bytes",
" 1000000 b ", " 1000000 bytes ",
"1000000B",
"1000kB", "1000 kilobytes", "1000 kilobyte",
"1MB", "1 megabytes", "1 megabyte",
".001GB", ".001 gigabytes", ".001 gigabyte")
for (s <- oneMegas) {
val result = parseMem(s)
assertEquals(1000 * 1000, result)
}
var result = 1024L * 1024 * 1024
for (unit <- Seq("tebi", "pebi", "exbi", "zebi", "yobi")) {
val first = unit.substring(0, 1).toUpperCase()
result = result * 1024;
assertEquals(result, parseMem("1" + first))
assertEquals(result, parseMem("1" + first + "i"))
assertEquals(result, parseMem("1" + first + "iB"))
assertEquals(result, parseMem("1" + unit + "byte"))
assertEquals(result, parseMem("1" + unit + "bytes"))
}
result = 1000L * 1000 * 1000
for (unit <- Seq("tera", "peta", "exa", "zetta", "yotta")) {
val first = unit.substring(0, 1).toUpperCase()
result = result * 1000;
assertEquals(result, parseMem("1" + first + "B"))
assertEquals(result, parseMem("1" + unit + "byte"))
assertEquals(result, parseMem("1" + unit + "bytes"))
}
// bad units
val e = intercept[ConfigException.BadValue] {
SimpleConfig.parseMemorySizeInBytes("100 dollars", fakeOrigin(), "test")
SimpleConfig.parseBytes("100 dollars", fakeOrigin(), "test")
}
assertTrue(e.getMessage().contains("size unit"))
assertTrue(e.getMessage().contains("size-in-bytes unit"))
// bad number
val e2 = intercept[ConfigException.BadValue] {
SimpleConfig.parseMemorySizeInBytes("1 00 bytes", fakeOrigin(), "test")
SimpleConfig.parseBytes("1 00 bytes", fakeOrigin(), "test")
}
assertTrue(e2.getMessage().contains("size number"))
assertTrue(e2.getMessage().contains("size-in-bytes number"))
}
}