| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782 | /******************************************************************************* copyright: Copyright (c) 2004 Kris Bell. All rights reserved license: BSD style: $(LICENSE) version: Initial release: Nov 2005 author: Kris A set of functions for converting between string and integer values. Applying the D "import alias" mechanism to this module is highly recommended, in order to limit namespace pollution: --- import Integer = tango.text.convert.Integer; auto i = Integer.parse ("32767"); --- *******************************************************************************/ module tango.text.convert.Integer; private import tango.core.Exception; private import tango.core.Octal; /****************************************************************************** Parse an integer value from the provided 'digits' string. The string is inspected for a sign and an optional radix prefix. A radix may be provided as an argument instead, whereupon it must match the prefix (where present). When radix is set to zero, conversion will default to decimal. Throws: IllegalArgumentException where the input text is not parsable in its entirety. See_also: the low level functions parse() and convert() ******************************************************************************/ int toInt(T) (const(T[]) digits, uint radix=0) { auto x = toLong (digits, radix); if (x > int.max) throw new IllegalArgumentException ("Integer.toInt :: integer overflow"); return cast(int) x; } /****************************************************************************** Parse an integer value from the provided 'digits' string. The string is inspected for a sign and an optional radix prefix. A radix may be provided as an argument instead, whereupon it must match the prefix (where present). When radix is set to zero, conversion will default to decimal. Throws: IllegalArgumentException where the input text is not parsable in its entirety. See_also: the low level functions parse() and convert() ******************************************************************************/ long toLong(T) (const(T[]) digits, uint radix=0) { size_t len; auto x = parse (digits, radix, &len); if (len < digits.length) throw new IllegalArgumentException ("Integer.toLong :: invalid literal"); return x; } /****************************************************************************** Parse an unsignedinteger value from the provided 'digits' string. The string is inspected for an optional radix prefix. A radix may be provided as an argument instead, whereupon it must match the prefix (where present). When radix is set to zero, conversion will default to decimal. Throws: IllegalArgumentException where the input text is not parsable in its entirety. See_also: the low level functions parse() and convert() ******************************************************************************/ ulong toUlong(T) (const(T[]) digits, uint radix=0) { bool sign = false; auto eaten = trim (digits, sign, radix); if (sign) throw new IllegalArgumentException ("Integer.toUlong :: invalid literal"); size_t len = 0; auto value = convert (digits[eaten..$], radix, &len); if (len == 0 || eaten + len < digits.length) throw new IllegalArgumentException ("Integer.toUlong :: invalid literal"); return value; } /****************************************************************************** Wrapper to make life simpler. Returns a text version of the provided value. See format() for details ******************************************************************************/ char[] toString (long i, const(char[]) fmt = null) { char[66] tmp = void; return format (tmp, i, fmt).dup; } /****************************************************************************** Wrapper to make life simpler. Returns a text version of the provided value. See format() for details ******************************************************************************/ wchar[] toString16 (long i, const(wchar[]) fmt = null) { wchar[66] tmp = void; return format (tmp, i, fmt).dup; } /****************************************************************************** Wrapper to make life simpler. Returns a text version of the provided value. See format() for details ******************************************************************************/ dchar[] toString32 (long i, const(dchar[]) fmt = null) { dchar[66] tmp = void; return format (tmp, i, fmt).dup; } /******************************************************************************* Supports format specifications via an array, where format follows the notation given below: --- type width prefix --- Type is one of [d, g, u, b, x, o] or uppercase equivalent, and dictates the conversion radix or other semantics. Width is optional and indicates a minimum width for zero-padding, while the optional prefix is one of ['#', ' ', '+'] and indicates what variety of prefix should be placed in the output. e.g. --- "d" => integer "u" => unsigned "o" => octal "b" => binary "x" => hexadecimal "X" => hexadecimal uppercase "d+" => integer prefixed with "+" "b#" => binary prefixed with "0b" "x#" => hexadecimal prefixed with "0x" "X#" => hexadecimal prefixed with "0X" "d8" => decimal padded to 8 places as required "b8" => binary padded to 8 places as required "b8#" => binary padded to 8 places and prefixed with "0b" --- Note that the specified width is exclusive of the prefix, though the width padding will be shrunk as necessary in order to ensure a requested prefix can be inserted into the provided output. *******************************************************************************/ T[] format(T) (T[] dst, long i, const(T[]) fmt = null) { char pre, type; int width; decode (fmt, type, pre, width); return formatter (dst, i, type, pre, width); } private void decode(T) (in T[] fmt, ref char type, out char pre, out int width) { if (fmt.length is 0) type = 'd'; else { type = cast(char)fmt[0]; if (fmt.length > 1) { auto p = &fmt[1]; for (int j=1; j < fmt.length; ++j, ++p) if (*p >= '0' && *p <= '9') width = width * 10 + (*p - '0'); else pre = cast(char)*p; } } } private struct _FormatterInfo(T) { uint radix; immutable(T)[] prefix; immutable(T)[] numbers; } T[] formatter(T) (T[] dst, long i, char type, char pre, int width) { enum immutable(T)[] lower = "0123456789abcdef"; enum immutable(T)[] upper = "0123456789ABCDEF"; alias _FormatterInfo!(T) Info; enum Info[] formats = [ {10, null, lower}, {10, "-", lower}, {10, " ", lower}, {10, "+", lower}, { 2, "0b", lower}, { 8, "0o", lower}, {16, "0x", lower}, {16, "0X", upper}, ]; ubyte index; int len = cast(int)dst.length; if (len) { switch (type) { case 'd': case 'D': case 'g': case 'G': if (i < 0) { index = 1; i = -i; } else if (pre is ' ') index = 2; else if (pre is '+') index = 3; goto case 'U'; case 'u': case 'U': pre = '#'; break; case 'b': case 'B': index = 4; break; case 'o': case 'O': index = 5; break; case 'x': index = 6; break; case 'X': index = 7; break; default: return cast(T[])"{unknown format '".dup~cast(T)type~cast(T[])"'}".dup; } auto info = &formats[index]; auto numbers = info.numbers; auto radix = info.radix; // convert number to text auto p = dst.ptr + len; if (uint.max >= cast(ulong) i) { auto v = cast (uint) i; do { *--p = numbers [v % radix]; } while ((v /= radix) && --len); } else { auto v = cast (ulong) i; do { *--p = numbers [cast(uint) (v % radix)]; } while ((v /= radix) && --len); } auto prefix = (pre is '#') ? info.prefix : null; if (len > prefix.length) { len -= prefix.length + 1; // prefix number with zeros? if (width) { width = cast(int)dst.length - width - cast(int)prefix.length; while (len > width && len > 0) { *--p = '0'; --len; } } // write optional prefix string ... dst [len .. len + prefix.length] = prefix; // return slice of provided output buffer return dst [len .. $]; } } return cast(T[])"{output width too small}".dup; } /****************************************************************************** Parse an integer value from the provided 'digits' string. The string is inspected for a sign and an optional radix prefix. A radix may be provided as an argument instead, whereupon it must match the prefix (where present). When radix is set to zero, conversion will default to decimal. A non-null 'ate' will return the number of characters used to construct the returned value. Throws: none. The 'ate' param should be checked for valid input. ******************************************************************************/ long parse(T) (T[] digits, uint radix=0, size_t* ate=null) { bool sign; auto eaten = trim (digits, sign, radix); auto value = convert (digits[eaten..$], radix, ate); // check *ate > 0 to make sure we don't parse "-" as 0. if (ate && *ate > 0) *ate += eaten; return cast(long) (sign ? -value : value); } /****************************************************************************** Convert the provided 'digits' into an integer value, without checking for a sign or radix. The radix defaults to decimal (10). Returns the value and updates 'ate' with the number of characters consumed. Throws: none. The 'ate' param should be checked for valid input. ******************************************************************************/ ulong convert(T) (const(T[]) digits, uint radix=10, size_t* ate=null) { uint eaten; ulong value; foreach (c; cast(T[])digits) { if (c >= '0' && c <= '9') {} else if (c >= 'a' && c <= 'z') c -= 39; else if (c >= 'A' && c <= 'Z') c -= 7; else break; if ((c -= '0') < radix) { value = value * radix + c; ++eaten; } else break; } if (ate) *ate = eaten; return value; } /****************************************************************************** Strip leading whitespace, extract an optional +/- sign, and an optional radix prefix. If the radix value matches an optional prefix, or the radix is zero, the prefix will be consumed and assigned. Where the radix is non zero and does not match an explicit prefix, the latter will remain unconsumed. Otherwise, radix will default to 10. Returns the number of characters consumed. ******************************************************************************/ size_t trim(T) (const(T[]) digits, ref bool sign, ref uint radix) { T c; const (T)* p = digits.ptr; auto len = digits.length; if (len) { // strip off whitespace and sign characters for (c = *p; len; c = *++p, --len) if (c is ' ' || c is '\t') {} else if (c is '-') sign = true; else if (c is '+') sign = false; else break; // strip off a radix specifier also? auto r = radix; if (c is '0' && len > 1) switch (*++p) { case 'x': case 'X': ++p; r = 16; break; case 'b': case 'B': ++p; r = 2; break; case 'o': case 'O': ++p; r = 8; break; default: --p; break; } // default the radix to 10 if (r is 0) radix = 10; else // explicit radix must match (optional) prefix if (radix != r) { if (radix) p -= 2; else radix = r; } } // return number of characters eaten return (p - digits.ptr); } /****************************************************************************** quick & dirty text-to-unsigned int converter. Use only when you know what the content is, or use parse() or convert() instead. Return the parsed uint ******************************************************************************/ uint atoi(T) (T[] s, int radix = 10) { uint value; foreach (c; s) if (c >= '0' && c <= '9') value = value * radix + (c - '0'); else break; return value; } /****************************************************************************** quick & dirty unsigned to text converter, where the provided output must be large enough to house the result (10 digits in the largest case). For mainstream use, consider utilizing format() instead. Returns a populated slice of the provided output ******************************************************************************/ T[] itoa(T) (T[] output, uint value, int radix = 10) { T* p = output.ptr + output.length; do { *--p = cast(T)(value % radix + '0'); } while (value /= radix); return output[cast(size_t) (p-output.ptr) .. $]; } /****************************************************************************** Consume a number from the input without converting it. Argument 'fp' enables floating-point consumption. Supports hex input for numbers which are prefixed appropriately Since version 0.99.9 ******************************************************************************/ T[] consume(T) (T[] src, bool fp=false) { T c; bool sign; uint radix; // remove leading space, and sign auto e = src.ptr + src.length; auto p = src.ptr + trim (src, sign, radix); auto b = p; // bail out if the string is empty if (src.length is 0 || p > &src[$-1]) return null; // read leading digits for (c=*p; p < e && ((c >= '0' && c <= '9') || (radix is 16 && ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))));) c = *++p; if (fp) { // gobble up a point if (c is '.' && p < e) c = *++p; // read fractional digits while (c >= '0' && c <= '9' && p < e) c = *++p; // did we consume anything? if (p > b) { // consume exponent? if ((c is 'e' || c is 'E') && p < e ) { c = *++p; if (c is '+' || c is '-') c = *++p; while (c >= '0' && c <= '9' && p < e) c = *++p; } } } return src [0 .. p-src.ptr]; } /****************************************************************************** ******************************************************************************/ debug (UnitTest) { unittest { char[64] tmp; assert (toInt("1") is 1); assert (toLong("1") is 1); assert (toInt("1", 10) is 1); assert (toLong("1", 10) is 1); assert (toUlong("1", 10) is 1); assert (toUlong("18446744073709551615") is ulong.max); assert (atoi ("12345") is 12345); assert (itoa (tmp, 12345) == "12345"); assert(parse( "0"w ) == 0 ); assert(parse( "1"w ) == 1 ); assert(parse( "-1"w ) == -1 ); assert(parse( "+1"w ) == 1 ); // numerical limits assert(parse( "-2147483648" ) == int.min ); assert(parse( "2147483647" ) == int.max ); assert(parse( "4294967295" ) == uint.max ); assert(parse( "-9223372036854775808" ) == long.min ); assert(parse( "9223372036854775807" ) == long.max ); assert(parse( "18446744073709551615" ) == ulong.max ); // hex assert(parse( "a", 16) == 0x0A ); assert(parse( "b", 16) == 0x0B ); assert(parse( "c", 16) == 0x0C ); assert(parse( "d", 16) == 0x0D ); assert(parse( "e", 16) == 0x0E ); assert(parse( "f", 16) == 0x0F ); assert(parse( "A", 16) == 0x0A ); assert(parse( "B", 16) == 0x0B ); assert(parse( "C", 16) == 0x0C ); assert(parse( "D", 16) == 0x0D ); assert(parse( "E", 16) == 0x0E ); assert(parse( "F", 16) == 0x0F ); assert(parse( "FFFF", 16) == ushort.max ); assert(parse( "ffffFFFF", 16) == uint.max ); assert(parse( "ffffFFFFffffFFFF", 16u ) == ulong.max ); // oct assert(parse( "55", 8) == octal!(55) ); assert(parse( "100", 8) == octal!(100) ); // bin assert(parse( "10000", 2) == 0x10 ); // trim assert(parse( " \t20") == 20 ); assert(parse( " \t-20") == -20 ); assert(parse( "- \t 20") == -20 ); // recognise radix prefix assert(parse( "0xFFFF" ) == ushort.max ); assert(parse( "0XffffFFFF" ) == uint.max ); assert(parse( "0o55") == octal!(55) ); assert(parse( "0O55" ) == octal!(55) ); assert(parse( "0b10000") == 0x10 ); assert(parse( "0B10000") == 0x10 ); // prefix tests auto str = "0x"; assert(parse( str[0..1] ) == 0 ); assert(parse("0x10", 10) == 0); assert(parse("0b10", 10) == 0); assert(parse("0o10", 10) == 0); assert(parse("0b10") == 0b10); assert(parse("0o10") == octal!(10)); assert(parse("0b10", 2) == 0b10); assert(parse("0o10", 8) == octal!(10)); // revised tests assert (format(tmp, 10, "d") == "10"); assert (format(tmp, -10, "d") == "-10"); assert (format(tmp, 10L, "u") == "10"); assert (format(tmp, 10L, "U") == "10"); assert (format(tmp, 10L, "g") == "10"); assert (format(tmp, 10L, "G") == "10"); assert (format(tmp, 10L, "o") == "12"); assert (format(tmp, 10L, "O") == "12"); assert (format(tmp, 10L, "b") == "1010"); assert (format(tmp, 10L, "B") == "1010"); assert (format(tmp, 10L, "x") == "a"); assert (format(tmp, 10L, "X") == "A"); assert (format(tmp, 10L, "d+") == "+10"); assert (format(tmp, 10L, "d ") == " 10"); assert (format(tmp, 10L, "d#") == "10"); assert (format(tmp, 10L, "x#") == "0xa"); assert (format(tmp, 10L, "X#") == "0XA"); assert (format(tmp, 10L, "b#") == "0b1010"); assert (format(tmp, 10L, "o#") == "0o12"); assert (format(tmp, 10L, "d1") == "10"); assert (format(tmp, 10L, "d8") == "00000010"); assert (format(tmp, 10L, "x8") == "0000000a"); assert (format(tmp, 10L, "X8") == "0000000A"); assert (format(tmp, 10L, "b8") == "00001010"); assert (format(tmp, 10L, "o8") == "00000012"); assert (format(tmp, 10L, "d1#") == "10"); assert (format(tmp, 10L, "d6#") == "000010"); assert (format(tmp, 10L, "x6#") == "0x00000a"); assert (format(tmp, 10L, "X6#") == "0X00000A"); char[8] tmp1; assert (format(tmp1, 10L, "b12#") == "0b001010"); assert (format(tmp1, 10L, "o12#") == "0o000012"); } } /****************************************************************************** ******************************************************************************/ debug (Integer) { import tango.io.Stdout; void main() { char[8] tmp; Stdout.formatln ("d '{}'", format(tmp, 10)); Stdout.formatln ("d '{}'", format(tmp, -10)); Stdout.formatln ("u '{}'", format(tmp, 10L, "u")); Stdout.formatln ("U '{}'", format(tmp, 10L, "U")); Stdout.formatln ("g '{}'", format(tmp, 10L, "g")); Stdout.formatln ("G '{}'", format(tmp, 10L, "G")); Stdout.formatln ("o '{}'", format(tmp, 10L, "o")); Stdout.formatln ("O '{}'", format(tmp, 10L, "O")); Stdout.formatln ("b '{}'", format(tmp, 10L, "b")); Stdout.formatln ("B '{}'", format(tmp, 10L, "B")); Stdout.formatln ("x '{}'", format(tmp, 10L, "x")); Stdout.formatln ("X '{}'", format(tmp, 10L, "X")); Stdout.formatln ("d+ '{}'", format(tmp, 10L, "d+")); Stdout.formatln ("ds '{}'", format(tmp, 10L, "d ")); Stdout.formatln ("d# '{}'", format(tmp, 10L, "d#")); Stdout.formatln ("x# '{}'", format(tmp, 10L, "x#")); Stdout.formatln ("X# '{}'", format(tmp, 10L, "X#")); Stdout.formatln ("b# '{}'", format(tmp, 10L, "b#")); Stdout.formatln ("o# '{}'", format(tmp, 10L, "o#")); Stdout.formatln ("d1 '{}'", format(tmp, 10L, "d1")); Stdout.formatln ("d8 '{}'", format(tmp, 10L, "d8")); Stdout.formatln ("x8 '{}'", format(tmp, 10L, "x8")); Stdout.formatln ("X8 '{}'", format(tmp, 10L, "X8")); Stdout.formatln ("b8 '{}'", format(tmp, 10L, "b8")); Stdout.formatln ("o8 '{}'", format(tmp, 10L, "o8")); Stdout.formatln ("d1# '{}'", format(tmp, 10L, "d1#")); Stdout.formatln ("d6# '{}'", format(tmp, 10L, "d6#")); Stdout.formatln ("x6# '{}'", format(tmp, 10L, "x6#")); Stdout.formatln ("X6# '{}'", format(tmp, 10L, "X6#")); Stdout.formatln ("b12# '{}'", format(tmp, 10L, "b12#")); Stdout.formatln ("o12# '{}'", format(tmp, 10L, "o12#")).newline; Stdout.formatln (consume("10")); Stdout.formatln (consume("0x1f")); Stdout.formatln (consume("0.123")); Stdout.formatln (consume("0.123", true)); Stdout.formatln (consume("0.123e-10", true)).newline; Stdout.formatln (consume("10 s")); Stdout.formatln (consume("0x1f s")); Stdout.formatln (consume("0.123 s")); Stdout.formatln (consume("0.123 s", true)); Stdout.formatln (consume("0.123e-10 s", true)).newline; } } |