tango.sys.win32.CodePage

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
/*******************************************************************************

        copyright:      Copyright (c) 2007 Kris Bell. All rights reserved

        license:        BSD style: $(LICENSE)

        version:        Initial release: April 2007

        author:         Kris

*******************************************************************************/

module tango.sys.win32.CodePage;

private import tango.sys.Common;

private import tango.core.Exception;

/*******************************************************************************

        Convert text to and from Windows 'code pages'. This is non-portable,
        and will be unlikely to operate even across all Windows platforms.

*******************************************************************************/

struct CodePage
{
        /**********************************************************************

                Test a text array to see if it contains non-ascii elements.
                Returns true if ascii, false otherwise

        **********************************************************************/

        static bool isAscii (const(char)[] src)
        {
                foreach (c; src)
                         if (c & 0x80)
                             return false;
                return true;
        }


        /**********************************************************************

                Convert utf8 text to a codepage representation

                page  0     - the ansi code page
                      1     - the oem code page
                      2     - the mac code page
                      3     - ansi code page for the calling thread
                      65000 - UTF-7 translation
                      65001 - UTF-8 translation

                      or a region-specific codepage

                returns: a slice of the provided output buffer
                         representing converted text

                Note that the input must be utf8 encoded. Note also
                that the dst output should be sufficiently large to
                accomodate the output; a size of 2*src.length would
                be enough to host almost any conversion

        **********************************************************************/

        static char[] into (const(char)[] src, char[] dst, uint page=0)
        {
                return convert (src, dst, CP_UTF8, page);
        }


        /**********************************************************************

                Convert codepage text to a utf8 representation

                page  0     - the ansi code page
                      1     - the oem code page
                      2     - the mac code page
                      3     - ansi code page for the calling thread
                      65000 - UTF-7 translation
                      65001 - UTF-8 translation

                      or a region-specific codepage

                returns: a slice of the provided output buffer
                         representing converted text

                Note that the input will be utf8 encoded. Note also
                that the dst output should be sufficiently large to
                accomodate the output; a size of 2*src.length would
                be enough to host almost any conversion

        **********************************************************************/

        static char[] from (const(char)[] src, char[] dst, uint page=0)
        {
                return convert (src, dst, page, CP_UTF8);
        }


        /**********************************************************************

                Internal conversion routine; we avoid heap activity for
                strings of short and medium length. A zero is appended
                to the dst array in order to simplify C API conversions

        **********************************************************************/

        private static char[] convert (const(char)[] src, char[] dst, uint from, uint into)
        {
                size_t len = 0;

                // sanity check
                assert (dst.length);

                // got some input?
                if (src.length > 0)
                   {
                   wchar[2000] tmp = void;
                   wchar[] wide = (src.length <= tmp.length) ? tmp : new wchar[src.length];

                   len = MultiByteToWideChar (from, 0, cast(PCHAR)src.ptr, src.length,
                                              wide.ptr, wide.length);
                   if (len)
                       len = WideCharToMultiByte (into, 0, wide.ptr, len,
                                                  cast(PCHAR)dst.ptr, dst.length-1, null, null);
                   if (len is 0)
                       throw new IllegalArgumentException ("CodePage.convert :: "~SysError.lastMsg.idup);
                   }

                // append a null terminator
                dst[len] = 0;
                return dst [0 .. len];
        }
}


debug(Test)
{
        void main ()
        {
                char[] s = "foo";
                char[3] x = void;

                //if (! CodePage.isAscii (s))
                      s = CodePage.into (s, x);
                      s = CodePage.from (s, x);
        }
}