123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
/*******************************************************************************

        copyright:      Copyright (c) 2007 Kris Bell. All rights reserved

        license:        BSD style: $(LICENSE)

        version:        Initial release: Nov 2007

        author:         Kris

        UTF conversion streams, supporting cross-translation between the
        char, wchar and dchar variants. To handle endian variations, configure
        the appropriate EndianStream upstream of this one (closer to the source).

*******************************************************************************/

module tango.io.stream.Utf;

private import tango.io.device.Conduit;

private import tango.io.stream.Buffered;

private import Utf = tango.text.convert.Utf;

/*******************************************************************************

        Streaming UTF converter. Type T is the target or destination type,
        while S is the source type. Both types are either char/wchar/dchar.

*******************************************************************************/

class UtfInput(T, S) : InputFilter, InputFilter.Mutator
{
        // Compile-time diagnostics only: a message is printed when either
        // template argument is not one of the three character types
        static if (!(is (S == char) || is (S == wchar) || is (S == dchar)))
                    pragma (msg, "Source type must be char, wchar, or dchar");

        static if (!(is (T == char) || is (T == wchar) || is (T == dchar)))
                    pragma (msg, "Target type must be char, wchar, or dchar");

        // buffered view of the upstream source; also handed to the filter base
        private InputBuffer buffer;

        /***********************************************************************

                Construct a utf input converter on top of the given stream.
                The stream is wrapped via BufferedInput.create(), and the
                resulting buffer becomes the upstream source of this filter.

        ***********************************************************************/

        this (InputStream stream)
        {
                super (buffer = BufferedInput.create (stream));
        }

        /***********************************************************************

                Read converted content into an array of T, and return the
                number of array elements (not bytes) that were populated.

                Returns Eof upon end-of-flow.

        ***********************************************************************/

        final size_t consume (T[] dst)
        {
                auto bytes = read (dst);

                // read() reports bytes; scale to elements unless it is Eof
                return (bytes == Eof) ? bytes : bytes / T.sizeof;
        }

        /***********************************************************************

                Fill dst with content converted from S to T, returning the
                number of bytes placed into dst, or Eof when the buffer has
                no further content to offer.

                When S and T are the same type no conversion is required,
                and the request is passed directly to the superclass.

        ***********************************************************************/

        final override size_t read (void[] dst)
        {
                static if (is (S == T))
                   {
                   return super.read (dst);
                   }
                else
                   {
                   size_t eaten,        // source elements used by the converter
                          made;         // target elements written by the converter

                   // invoked by the buffer with its readable content; the
                   // return value tells the buffer how many bytes were used
                   size_t transcode (const(void)[] src)
                   {
                        // less than one whole source element available
                        if (src.length < S.sizeof)
                            return Eof;

                        auto target = BufferedInput.convert!(T)(dst);
                        auto source = BufferedInput.convert!(S)(src);

                        static if (is (T == char))
                                   made = Utf.toString (source, target, &eaten).length;
                        else static if (is (T == wchar))
                                   made = Utf.toString16 (source, target, &eaten).length;
                        else static if (is (T == dchar))
                                   made = Utf.toString32 (source, target, &eaten).length;

                        // report the consumed buffer content, in bytes
                        return eaten * S.sizeof;
                   }

                   // room for at least one target element is required
                   if (dst.length < T.sizeof)
                       conduit.error ("UtfStream.read :: target array is too small");

                   // convert the next chunk of buffered input
                   if (! buffer.next (&transcode))
                       return Eof;

                   return made * T.sizeof;
                   }
        }
}


/*******************************************************************************

        Streaming UTF converter. Type T is the target or destination type,
        while S is the source type. Both types are either char/wchar/dchar.

        Note that the arguments are reversed from those of UtfInput.

*******************************************************************************/

class UtfOutput (S, T) : OutputFilter, OutputFilter.Mutator
{
        // Compile-time diagnostics only: a message is printed when either
        // template argument is not one of the three character types
        static if (!is (S == char) && !is (S == wchar) && !is (S == dchar))
                    pragma (msg, "Source type must be char, wchar, or dchar");

        static if (!is (T == char) && !is (T == wchar) && !is (T == dchar))
                    pragma (msg, "Target type must be char, wchar, or dchar");


        // buffered view of the downstream sink; also handed to the filter base
        private OutputBuffer buffer;

        /***********************************************************************

                Create a buffered utf output converter. The stream is wrapped
                via BufferedOutput.create(), and the resulting buffer becomes
                the downstream sink of this filter.

        ***********************************************************************/

        this (OutputStream stream)
        {
                super (buffer = BufferedOutput.create (stream));
        }

        /***********************************************************************

                Consume input of type S, and return the number of array
                elements (not bytes) consumed. Note that, despite its name,
                the dst parameter holds the source content to be written.

                Returns Eof upon end-of-flow.

        ***********************************************************************/

        final size_t consume (const(S)[] dst)
        {
                auto x = write (dst);
                if (x != Eof)
                    x /= S.sizeof;
                return x;
        }

        /***********************************************************************

                Write to the output stream from a source array. The provided
                src content is converted as necessary. Note that an attached
                output buffer must be at least four bytes wide to accommodate
                a conversion.

                When S and T are the same type no conversion is required,
                and the request is passed directly to the superclass.

                Returns the number of bytes consumed from src, which may be
                less than the quantity provided. Returns Eof when the buffer
                cannot accept a conversion even after being flushed.

        ***********************************************************************/

        final override size_t write (const(void)[] src)
        {
                static if (is (S == T))
                           return super.write (src);
                else
                   {
                   // size_t rather than uint: these receive array lengths,
                   // and &consumed is handed to the Utf converters in the
                   // same manner as UtfInput.read does with its size_t
                   // counter. Using uint breaks 64-bit builds
                   size_t consumed,
                          produced;

                   // invoked by the buffer with its writable space; the
                   // return value tells the buffer how many bytes were added
                   size_t writer (void[] dst)
                   {
                        // buffer must be at least 4 bytes wide
                        // to contain a generic conversion
                        if (dst.length < 4)
                            return Eof;

                        auto input = BufferedOutput.convert!(S)(src);
                        auto output = BufferedOutput.convert!(T)(dst);

                        static if (is (T == char))
                                   produced = Utf.toString(input, output, &consumed).length;

                        static if (is (T == wchar))
                                   produced = Utf.toString16(input, output, &consumed).length;

                        static if (is (T == dchar))
                                   produced = Utf.toString32(input, output, &consumed).length;

                        return produced * T.sizeof;
                   }

                   // write directly into buffered content and
                   // flush when the output is full
                   if (buffer.writer(&writer) is Eof)
                      {
                      // make room, then give the conversion one more attempt
                      buffer.flush;
                      if (buffer.writer(&writer) is Eof)
                          return Eof;
                      }
                   return consumed * S.sizeof;
                   }
        }
}


/*******************************************************************************

*******************************************************************************/

debug (Utf)
{
        import tango.io.Stdout;
        import tango.io.device.Array;

        // Round-trip smoke test: char content is read as dchar, then
        // written back out as char, and compared against the original
        void main()
        {
                // input side: source type char, target type dchar
                auto source = new UtfInput!(dchar, char)(new Array("hello world".dup));

                // output side: source type dchar, target type char
                auto sink = new UtfOutput!(dchar, char)(new Array(20));

                sink.copy (source);
                assert (sink.buffer.slice == "hello world");
        }
}