123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240 |
|
/*******************************************************************************
copyright: Copyright (c) 2007 Kris Bell. All rights reserved
license: BSD style: $(LICENSE)
version: Initial release: Nov 2007
author: Kris
UTF conversion streams, supporting cross-translation of char, wchar
and dchar variants. For supporting endian variations, configure the
appropriate EndianStream upstream of this one (closer to the source.)
*******************************************************************************/
module tango.io.stream.Utf;
private import tango.io.device.Conduit;
private import tango.io.stream.Buffered;
private import Utf = tango.text.convert.Utf;
/*******************************************************************************
Streaming UTF converter. Type T is the target or destination type,
while S is the source type. Both types are either char/wchar/dchar.
*******************************************************************************/
class UtfInput(T, S) : InputFilter, InputFilter.Mutator
{
        // Emit a compile-time diagnostic when either type parameter is
        // something other than one of the three D character types
        static if (!(is (S == char) || is (S == wchar) || is (S == dchar)))
                   pragma (msg, "Source type must be char, wchar, or dchar");

        static if (!(is (T == char) || is (T == wchar) || is (T == dchar)))
                   pragma (msg, "Target type must be char, wchar, or dchar");

        // buffered view of the upstream source, created by the constructor
        private InputBuffer buffer;

        /***********************************************************************

                Create a buffered utf input converter. The provided
                stream is wrapped via BufferedInput.create, and the
                resulting buffer becomes the source of this filter.

        ***********************************************************************/

        this (InputStream stream)
        {
                super (buffer = BufferedInput.create (stream));
        }

        /***********************************************************************

                Read converted content into an array of type T, and
                return the number of array elements populated (the
                byte count from read() divided by T.sizeof).

                Returns Eof upon end-of-flow.

        ***********************************************************************/

        final size_t consume (T[] dst)
        {
                auto bytes = read (dst);
                return (bytes == Eof) ? bytes : bytes / T.sizeof;
        }

        /***********************************************************************

                Fill dst with content of type T, converted from the
                buffered source content of type S. Where both types
                are the same, the request is simply forwarded to the
                superclass without any conversion.

                An error is raised through the conduit when dst cannot
                hold at least one element of type T.

                Returns the number of bytes placed into dst, or Eof
                when the buffer has no further content to offer.

        ***********************************************************************/

        final override size_t read (void[] dst)
        {
                static if (is (S == T))
                           return super.read (dst);
                else
                   {
                   // both are assigned by the nested delegate below:
                   // 'eaten' counts source elements, 'made' counts
                   // target elements
                   size_t eaten,
                          made;

                   size_t transcode (const(void)[] src)
                   {
                           // not even one whole source element is
                           // available: answer Eof to the buffer
                           if (src.length < S.sizeof)
                               return Eof;

                           auto input  = BufferedInput.convert!(S)(src);
                           auto output = BufferedInput.convert!(T)(dst);

                           // select the converter for the target type
                           static if (is (T == char))
                                      made = Utf.toString (input, output, &eaten).length;
                           else static if (is (T == wchar))
                                      made = Utf.toString16 (input, output, &eaten).length;
                           else static if (is (T == dchar))
                                      made = Utf.toString32 (input, output, &eaten).length;

                           // tell the buffer how many bytes were used
                           return eaten * S.sizeof;
                   }

                   // there must be room for at least one target element
                   if (dst.length < T.sizeof)
                       conduit.error ("UtfStream.read :: target array is too small");

                   // convert the next chunk of buffered input
                   if (! buffer.next (&transcode))
                       return Eof;

                   return made * T.sizeof;
                   }
        }
}
/*******************************************************************************
Streaming UTF converter. Type T is the target or destination type,
while S is the source type. Both types are either char/wchar/dchar.
Note that the arguments are reversed from those of UtfInput.
*******************************************************************************/
class UtfOutput (S, T) : OutputFilter, OutputFilter.Mutator
{
        // Emit a compile-time diagnostic when either type parameter is
        // something other than one of the three D character types
        static if (!is (S == char) && !is (S == wchar) && !is (S == dchar))
                   pragma (msg, "Source type must be char, wchar, or dchar");

        static if (!is (T == char) && !is (T == wchar) && !is (T == dchar))
                   pragma (msg, "Target type must be char, wchar, or dchar");

        // buffered view of the downstream sink, created by the constructor
        private OutputBuffer buffer;

        /***********************************************************************

                Create a buffered utf output converter. The provided
                stream is wrapped via BufferedOutput.create, and the
                resulting buffer becomes the sink of this filter.

        ***********************************************************************/

        this (OutputStream stream)
        {
                super (buffer = BufferedOutput.create (stream));
        }

        /***********************************************************************

                Consume input of type S, and return the number of array
                elements consumed (the byte count from write() divided
                by S.sizeof). Note that the argument named dst is the
                content to be written.

                Returns Eof upon end-of-flow.

        ***********************************************************************/

        final size_t consume (const(S)[] dst)
        {
                auto x = write (dst);
                if (x != Eof)
                    x /= S.sizeof;
                return x;
        }

        /***********************************************************************

                Write to the output stream from a source array. The provided
                src content is converted as necessary. Note that an attached
                output buffer must be at least four bytes wide to accommodate
                a conversion.

                Where S and T are the same type, the request is simply
                forwarded to the superclass without any conversion.

                Returns the number of bytes consumed from src, which may be
                less than the quantity provided. Returns Eof when the
                buffer cannot accept a conversion even after a flush.

        ***********************************************************************/

        final override size_t write (const(void)[] src)
        {
                static if (is (S == T))
                           return super.write (src);
                else
                   {
                   // size_t rather than uint: array lengths are size_t,
                   // and this matches the counters used by UtfInput.read
                   // for the same Utf conversion routines
                   size_t consumed,
                          produced;

                   size_t writer (void[] dst)
                   {
                           // buffer must be at least 4 bytes wide
                           // to contain a generic conversion
                           if (dst.length < 4)
                               return Eof;

                           auto input = BufferedOutput.convert!(S)(src);
                           auto output = BufferedOutput.convert!(T)(dst);

                           // select the converter for the target type
                           static if (is (T == char))
                                      produced = Utf.toString(input, output, &consumed).length;

                           static if (is (T == wchar))
                                      produced = Utf.toString16(input, output, &consumed).length;

                           static if (is (T == dchar))
                                      produced = Utf.toString32(input, output, &consumed).length;

                           // tell the buffer how many bytes were added
                           return produced * T.sizeof;
                   }

                   // write directly into buffered content and
                   // flush when the output is full
                   if (buffer.writer(&writer) is Eof)
                      {
                      buffer.flush;
                      if (buffer.writer(&writer) is Eof)
                          return Eof;
                      }

                   return consumed * S.sizeof;
                   }
        }
}
/*******************************************************************************
*******************************************************************************/
debug (Utf)
{
        import tango.io.Stdout;
        import tango.io.device.Array;

        /***********************************************************************

                Sanity check, compiled only with -debug=Utf: pushes
                text through a char-to-dchar input converter, then
                through a dchar-to-char output converter, and checks
                the content held by the output buffer.

        ***********************************************************************/

        void main()
        {
                // target is dchar, source is char
                auto source = new UtfInput!(dchar, char)(new Array("hello world".dup));

                // source is dchar, target is char (arguments are reversed)
                auto sink = new UtfOutput!(dchar, char)(new Array(20));

                sink.copy (source);
                assert (sink.buffer.slice == "hello world");
        }
}
|