123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361 |
|
/*******************************************************************************
Copyright: Copyright (C) 2008 Kris Bell. All rights reserved.
License: BSD style: $(LICENSE)
version: Aug 2008: Initial release
Authors: Kris
*******************************************************************************/
module tango.text.xml.DocEntity;
private import Util = tango.text.Util;
/******************************************************************************

        Convert the five predefined XML entity patterns to normal characters
        <pre>
        &amp;  => &
        &quot; => "
        &apos; => '
        &lt;   => <
        &gt;   => >
        </pre>

        Any other '&' (an unrecognised or truncated entity, or a lone
        ampersand) is copied through unchanged.

        Params:
        src = the text to decode
        dst = optional workspace; it is resized to src.length when it is
              shorter than that, in which case the result may no longer
              alias the buffer the caller passed in

        Returns:
        a slice of dst holding the decoded text. The content of src is
        always copied, even when it contains no '&' at all.

******************************************************************************/

T[] fromEntity (T) (const(T)[] src, T[] dst = null)
{
        ptrdiff_t delta;
        auto s = src.ptr;
        auto len = src.length;

        // take a peek first to see if there's anything; a result below
        // len means a '&' was located at that offset
        if ((delta = Util.indexOf (s, '&', len)) < len)
           {
           // make some room if not enough provided. Decoding never grows
           // the text, so src.length is always sufficient
           if (dst.length < src.length)
               dst.length = src.length;
           auto d = dst.ptr;

           // copy segments over, a chunk at a time
           do {
              // literal text preceding the '&'
              d [0 .. delta] = s [0 .. delta];
              len -= delta;
              s += delta;
              d += delta;

              // translate entity: token is the number of source elements
              // consumed, or zero when nothing was recognised
              auto token = 0;

              // s[0] is the '&'. Only look at s[1] when it is actually
              // part of the input: a trailing '&' leaves len == 1, and
              // pointer indexing is not bounds checked
              if (len > 1)
                 {
                 switch (s[1])
                        {
                        case 'a':
                             if (len > 4 && s[1..5] == "amp;")
                                {
                                *d++ = '&';
                                token = 5;
                                }
                             else
                                if (len > 5 && s[1..6] == "apos;")
                                   {
                                   *d++ = '\'';
                                   token = 6;
                                   }
                             break;

                        case 'g':
                             if (len > 3 && s[1..4] == "gt;")
                                {
                                *d++ = '>';
                                token = 4;
                                }
                             break;

                        case 'l':
                             if (len > 3 && s[1..4] == "lt;")
                                {
                                *d++ = '<';
                                token = 4;
                                }
                             break;

                        case 'q':
                             if (len > 5 && s[1..6] == "quot;")
                                {
                                *d++ = '"';
                                token = 6;
                                }
                             break;

                        default:
                             break;
                        }
                 }

              // not an entity we know: keep the '&' itself and move on
              if (token is 0)
                 {
                 *d++ = '&';
                 token = 1;
                 }

              s += token;
              len -= token;
              } while ((delta = Util.indexOf (s, '&', len)) < len);

           // copy tail too
           d [0 .. len] = s [0 .. len];
           return dst [0 .. (d + len) - dst.ptr];
           }

        // nothing to translate: hand back a plain copy of the input
        if (dst.length < src.length)
            dst.length = src.length;
        dst [0 .. src.length] = src;
        return dst [0 .. src.length];
}
/******************************************************************************

        Convert the five predefined XML entity patterns to normal characters
        ---
        &amp;  => &
        &quot; => "
        &apos; => '
        &lt;   => <
        &gt;   => >
        ---

        This variant does not require an interim workspace, and instead
        emits directly via the provided delegate

        Any other '&' (an unrecognised or truncated entity, or a lone
        ampersand) is emitted unchanged.

        Params:
        src  = the text to decode
        emit = sink invoked with consecutive pieces of the decoded text;
               pieces taken from the input are slices of src, and a piece
               may be empty. When src contains no '&' at all, emit is
               invoked exactly once with src itself.

******************************************************************************/

void fromEntity (T) (const(T)[] src, scope void delegate(const(T)[]) emit)
{
        ptrdiff_t delta;
        auto s = src.ptr;
        auto len = src.length;

        // take a peek first to see if there's anything; a result below
        // len means a '&' was located at that offset
        if ((delta = Util.indexOf (s, '&', len)) < len)
           {
           // copy segments over, a chunk at a time
           do {
              // literal text preceding the '&'
              emit (s [0 .. delta]);
              len -= delta;
              s += delta;

              // translate entity: token is the number of source elements
              // consumed, or zero when nothing was recognised
              auto token = 0;

              // s[0] is the '&'. Only look at s[1] when it is actually
              // part of the input: a trailing '&' leaves len == 1, and
              // pointer indexing is not bounds checked
              if (len > 1)
                 {
                 switch (s[1])
                        {
                        case 'a':
                             if (len > 4 && s[1..5] == "amp;")
                                {
                                emit ("&");
                                token = 5;
                                }
                             else
                                if (len > 5 && s[1..6] == "apos;")
                                   {
                                   emit ("'");
                                   token = 6;
                                   }
                             break;

                        case 'g':
                             if (len > 3 && s[1..4] == "gt;")
                                {
                                emit (">");
                                token = 4;
                                }
                             break;

                        case 'l':
                             if (len > 3 && s[1..4] == "lt;")
                                {
                                emit ("<");
                                token = 4;
                                }
                             break;

                        case 'q':
                             if (len > 5 && s[1..6] == "quot;")
                                {
                                emit ("\"");
                                token = 6;
                                }
                             break;

                        default:
                             break;
                        }
                 }

              // not an entity we know: keep the '&' itself and move on
              if (token is 0)
                 {
                 emit ("&");
                 token = 1;
                 }

              s += token;
              len -= token;
              } while ((delta = Util.indexOf (s, '&', len)) < len);

           // copy tail too
           emit (s [0 .. len]);
           }
        else
           emit (src);
}
/******************************************************************************

        Convert reserved chars to entities. For example: " => &quot;

        The five characters translated are:
        <pre>
        "  => &quot;
        >  => &gt;
        <  => &lt;
        &  => &amp;
        '  => &apos;
        </pre>

        A slice of the provided output buffer is returned. The output
        buffer should be sufficiently large to accommodate the converted
        output, or it will be allocated from the heap instead

        Params:
        src = the text to encode
        dst = optional workspace, resized whenever it is too small

        Returns:
        a slice of dst holding the encoded text. The content of src is
        always copied, even when it contains no reserved characters.

******************************************************************************/

T[] toEntity(T) (const(T)[] src, T[] dst = null)
{
        const(T)[] entity;
        auto s = src.ptr;               // scan position
        auto t = s;                     // start of the pending literal run
        auto e = s + src.length;        // one past the end of the input
        size_t index = 0;               // number of elements written to dst

        while (s < e)
               switch (*s)
                      {
                      case '"':
                           entity = "&quot;";
                           goto common;

                      case '>':
                           entity = "&gt;";
                           goto common;

                      case '<':
                           entity = "&lt;";
                           goto common;

                      case '&':
                           entity = "&amp;";
                           goto common;

                      case '\'':
                           entity = "&apos;";
                           goto common;

                      common:
                           // flush the literal run t .. s, then the entity
                           auto len = s - t;
                           if (dst.length <= index + len + entity.length)
                               // grow by what is needed plus half again, so
                               // repeated hits do not reallocate every time
                               dst.length = (dst.length + len + entity.length) + dst.length / 2;

                           dst [index .. index + len] = t [0 .. len];
                           index += len;

                           dst [index .. index + entity.length] = entity;
                           index += entity.length;

                           // the next literal run starts after this char
                           t = ++s;
                           break;

                      default:
                           ++s;
                           break;
                      }

        // did we change anything? index is non-zero only once an entity
        // has been written
        if (index)
           {
           // copy tail too
           auto len = e - t;
           if (dst.length < index + len)
               dst.length = index + len;

           dst [index .. index + len] = t [0 .. len];
           return dst [0 .. index + len];
           }

        // nothing to translate: hand back a plain copy of the input
        if (dst.length < src.length)
            dst.length = src.length;
        dst [0 .. src.length] = src;
        return dst [0 .. src.length];
}
/******************************************************************************

        Convert reserved chars to entities. For example: " => &quot;

        The five characters translated are:
        ---
        "  => &quot;
        >  => &gt;
        <  => &lt;
        &  => &amp;
        '  => &apos;
        ---

        This variant does not require an interim workspace, and instead
        emits directly via the provided delegate

        Params:
        src  = the text to encode
        emit = sink invoked with consecutive pieces of the encoded text;
               literal runs are slices of src. When src contains no
               reserved characters, emit is invoked exactly once with
               src itself.

******************************************************************************/

void toEntity(T) (const(T)[] src, scope void delegate(const(T)[]) emit)
{
        const(T)[] entity;
        auto s = src.ptr;               // scan position
        auto t = s;                     // start of the pending literal run
        auto e = s + src.length;        // one past the end of the input

        while (s < e)
               switch (*s)
                      {
                      case '"':
                           entity = "&quot;";
                           goto common;

                      case '>':
                           entity = "&gt;";
                           goto common;

                      case '<':
                           entity = "&lt;";
                           goto common;

                      case '&':
                           entity = "&amp;";
                           goto common;

                      case '\'':
                           entity = "&apos;";
                           goto common;

                      common:
                           // flush the literal run (if any), then the entity
                           if (s - t > 0)
                               emit (t [0 .. s - t]);
                           emit (entity);

                           // the next literal run starts after this char
                           t = ++s;
                           break;

                      default:
                           ++s;
                           break;
                      }

        // did we change anything? entity is only assigned once a reserved
        // character has been seen. Copy tail also (it may be empty)
        if (entity.length)
            emit (t [0 .. e - t]);
        else
           emit (src);
}
/*******************************************************************************
*******************************************************************************/
debug (DocEntity)
{
        import tango.io.Console;

        // Self-test for the buffer variants of fromEntity and toEntity;
        // compiled only when the DocEntity debug identifier is set
        void main()
        {
                // each predefined entity decodes to its character
                auto s = fromEntity ("&amp;");
                assert (s == "&");
                s = fromEntity ("&quot;");
                assert (s == "\"");
                s = fromEntity ("&apos;");
                assert (s == "'");
                s = fromEntity ("&gt;");
                assert (s == ">");
                s = fromEntity ("&lt;");
                assert (s == "<");

                // adjacent entities, with and without surrounding text
                s = fromEntity ("&lt;&amp;&apos;");
                assert (s == "<&'");
                s = fromEntity ("*&lt;&amp;&apos;*");
                assert (s == "*<&'*");

                // plain text, a trailing '&', and entities at the tail
                assert (fromEntity ("abc") == "abc");
                assert (fromEntity ("abc&") == "abc&");
                assert (fromEntity ("abc&lt;") == "abc<");
                assert (fromEntity ("abc&gt;goo") == "abc>goo");
                assert (fromEntity ("&amp;") == "&");
                assert (fromEntity ("&quot;&apos;") == "\"'");

                // unrecognised patterns pass through untouched
                assert (fromEntity ("&q&s") == "&q&s");

                // each reserved character encodes to its entity
                auto d = toEntity (">");
                assert (d == "&gt;");
                d = toEntity ("<");
                assert (d == "&lt;");
                d = toEntity ("&");
                assert (d == "&amp;");
                d = toEntity ("'");
                assert (d == "&apos;");
                d = toEntity ("\"");
                assert (d == "&quot;");

                // reserved characters mixed with literal text
                d = toEntity ("^^>*>*");
                assert (d == "^^&gt;*&gt;*");
        }
}
|