123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283 |
|
/*******************************************************************************
Copyright: Copyright (C) 2008 Kris Bell, all rights reserved
License: BSD style: $(LICENSE)
version: July 2008: Initial release
Authors: Kris
*******************************************************************************/
module tango.text.json.JsonEscape;
private import tango.text.json.JsonParser;
private import Util = tango.text.Util;
private import Utf = tango.text.convert.Utf;
/******************************************************************************
Convert 'escaped' chars to normal ones. For example: \\ => \
The provided output buffer should be at least as long as the
input string, or it will be allocated from the heap instead.
Returns a slice of dst holding the converted content. Where no
conversion was required, that slice is a verbatim copy of src
******************************************************************************/
T[] unescape(T) (const(T)[] src, T[] dst = null)
{
        // number of elements of dst populated so far
        size_t used = 0;

        // receives each fragment produced by the delegate-based variant
        // and places it at the tail of dst, growing dst when it is full
        void sink (const(T)[] chunk)
        {
                auto end = used + chunk.length;
                if (end > dst.length)
                    // grow by the fragment size plus 1024 elements of slack
                    dst.length = dst.length + chunk.length + 1024;

                dst[used .. end] = chunk;
                used = end;
        }

        // the real work is done by the emitting overload
        unescape (src, &sink);

        // expose only the populated part of the buffer
        return dst [0 .. used];
}
/******************************************************************************
Convert reserved chars to escaped ones. For example: \ => \\
A slice of the output buffer is always returned: it holds the
escaped text, or a verbatim copy of the original content when
there were no reserved chars present. The output buffer will be
expanded as necessary
******************************************************************************/
T[] escape(T) (const(T)[] src, T[] dst = null)
{
        // number of elements of dst populated so far
        size_t used = 0;

        // receives each fragment produced by the delegate-based variant
        // and places it at the tail of dst, growing dst when it is full
        void sink (const(T)[] chunk)
        {
                auto end = used + chunk.length;
                if (end > dst.length)
                    // grow by the fragment size plus 1024 elements of slack
                    dst.length = dst.length + chunk.length + 1024;

                dst[used .. end] = chunk;
                used = end;
        }

        // the real work is done by the emitting overload
        escape (src, &sink);

        // expose only the populated part of the buffer
        return dst [0 .. used];
}
/******************************************************************************
Convert 'escaped' chars to normal ones. For example: \\ => \
This variant does not require an interim workspace, and instead
emits directly via the provided delegate
******************************************************************************/
void unescape(T) (const(T)[] src, scope void delegate(const(T)[]) emit)
{
        // Decode the four hex digits at p[0 .. 4] into v. Returns false
        // (with v only partially computed) when one of them is not a
        // hex digit. The caller guarantees four readable elements at p
        static bool hex4 (const(T)* p, ref dchar v)
        {
                v = 0;
                for (auto i=0; i < 4; ++i)
                    {
                    dchar c = p[i];
                    if (c >= '0' && c <= '9')
                        c -= '0';
                    else
                       if (c >= 'a' && c <= 'f')
                           c -= 'a' - 10;
                       else
                          if (c >= 'A' && c <= 'F')
                              c -= 'A' - 10;
                          else
                             return false;
                    v = (v << 4) + c;
                    }
                return true;
        }

        size_t delta;
        auto s = src.ptr;
        auto len = src.length;
        enum:T {slash = '\\'}

        // take a peek first to see if there's anything
        if ((delta = Util.indexOf (s, slash, len)) < len)
           {
           // copy segments over, a chunk at a time
           do {
              // everything ahead of the backslash is passed through as is
              emit (s[0 .. delta]);
              len -= delta;
              s += delta;

              // bogus trailing '\': emit it literally and stop
              if (len < 2)
                 {
                 emit ("\\");
                 len = 0;
                 break;
                 }

              // translate \c
              switch (s[1])
                     {
                     case '\\':
                          emit ("\\");
                          break;

                     case '/':
                          emit ("/");
                          break;

                     case '"':
                          emit (`"`);
                          break;

                     case 'b':
                          emit ("\b");
                          break;

                     case 'f':
                          emit ("\f");
                          break;

                     case 'n':
                          emit ("\n");
                          break;

                     case 'r':
                          emit ("\r");
                          break;

                     case 't':
                          emit ("\t");
                          break;

                     case 'u':
                          // \uXXXX occupies six elements in total
                          if (len < 6)
                              goto default;
                          else
                             {
                             dchar v = 0;
                             T[6] t = void;

                             if (! hex4 (s+2, v))
                                   goto default;

                             // JSON expresses code points beyond the BMP as
                             // an escaped UTF-16 surrogate pair. Merge a high
                             // surrogate with an immediately following
                             // escaped low surrogate, so that one valid code
                             // point is converted instead of two lone halves
                             if (v >= 0xD800 && v <= 0xDBFF && len >= 12
                                 && s[6] == '\\' && s[7] == 'u')
                                {
                                dchar lo = 0;
                                if (hex4 (s+8, lo) && lo >= 0xDC00 && lo <= 0xDFFF)
                                   {
                                   v = 0x10000 + ((v - 0xD800) << 10) + (lo - 0xDC00);
                                   // skip over the second \uXXXX
                                   len -= 6;
                                   s += 6;
                                   }
                                }

                             emit (Utf.fromString32 ((&v)[0..1], t));

                             // skip the four hex digits; the '\' and 'u'
                             // are skipped after the switch
                             len -= 4;
                             s += 4;
                             }
                          break;

                     default:
                          throw new Exception ("invalid escape");
                     }

              // step past the backslash and the escape selector
              s += 2;
              len -= 2;
              } while ((delta = Util.indexOf (s, slash, len)) < len);

           // copy tail too
           emit (s [0 .. len]);
           }
        else
           // no backslash present: hand the input over untouched
           emit (src);
}
/******************************************************************************
Convert reserved chars to escaped ones. For example: \ => \\
This variant does not require an interim workspace, and instead
emits directly via the provided delegate
******************************************************************************/
void escape(T) (const(T)[] src, scope void delegate(const(T)[]) emit)
{
        static immutable hex = "0123456789abcdef";

        // two-element escape: a backslash followed by a selector char
        T[2] patch = '\\';

        // six-element escape of the form \u00XX, used for control chars
        // that have no short form; the last two slots are set per char
        T[6] ctrl = '0';
        ctrl[0] = '\\';
        ctrl[1] = 'u';

        auto s = src.ptr;               // scan position
        auto t = s;                     // start of the pending literal run
        auto e = s + src.length;        // end of input

        while (s < e)
              {
              switch (*s)
                     {
                     case '"':
                     case '/':
                     case '\\':
                          patch[1] = *s;
                          break;

                     case '\r':
                          patch[1] = 'r';
                          break;

                     case '\n':
                          patch[1] = 'n';
                          break;

                     case '\t':
                          patch[1] = 't';
                          break;

                     case '\b':
                          patch[1] = 'b';
                          break;

                     case '\f':
                          patch[1] = 'f';
                          break;

                     default:
                          if (*s < 0x20)
                             {
                             // JSON forbids raw control characters inside
                             // a string, so emit the remaining ones as \u00XX
                             ctrl[4] = hex[*s >> 4];
                             ctrl[5] = hex[*s & 0x0f];
                             emit (t [0 .. s - t]);
                             emit (ctrl);
                             t = ++s;
                             }
                          else
                             ++s;
                          continue;
                     }

              // flush the literal run ahead of the reserved char, then
              // the two-element replacement, and restart the run after it
              emit (t [0 .. s - t]);
              emit (patch);
              t = ++s;
              }

        // did we change anything? Copy tail also
        if (t is src.ptr)
            emit (src);
        else
           emit (t [0 .. e - t]);
}
/******************************************************************************
******************************************************************************/
// Self-test harness, compiled only when the JsonEscape debug identifier is set
debug (JsonEscape)
{
        import tango.io.Stdout;

        void main()
        {
                // escape: buffer variant
                escape ("abc");
                assert (escape ("abc") == "abc");
                assert (escape ("/abc") == `\/abc`, escape ("/abc"));
                assert (escape ("ab\\c") == `ab\\c`, escape ("ab\\c"));
                assert (escape ("abc\"") == `abc\"`);
                assert (escape ("abc/") == `abc\/`);
                assert (escape ("\n\t\r\b\f") == `\n\t\r\b\f`);

                // unescape: buffer variant, then the delegate variant.
                // The delegate literal has to accept const(char)[] in order
                // to match the 'void delegate(const(T)[])' parameter
                unescape ("abc");
                unescape ("abc\\u0020x", (const(char)[] p){Stdout(p);});
                assert (unescape ("abc") == "abc");
                assert (unescape ("abc\\") == "abc\\");
                assert (unescape ("abc\\t") == "abc\t");
                assert (unescape ("abc\\tc") == "abc\tc");
                assert (unescape ("\\t") == "\t");
                assert (unescape ("\\tx") == "\tx");
                assert (unescape ("\\r\\rx") == "\r\rx");
                assert (unescape ("abc\\t\\n\\bc") == "abc\t\n\bc");
                assert (unescape ("abc\"\\n\\bc") == "abc\"\n\bc");
                assert (unescape ("abc\\u002bx") == "abc+x");
        }
}
|