/*******************************************************************************

        Copyright: Copyright (C) 2008 Kris Bell, all rights reserved

        License:   BSD style: $(LICENSE)

        version:   July 2008: Initial release

        Authors:   Kris

*******************************************************************************/

module tango.text.json.JsonEscape;

private import tango.text.json.JsonParser;

private import Util = tango.text.Util;

private import Utf = tango.text.convert.Utf;

/******************************************************************************

        Convert 'escaped' chars to normal ones. For example: \\ => \

        The converted text is always written into dst, even when src
        holds no escapes at all (src itself is never returned). When
        dst is too small for the next piece it is lengthened by that
        piece's length plus 1024 elements; once that happens the result
        may no longer share storage with the buffer the caller passed.

        Returns the leading slice of dst that holds the converted text.

        Invalid escapes are reported by the delegate-based unescape,
        which this function forwards to.

******************************************************************************/

T[] unescape(T) (const(T)[] src, T[] dst = null)
{
        size_t used;

        // sink for the delegate-based unescape: copy each piece to the end
        // of dst, enlarging the buffer first when the piece would not fit
        void sink (const(T)[] piece)
        {
                auto end = used + piece.length;
                if (end > dst.length)
                    dst.length = dst.length + piece.length + 1024;
                dst[used .. end] = piece;
                used = end;
        }

        unescape (src, &sink);
        return dst [0 .. used];
}


/******************************************************************************

        Convert reserved chars to escaped ones. For example: \ => \\ 

        The escaped text is always written into dst, even when src has
        no reserved chars (src itself is never returned). When dst is 
        too small for the next piece it is lengthened by that piece's 
        length plus 1024 elements; once that happens the result may no 
        longer share storage with the buffer the caller passed.

        Returns the leading slice of dst that holds the escaped text.
        
******************************************************************************/

T[] escape(T) (const(T)[] src, T[] dst = null)
{
        size_t used;

        // sink for the delegate-based escape: copy each piece to the end
        // of dst, enlarging the buffer first when the piece would not fit
        void sink (const(T)[] piece)
        {
                auto end = used + piece.length;
                if (end > dst.length)
                    dst.length = dst.length + piece.length + 1024;
                dst[used .. end] = piece;
                used = end;
        }

        escape (src, &sink);
        return dst [0 .. used];
}


/******************************************************************************

        Convert 'escaped' chars to normal ones. For example: \\ => \

        This variant does not require an interim workspace, and instead
        emits directly via the provided delegate. Content without any
        backslash is emitted in a single call, as src itself; otherwise
        the text is emitted in pieces: each unescaped run, followed by
        the translation of the escape that ended it.

        Recognised escapes are the backslash followed by one of
        \ / " b f n r t, and \uXXXX with four hex digits. A \uXXXX high 
        surrogate (D800-DBFF) directly followed by a \uXXXX low 
        surrogate (DC00-DFFF) is combined into one code point, which is 
        how JSON spells characters outside the basic multilingual plane.
        A lone backslash at the very end of src is emitted verbatim.

        Throws: Exception ("invalid escape") for an unknown escape char,
        a \u with fewer than four chars after it, or a non-hex digit 
        inside a \u sequence
              
******************************************************************************/

void unescape(T) (const(T)[] src, scope void delegate(const(T)[]) emit)
{
        size_t delta;
        auto s = src.ptr;
        auto len = src.length;
        enum:T {slash = '\\'}

        // decode the four hex digits at p[0..4]; anything else is rejected
        uint hex4 (const(T)* p)
        {
                uint v = 0;
                for (auto i=0; i < 4; ++i)
                    {
                    uint c = p[i];
                    if (c >= '0' && c <= '9')
                        c -= '0';
                    else
                       if (c >= 'a' && c <= 'f')
                           c -= 'a' - 10;
                       else
                          if (c >= 'A' && c <= 'F')
                              c -= 'A' - 10;
                          else
                             throw new Exception ("invalid escape");
                    v = (v << 4) + c;
                    }
                return v;
        }

        // take a peek first to see if there's anything
        if ((delta = Util.indexOf (s, slash, len)) < len)
           {
           // copy segments over, a chunk at a time
           do {
              emit (s[0 .. delta]);
              len -= delta;
              s += delta;

              // bogus trailing '\'
              if (len < 2)
                 {
                 emit ("\\");
                 len = 0;
                 break;
                 }

              // translate \c
              switch (s[1])
                     {
                      case '\\':
                           emit ("\\");
                           break;

                      case '/':
                           emit ("/");
                           break;

                      case '"':
                           emit (`"`);
                           break;

                      case 'b':
                           emit ("\b");
                           break;

                      case 'f':
                           emit ("\f");
                           break;

                      case 'n':
                           emit ("\n");
                           break;

                      case 'r':
                           emit ("\r");
                           break;

                      case 't':
                           emit ("\t");
                           break;

                      case 'u':
                           if (len < 6)
                               goto default;
                           else
                              {
                              T[6]  t = void;
                              dchar v = cast(dchar) hex4 (s + 2);

                              // a high surrogate directly followed by
                              // another \uXXXX holding a low surrogate
                              // denotes a single code point: merge them
                              if (v >= 0xD800 && v <= 0xDBFF && len >= 12 &&
                                  s[6] == '\\' && s[7] == 'u')
                                 {
                                 auto low = hex4 (s + 8);
                                 if (low >= 0xDC00 && low <= 0xDFFF)
                                    {
                                    v = cast(dchar) (0x10000 + ((v - 0xD800) << 10)
                                                             + (low - 0xDC00));
                                    // skip the second \uXXXX as well
                                    len -= 6;
                                    s += 6;
                                    }
                                 }

                              emit (Utf.fromString32 ((&v)[0..1], t));
                              // the four hex digits; "\u" is skipped below
                              len -= 4;
                              s += 4;
                              }
                           break;

                      default:
                           throw new Exception ("invalid escape");
                     }

              // step over the backslash and the escape char
              s += 2;
              len -= 2;           
              } while ((delta = Util.indexOf (s, slash, len)) < len);

           // copy tail too
           emit (s [0 .. len]);
           }
        else
           emit (src);
}


/******************************************************************************

        Convert reserved chars to escaped ones. For example: \ => \\ 

        This variant does not require an interim workspace, and instead
        emits directly via the provided delegate. Content with nothing
        to escape is emitted in a single call, as src itself; otherwise
        the text is emitted in pieces: each untouched run, followed by
        the escape sequence for the char that ended it.

        The chars " / \ are prefixed with a backslash; CR, LF, TAB,
        backspace and formfeed become \r \n \t \b \f. Any other char
        below 0x20 is emitted as \u00XX (lowercase hex), since JSON 
        does not permit raw control chars inside a string.
        
******************************************************************************/

void escape(T) (const(T)[] src, scope void delegate(const(T)[]) emit)
{
        static immutable string hexDigits = "0123456789abcdef";

        // two-char escape: backslash + selector
        T[2] patch = '\\';

        // six-char escape for control codes: \u00 + two hex digits
        T[6] unicode = void;
        unicode[0] = '\\';
        unicode[1] = 'u';
        unicode[2] = '0';
        unicode[3] = '0';

        auto s = src.ptr;               // scan position
        auto t = s;                     // start of the pending clean run
        auto e = s + src.length;

        while (s < e)
              {
              const(T)[] esc = patch[];

              switch (*s)
                     {
                     case '"':
                     case '/':
                     case '\\':
                          patch[1] = *s;
                          break;
                     case '\r':
                          patch[1] = 'r';
                          break;
                     case '\n':
                          patch[1] = 'n';
                          break;
                     case '\t':
                          patch[1] = 't';
                          break;
                     case '\b':
                          patch[1] = 'b';
                          break;
                     case '\f':
                          patch[1] = 'f';
                          break;
                     default:
                          // ordinary char: leave it in the pending run
                          if (*s >= 0x20)
                             {
                             ++s;
                             continue;
                             }

                          // control char without a short form
                          unicode[4] = hexDigits[*s >> 4];
                          unicode[5] = hexDigits[*s & 0x0f];
                          esc = unicode[];
                          break;
                     }

              // flush the clean run, then the replacement for *s
              emit (t [0 .. s - t]);
              emit (esc);
              t = ++s;
              }

        // did we change anything? Copy tail also
        if (t is src.ptr)
            emit (src);
        else
           emit (t [0 .. e - t]);
}


/******************************************************************************

******************************************************************************/

debug (JsonEscape)
{
        import tango.io.Stdout;

        // self-test, compiled only when the JsonEscape debug identifier
        // is enabled
        void main()
        {
                // buffer-returning escape
                escape ("abc");
                assert (escape ("abc") == "abc");
                assert (escape ("/abc") == `\/abc`, escape ("/abc"));
                assert (escape ("ab\\c") == `ab\\c`, escape ("ab\\c"));
                assert (escape ("abc\"") == `abc\"`);
                assert (escape ("abc/") == `abc\/`);
                assert (escape ("\n\t\r\b\f") == `\n\t\r\b\f`);

                // buffer-returning and delegate-based unescape; the
                // delegate must accept const(char)[] to match the
                // signature of the delegate-based overload
                unescape ("abc");
                unescape ("abc\\u0020x", (const(char)[] p){Stdout(p);});
                assert (unescape ("abc") == "abc");
                assert (unescape ("abc\\") == "abc\\");
                assert (unescape ("abc\\t") == "abc\t");
                assert (unescape ("abc\\tc") == "abc\tc");
                assert (unescape ("\\t") == "\t");
                assert (unescape ("\\tx") == "\tx");
                assert (unescape ("\\r\\rx") == "\r\rx");
                assert (unescape ("abc\\t\\n\\bc") == "abc\t\n\bc");

                assert (unescape ("abc\"\\n\\bc") == "abc\"\n\bc");
                assert (unescape ("abc\\u002bx") == "abc+x");
        }

}