123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604
/*******************************************************************************

        Copyright: Copyright (C) 2008 Aaron Craelius & Kris Bell.  
                   All rights reserved.

        License:   BSD style: $(LICENSE)

        version:   Initial release: July 2008      

        Authors:   Aaron, Kris

*******************************************************************************/

module tango.text.json.JsonParser;

private import tango.util.container.more.Stack;

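/*******************************************************************************

        Incremental (pull-style) JSON tokenizer, templated on the character
        type T. Bind it to a document via the constructor or reset(), then
        call next() repeatedly: after each successful step, type() reports
        the kind of token found and value() returns the slice of the input
        text that belongs to it.

*******************************************************************************/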

class JsonParser(T)
{
        // Pull-style tokenizer: reset() consumes the opening '{' or '[',
        // each next() consumes one further token, and type()/value()
        // describe the token most recently consumed. Token values are
        // slices of the input; nothing is copied or unescaped here.

        /// Kind of the most recently consumed token, as reported by type()
        public enum Token
               {
               Empty, Name, String, Number, BeginObject, EndObject, 
               BeginArray, EndArray, True, False, Null
               }

        /// Which container the parser is currently inside of
        private enum State {Object, Array}

        /// Cursor over the input text
        private struct Iterator
        {
                const(T)*      ptr;     // next character to be examined
                const(T)*      end;     // one past the last character
                const(T)[]     text;    // the complete input

                /// Point the cursor at the start of the given text
                void reset (const(T)[] text)
                {
                        this.text = text;
                        this.ptr = text.ptr;
                        this.end = ptr + text.length;
                }
        }

        protected Iterator              str;
        // enclosing-container states; presumably limited to 16 nested
        // levels by the Stack template argument -- TODO confirm in Stack
        private Stack!(State, 16)       state;
        private const(T)*               curLoc;         // start of current token
        private size_t                  curLen;         // length of current token
        private State                   curState;       // container being parsed
        protected Token                 curType;        // kind of current token
        
        /***********************************************************************
        
                Construct a parser, optionally binding it to a document 
                via reset(). 

                Throws: Exception (from reset) when the text is non-blank
                        and does not start with '{' or '['

        ***********************************************************************/
        
        this (const(T)[] text = null)
        {
                reset (text);
        }
        
        /***********************************************************************
        
                Advance to the next token.

                Returns: true when a token was consumed; false when no 
                         input is bound, when only whitespace remains, 
                         or when a value could not be recognized (see
                         parseNumber)

                Throws: Exception on malformed or truncated input

        ***********************************************************************/
        
        @property final bool next ()
        {
                if (str.ptr is null || str.end is null)
                    return false;

                auto p = str.ptr;
                auto e = str.end;

                // test the bound first, so that we never read past the end
                while (p < e && *p <= 32) 
                       ++p; 

                if ((str.ptr = p) >= e) 
                     return false;

                if (curState is State.Array) 
                    return parseArrayValue();

                // within an object: a name is followed by its value, and
                // anything else is followed by a name (or by '}')
                if (curType is Token.Name)
                    return parseMemberValue();

                return parseMemberName();
        }
        
        /***********************************************************************
        
                Returns: the kind of the current token

        ***********************************************************************/
        
        @property final Token type ()
        {
                return curType;
        }
        
        /***********************************************************************
        
                Returns: the text of the current token, as a slice of the
                         input. For Name and String tokens the enclosing
                         quotes are excluded; for BeginObject, EndObject, 
                         BeginArray and EndArray the slice is empty

        ***********************************************************************/
        
        @property final const(T)[] value ()
        {
                return curLoc [0 .. curLen];
        }
        
        /***********************************************************************
        
                Bind the parser to a new document and consume the opening
                '{' or '['.

                Returns: true when the opening token was consumed; false 
                         when the text is empty or holds only whitespace

                Throws: Exception when the first non-whitespace character
                        is neither '{' nor '['

        ***********************************************************************/
        
        bool reset (const(T)[] json = null)
        {
                state.clear();
                str.reset (json);
                curType = Token.Empty;
                curState = State.Object;

                if (json.length)
                   {
                   auto p = str.ptr;
                   auto e = str.end;

                   // test the bound first, so that we never read past the end
                   while (p < e && *p <= 32) 
                          ++p; 

                   if (p < e)
                       return start (*(str.ptr = p));
                   }
                return false;
        }

        /***********************************************************************
        
                Report a syntax error.

                Throws: Exception with the message "expected " ~ token

        ***********************************************************************/
        
        protected final void expected (immutable(char)[] token)
        {
                throw new Exception ("expected " ~ token);
        }
        
        /***********************************************************************
        
                Report a syntax error, including the offset of 'point' 
                from the start of the input.

                Throws: Exception, always

        ***********************************************************************/
        
        protected final void expected (immutable(char)[] token, const(T)* point)
        {
                // render i as decimal digits at the tail end of buf
                static char[] itoa (char[] buf, size_t i)
                {
                        auto p = buf.ptr+buf.length;
                        do {
                           *--p = cast(char) ('0' + i % 10);
                           } while (i /= 10);
                        return p[0..(buf.ptr+buf.length)-p];
                }
                char[32] tmp = void;
                expected (token ~ " @input[" ~ itoa(tmp, point-str.text.ptr).idup~"]");
        }
        
        /***********************************************************************
        
                Report that the input ended in the middle of a construct.

                Throws: Exception, always

        ***********************************************************************/
        
        private void unexpectedEOF (immutable(char)[] msg)
        {
                throw new Exception ("unexpected end-of-input: " ~ msg);
        }
                
        /***********************************************************************
        
                Consume the character that opens the document.

                Throws: Exception when c is neither '{' nor '['

        ***********************************************************************/
        
        private bool start (T c)
        {
                if (c is '{') 
                    return push (Token.BeginObject, State.Object);

                if (c is '[') 
                    return push (Token.BeginArray, State.Array);

                expected ("'{' or '[' at start of document");
                assert(0);
        }

        /***********************************************************************
        
                Consume either the '}' closing the current object, or an
                attribute name (optionally preceded by a ','). The caller
                guarantees that str.ptr addresses a valid character.

                Throws: Exception when no quoted name is found, or when 
                        the input ends first

        ***********************************************************************/
        
        private bool parseMemberName ()
        {
                auto p = str.ptr;
                auto e = str.end;

                if (*p is '}') 
                    return pop (Token.EndObject);
                
                // the separator is optional here
                if (*p is ',') 
                    ++p;
                
                while (p < e && *p <= 32) 
                       ++p;

                // input ended after the separator: do not read beyond it
                if (p >= e)
                    unexpectedEOF ("before attribute-name");

                if (*p != '"')
                   {
                   if (*p == '}')
                       expected ("an attribute-name after (a potentially trailing) ','", p);
                   else
                      expected ("'\"' before attribute-name", p);
                   }

                curLoc = p+1;
                curType = Token.Name;

                // locate the closing quote, ignoring escaped ones
                while (++p < e) 
                       if (*p is '"' && !escaped(p))
                           break;

                if (p < e) 
                    curLen = p - curLoc;
                else
                   unexpectedEOF ("in attribute-name");

                str.ptr = p + 1;
                return true;
        }
        
        /***********************************************************************
        
                Consume the ':' following an attribute name, and then the
                attribute value. The caller guarantees that str.ptr 
                addresses a valid character.

                Throws: Exception when ':' is missing, or when the input 
                        ends before the value

        ***********************************************************************/
        
        private bool parseMemberValue ()
        {
                auto p = str.ptr;

                if (*p != ':') 
                    expected ("':' before attribute-value", p);

                auto e = str.end;
                while (++p < e && *p <= 32) {}

                // input ended after the ':': do not read beyond it
                if (p >= e)
                    unexpectedEOF ("before attribute-value");

                return parseValue (*(str.ptr = p));
        }
        
        /***********************************************************************
        
                Consume one value, selected via its first character c 
                (which is the character at str.ptr). Anything that does 
                not open a container, string or literal is handed to 
                parseNumber.

                Throws: Exception on a misspelled null/true/false literal

        ***********************************************************************/
        
        private bool parseValue (T c)
        {                       
                switch (c)
                       {
                       case '{':
                            return push (Token.BeginObject, State.Object);
         
                       case '[':
                            return push (Token.BeginArray, State.Array);
        
                       case '"':
                            return doString();
        
                       case 'n':
                            if (match ("null", Token.Null))
                                return true;
                            expected ("'null'", str.ptr);
                            break;
                       case 't':
                            if (match ("true", Token.True))
                                return true;
                            expected ("'true'", str.ptr);
                            break;
                       case 'f':
                            if (match ("false", Token.False))
                                return true;
                            expected ("'false'", str.ptr);
                            break;
                       default:
                            break;
                       }

                return parseNumber();
        }
        
        /***********************************************************************
        
                Consume a quoted string; str.ptr addresses the opening
                quote. The token excludes both quotes, and escape 
                sequences are left as they appear in the input.

                Throws: Exception when the closing quote is missing

        ***********************************************************************/
        
        private bool doString ()
        {
                auto p = str.ptr;
                auto e = str.end;

                curLoc = p+1;
                curType = Token.String;
                
                // locate the closing quote, ignoring escaped ones
                while (++p < e) 
                       if (*p is '"' && !escaped(p))
                           break;

                if (p < e) 
                    curLen = p - curLoc;
                else
                   unexpectedEOF ("in string");

                str.ptr = p + 1;
                return true;
        }
        
        /***********************************************************************
        
                Consume a number: an optional sign, digits, an optional
                fraction, and an optional exponent which may carry its
                own sign (e.g. 1e+5 or 2E-3). Every read is bounded by 
                the end of the input.

                Returns: true when at least one character was consumed;
                         false otherwise, in which case str.ptr is left
                         where it was

                Throws: Exception when the number runs up to the end of 
                        the input

        ***********************************************************************/
        
        private bool parseNumber ()
        {
                auto p = str.ptr;
                auto e = str.end;

                curLoc = p;
                curType = Token.Number;

                if (p < e && (*p is '-' || *p is '+'))
                    ++p;

                while (p < e && *p >= '0' && *p <= '9') 
                       ++p;                 

                if (p < e && *p is '.')
                    while (++p < e && *p >= '0' && *p <= '9') {}                 

                if (p < e && (*p is 'e' || *p is 'E'))
                   {
                   ++p;

                   // the exponent may be signed
                   if (p < e && (*p is '-' || *p is '+'))
                       ++p;

                   while (p < e && *p >= '0' && *p <= '9') 
                          ++p;
                   }

                // a number cannot be the last thing in a document, since
                // the enclosing container still has to be closed
                if (p < e) 
                    curLen = p - curLoc;
                else
                   unexpectedEOF ("after number");

                str.ptr = p;
                return curLen > 0;
        }
        
        /***********************************************************************
        
                Consume the literal 'name' when the input at str.ptr 
                starts with it, recording it as a token of the given 
                kind.

                Returns: true on a match; false otherwise, including the
                         case where fewer than name.length characters
                         remain

        ***********************************************************************/
        
        private bool match (const(T)[] name, Token token)
        {
                auto i = name.length;

                // never slice beyond the end of the input
                if (cast(size_t) (str.end - str.ptr) < i)
                    return false;

                if (str.ptr[0 .. i] == name)
                   {
                   curLoc = str.ptr;
                   curType = token;
                   str.ptr += i;
                   curLen = i;
                   return true;
                   }
                return false;
        }
        
        /***********************************************************************
        
                Consume the character opening a container: the current
                state is saved on the stack, and 'next' becomes the 
                current state. The token text is empty.

        ***********************************************************************/
        
        private bool push (Token token, State next)
        {
                curLen = 0;
                curType = token;
                curLoc = str.ptr++;
                state.push (curState);
                curState = next;
                return true;
        }
        
        /***********************************************************************
        
                Consume the character closing a container, and restore 
                the state saved by the matching push. The token text is 
                empty.

        ***********************************************************************/
        
        private bool pop (Token token)
        {
                curLen = 0;
                curType = token;
                curLoc = str.ptr++;
                curState = state.pop();
                return true;
        }

        /***********************************************************************
        
                Consume either the ']' closing the current array, or an
                array element (optionally preceded by a ','). The caller
                guarantees that str.ptr addresses a valid character.

                Throws: Exception when the input ends before the element

        ***********************************************************************/
        
        private bool parseArrayValue ()
        {
                auto p = str.ptr;
                if (*p is ']') 
                    return pop (Token.EndArray);
                
                // the separator is optional here
                if (*p is ',') 
                    ++p;

                auto e = str.end;
                while (p < e && *p <= 32) 
                       ++p;

                // input ended after the separator: do not read beyond it
                if (p >= e)
                    unexpectedEOF ("in array");

                return parseValue (*(str.ptr = p));
        }

        /***********************************************************************
        
                Is the character at p preceded by an odd number of 
                backslashes? The backward scan stops at the first 
                character that is not a backslash; callers invoke this 
                from within a quoted string only, where the opening 
                quote serves as that stopper.

                Returns: 1 when the character is escaped, 0 otherwise

        ***********************************************************************/
        
        private int escaped (const(T)* p)
        {
                int i;

                while (*--p is '\\')
                       ++i;
                return i & 1;
        }
}



debug(UnitTest)
{       
                // Sample document covering nested objects, an array, strings, 
                // a number with fraction and exponent, and the three literals.
                // Every member is comma-separated, so that the fixture is
                // itself a valid JSON document
                immutable(char)[] json = 
                "{"
                "\"glossary\": {"
                "\"title\": \"example glossary\","
                "\"GlossDiv\": {"
                "   \"title\": \"S\","
                "   \"GlossList\": {"
                "       \"GlossEntry\": {"
                "           \"ID\": \"SGML\","
                "           \"SortAs\": \"SGML\","
                "           \"GlossTerm\": \"Standard Generalized Markup Language\","
                "           \"Acronym\": \"SGML\","
                "           \"Abbrev\": \"ISO 8879:1986\","
                "           \"GlossDef\": {"
                "                \"para\": \"A meta-markup language, used to create markup languages such as DocBook.\","
                "            \"GlossSeeAlso\": [\"GML\", \"XML\"]"
                "           },"
                "          \"GlossSee\": \"markup\","
                "          \"ANumber\": 12345.6e7,"
                "          \"True\": true,"
                "          \"False\": false,"
                "          \"Null\": null"
                "        }"
                "    }"
                "}"
                "}"
                "}";
       
// Walks the sample document token by token, checking the kind of each 
// token and (where one is expected) its text
unittest
{
        // construction consumes the opening '{'
        auto p = new JsonParser!(char)(json);
        assert(p);
        assert(p.type == p.Token.BeginObject);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "glossary", p.value);
        assert(p.next);
        // container tokens carry no text
        assert(p.value == "", p.value);
        assert(p.type == p.Token.BeginObject);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "title", p.value);
        assert(p.next);
        assert(p.type == p.Token.String);
        assert(p.value == "example glossary", p.value);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "GlossDiv", p.value);
        assert(p.next);
        assert(p.type == p.Token.BeginObject);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "title", p.value);
        assert(p.next);
        assert(p.type == p.Token.String);
        assert(p.value == "S", p.value);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "GlossList", p.value);
        assert(p.next);
        assert(p.type == p.Token.BeginObject);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "GlossEntry", p.value);
        assert(p.next);
        assert(p.type == p.Token.BeginObject);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "ID", p.value);
        assert(p.next);
        assert(p.type == p.Token.String);
        assert(p.value == "SGML", p.value);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "SortAs", p.value);
        assert(p.next);
        assert(p.type == p.Token.String);
        assert(p.value == "SGML", p.value);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "GlossTerm", p.value);
        assert(p.next);
        assert(p.type == p.Token.String);
        assert(p.value == "Standard Generalized Markup Language", p.value);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "Acronym", p.value);
        assert(p.next);
        assert(p.type == p.Token.String);
        assert(p.value == "SGML", p.value);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "Abbrev", p.value);
        assert(p.next);
        assert(p.type == p.Token.String);
        assert(p.value == "ISO 8879:1986", p.value);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "GlossDef", p.value);
        assert(p.next);
        assert(p.type == p.Token.BeginObject);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "para", p.value);
        assert(p.next);

        assert(p.type == p.Token.String);
        assert(p.value == "A meta-markup language, used to create markup languages such as DocBook.", p.value);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "GlossSeeAlso", p.value);
        assert(p.next);
        // the array and its two string elements
        assert(p.type == p.Token.BeginArray);
        assert(p.next);
        assert(p.type == p.Token.String);
        assert(p.value == "GML", p.value);
        assert(p.next);
        assert(p.type == p.Token.String);
        assert(p.value == "XML", p.value);
        assert(p.next);
        assert(p.type == p.Token.EndArray);
        assert(p.next);
        // end of GlossDef
        assert(p.type == p.Token.EndObject);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "GlossSee", p.value);
        assert(p.next);
        assert(p.type == p.Token.String);
        assert(p.value == "markup", p.value);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "ANumber", p.value);
        assert(p.next);
        assert(p.type == p.Token.Number);
        assert(p.value == "12345.6e7", p.value);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "True", p.value);
        assert(p.next);
        assert(p.type == p.Token.True);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "False", p.value);
        assert(p.next);
        assert(p.type == p.Token.False);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "Null", p.value);
        assert(p.next);
        assert(p.type == p.Token.Null);
        assert(p.next);
        // GlossEntry, GlossList, GlossDiv, glossary and the root close in turn
        assert(p.type == p.Token.EndObject);
        assert(p.next);
        assert(p.type == p.Token.EndObject);
        assert(p.next);
        assert(p.type == p.Token.EndObject);
        assert(p.next);
        assert(p.type == p.Token.EndObject);
        assert(p.next);
        assert(p.type == p.Token.EndObject);
        // input exhausted
        assert(!p.next);

        // every container opened has been closed again
        assert(p.state.size == 0);

}

}


debug (JsonParser)
{
        // Stand-alone entry point for debug builds: merely instantiates 
        // the char flavour of the parser, without any input bound to it
        void main()
        {
                auto parser = new JsonParser!char();
        }
}