123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322
module tango.util.container.more.HashFile;

private import tango.io.device.FileMap : MappedFile;

/******************************************************************************

        HashFile implements a simple mechanism to store and recover a 
        large quantity of data for the duration of the hosting process.
        It is intended to act as a local-cache for a remote data-source, 
        or as a spillover area for large in-memory cache instances. 
        
        Note that any and all stored data is rendered invalid the moment
        a HashFile object is garbage-collected.

        The implementation follows a fixed-capacity record scheme, where
        content can be rewritten in-place until said capacity is reached.
        At such time, the altered content is moved to a larger capacity
        record at end-of-file, and a hole remains at the prior location.
        These holes are not collected, since the lifespan of a HashFile
        is limited to that of the host process.

        All index keys must be unique. Writing to the HashFile with an
        existing key will overwrite any previous content. What follows
        is a contrived example:
        
        ---
        alias HashFile!(char[], char[]) Bucket;

        auto bucket = new Bucket ("bucket.bin", HashFile.HalfK);

        // insert some data, and retrieve it again
        auto text = "this is a test";
        bucket.put ("a key", text);
        auto b = cast(char[]) bucket.get ("a key");

        assert (b == text);
        bucket.close;
        ---

******************************************************************************/

class HashFile(K, V)
{
        /**********************************************************************

                Define the capacity (block-size) of each record

        **********************************************************************/

        struct BlockSize
        {
                int capacity;
        }

        // backing storage
        private MappedFile              file;

        // memory-mapped content
        private ubyte[]                 heap;

        // basic capacity for each record
        private BlockSize               block;

        // pointers to file records
        private Record[K]               map;

        // current file size
        private ulong                   fileSize;

        // current file usage
        private ulong                   waterLine;

        // supported block sizes
        public enum BlockSize   EighthK  = {128-1},
                                QuarterK = {256-1},
                                HalfK    = {512-1},
                                OneK     = {1024*1-1},
                                TwoK     = {1024*2-1},
                                FourK    = {1024*4-1},
                                EightK   = {1024*8-1},
                                SixteenK = {1024*16-1},
                                ThirtyTwoK = {1024*32-1},
                                SixtyFourK = {1024*64-1};


        /**********************************************************************

                Construct a HashFile with the provided path, record-size,
                and inital record count. The latter causes records to be 
                pre-allocated, saving a certain amount of growth activity.
                Selecting a record size that roughly matches the serialized 
                content will limit 'thrashing'. 

        **********************************************************************/

        this (const(char[]) path, BlockSize block, uint initialRecords = 100)
        {
                this.block = block;

                // open a storage file
                file = new MappedFile (path);

                // set initial file size (cannot be zero)
                fileSize = initialRecords * (block.capacity + 1);

                // map the file content
                heap = file.resize (fileSize);
        }

        /**********************************************************************
        
                Return where the HashFile is located

        **********************************************************************/

        final const(char[]) path ()
        {
                return file.path;
        }

        /**********************************************************************

                Return the currently populated size of this HashFile

        **********************************************************************/

        final const ulong length ()
        {
                return waterLine;
        }

        /**********************************************************************

                Return the serialized data for the provided key. Returns
                null if the key was not found.

                Be sure to synchronize access by multiple threads

        **********************************************************************/

        final V get (K key, bool clear = false)
        {
                auto p = key in map;

                if (p)
                    return p.read (this, clear);
                return V.init;
        }

        /**********************************************************************

                Remove the provided key from this HashFile. Leaves a 
                hole in the backing file

                Be sure to synchronize access by multiple threads

        **********************************************************************/

        final void remove (K key)
        {
                map.remove (key);
        }

        /**********************************************************************

                Write a serialized block of data, and associate it with
                the provided key. All keys must be unique, and it is the
                responsibility of the programmer to ensure this. Reusing 
                an existing key will overwrite previous data. 

                Note that data is allowed to grow within the occupied 
                bucket until it becomes larger than the allocated space.
                When this happens, the data is moved to a larger bucket
                at the file tail.

                Be sure to synchronize access by multiple threads

        **********************************************************************/

        final void put (K key, V data, K function(K) retain = null)
        {
                auto r = key in map;
                
                if (r)
                    r.write (this, data, block);
                else
                   {
                   Record rr;
                   rr.write (this, data, block);
                   if (retain)
                       key = retain (key);
                   map [key] = rr;
                   }
        }

        /**********************************************************************

                Close this HashFile -- all content is lost.

        **********************************************************************/

        final void close ()
        {
                if (file)
                   {
                   file.close;
                   file = null;
                   map = null;
                   }
        }

        /**********************************************************************

                Each Record takes up a number of 'pages' within the file. 
                The size of these pages is determined by the BlockSize 
                provided during HashFile construction. Additional space
                at the end of each block is potentially wasted, but enables 
                content to grow in size without creating a myriad of holes.

        **********************************************************************/

        private struct Record
        {
                private ulong           offset;
                private int             used,
                                        capacity = -1;

                /**************************************************************

                        This should be protected from thread-contention at
                        a higher level.

                **************************************************************/

                V read (HashFile bucket, bool clear)
                {
                        if (used)
                           {
                           auto ret = cast(V) bucket.heap [cast(size_t)offset .. cast(size_t)(offset + used)];
                           if (clear)
                               used = 0;
                           return ret;
                           }
                        return V.init;
                }

                /**************************************************************

                        This should be protected from thread-contention at
                        a higher level.

                **************************************************************/

                void write (HashFile bucket, V data, BlockSize block)
                {
                        this.used = data.length;

                        // create new slot if we exceed capacity
                        if (this.used > this.capacity)
                            createBucket (bucket, this.used, block);

                        bucket.heap [cast(size_t)offset .. cast(size_t)(offset+used)] = cast(ubyte[]) data;
                }

                /**************************************************************

                **************************************************************/

                void createBucket (HashFile bucket, int bytes, BlockSize block)
                {
                        this.offset = bucket.waterLine;
                        this.capacity = (bytes + block.capacity) & ~block.capacity;
                        
                        bucket.waterLine += this.capacity;
                        if (bucket.waterLine > bucket.fileSize)
                           {
                           auto target = bucket.waterLine * 2;
                           debug(HashFile) 
                                 printf ("growing file from %lld, %lld, to %lld\n", 
                                          bucket.fileSize, bucket.waterLine, target);

                           // expand the physical file size and remap the heap
                           bucket.heap = bucket.file.resize (bucket.fileSize = target);
                           }
                }
        }
}


/******************************************************************************

******************************************************************************/

debug (HashFile)
{
        extern(C) int printf (const(char)*, ...);

        import tango.io.Path;
        import tango.io.Stdout;
        import tango.text.convert.Integer;

        void main()
        {
                alias HashFile!(char[], char[]) Bucket;

                auto file = new Bucket ("foo.map", Bucket.QuarterK, 1);
        
                char[16] tmp;
                for (int i=1; i < 1024; ++i)
                     file.put (format(tmp, i).dup, "blah");

                auto s = file.get ("1", true);
                if (s.length)
                    Stdout.formatln ("result '{}'", s);
                s = file.get ("1");
                if (s.length)
                    Stdout.formatln ("result '{}'", s);
                file.close;
                remove ("foo.map");
        }
}