123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886
/*******************************************************************************

        copyright:      Copyright (c) 2008 Kris Bell. All rights reserved

        license:        BSD style: $(LICENSE)

        version:        Apr 2008: Initial release

        authors:        Kris

        Since:          0.99.7

        Based upon Doug Lea's Java collection package

*******************************************************************************/

module tango.util.container.HashSet;

private import  tango.util.container.Slink;

public  import  tango.util.container.Container;

private import tango.util.container.model.IContainer;

/*******************************************************************************

        Hash table implementation of a Set

        ---
        Iterator iterator ()
        int opApply (scope int delegate(ref V value) dg)

        bool add (V element)
        bool contains (V element)
        bool take (ref V element)
        bool remove (V element)
        size_t remove (IContainer!(V) e)
        bool replace (V oldElement, V newElement)

        size_t size ()
        bool isEmpty ()
        V[] toArray (V[] dst)
        HashSet dup ()
        HashSet clear ()
        HashSet reset ()

        size_t buckets ()
        void buckets (size_t cap)
        float threshold ()
        void threshold (float desired)
        ---

*******************************************************************************/

class HashSet (V, alias Hash = Container.hash, 
                  alias Reap = Container.reap, 
                  alias Heap = Container.DefaultCollect) 
                  : IContainer!(V)
{
        // use this type for Allocator configuration
        public alias Slink!(V)  Type;
        
        private alias Type      *Ref;

        private alias Heap!(Type) Alloc;

        // Each table entry is a list - null if no table allocated
        private Ref             table[];
        
        // number of elements contained
        private size_t          count;

        // the threshold load factor
        private float           loadFactor;

        // configured heap manager
        private Alloc           heap;
        
        // mutation tag updates on each change
        private size_t            mutation;

        /***********************************************************************

                Construct a HashSet instance

        ***********************************************************************/

        this (float f = Container.defaultLoadFactor)
        {
                loadFactor = f;
        }

        /***********************************************************************

                Clean up when deleted

        ***********************************************************************/

        ~this ()
        {
                reset;
        }

        /***********************************************************************

                Return a generic iterator for contained elements
                
        ***********************************************************************/

        final Iterator iterator ()
        {
                Iterator i = void;
                i.mutation = mutation;
                i.table = table;
                i.owner = this;
                i.cell = null;
                i.row = 0;
                return i;
        }

        /***********************************************************************


        ***********************************************************************/

        final int opApply (scope int delegate(ref V value) dg)
        {
                return iterator.opApply (dg);
        }

        /***********************************************************************

                Return the number of elements contained
                
        ***********************************************************************/

        @property final const size_t size ()
        {
                return count;
        }
        
        /***********************************************************************

                Add a new element to the set. Does not add if there is an
                equivalent already present. Returns true where an element
                is added, false where it already exists
                
                Time complexity: O(1) average; O(n) worst.
                
        ***********************************************************************/

        final bool add (V element)
        {
                if (table is null)
                    resize (Container.defaultInitialBuckets);

                auto h = Hash  (element, table.length);
                auto hd = table[h];

                if (hd && hd.find (element))
                    return false;

                table[h] = allocate.set (element, hd);
                increment;

                // only check if bin was nonempty                    
                if (hd)
                    checkLoad; 
                return true;
        }

        /***********************************************************************

                Does this set contain the given element?
        
                Time complexity: O(1) average; O(n) worst
                
        ***********************************************************************/

        final bool contains (V element)
        {
                if (count)
                   {
                   auto p = table[Hash (element, table.length)];
                   if (p && p.find (element))
                       return true;
                   }
                return false;
        }

        /***********************************************************************

                Make an independent copy of the container. Does not clone
                elements
                
                Time complexity: O(n)
                
        ***********************************************************************/

        @property final HashSet dup ()
        {
                auto clone = new HashSet!(V, Hash, Reap, Heap) (loadFactor);
                
                if (count)
                   {
                   clone.buckets (buckets);

                   foreach (value; iterator)
                            clone.add (value);
                   }
                return clone;
        }

        /***********************************************************************

                Remove the provided element. Returns true if found, false
                otherwise
                
                Time complexity: O(1) average; O(n) worst

        ***********************************************************************/

        final size_t remove (V element, bool all)
        {
                return remove(element) ? 1 : 0;
        }

        /***********************************************************************

                Remove the provided element. Returns true if found, false
                otherwise
                
                Time complexity: O(1) average; O(n) worst

        ***********************************************************************/

        final bool remove (V element)
        {
                if (count)
                   {
                   auto h = Hash (element, table.length);
                   auto hd = table[h];
                   auto trail = hd;
                   auto p = hd;

                   while (p)
                         {
                         auto n = p.next;
                         if (element == p.value)
                            {
                            decrement (p);
                            if (p is table[h])
                               {
                               table[h] = n;
                               trail = n;
                               }
                            else
                               trail.next = n;
                            return true;
                            } 
                         else
                            {
                            trail = p;
                            p = n;
                            }
                         }
                   }
                return false;
        }

        /***********************************************************************

                Replace the first instance of oldElement with newElement.
                Returns true if oldElement was found and replaced, false
                otherwise.
                
        ***********************************************************************/

        final size_t replace (V oldElement, V newElement, bool all)
        {
                return replace (oldElement, newElement) ? 1 : 0;
        }

        /***********************************************************************

                Replace the first instance of oldElement with newElement.
                Returns true if oldElement was found and replaced, false
                otherwise.
                
        ***********************************************************************/

        final bool replace (V oldElement, V newElement)
        {

                if (count && oldElement != newElement)
                   if (contains (oldElement))
                      {
                      remove (oldElement);
                      add (newElement);
                      return true;
                      }
                return false;
        }

        /***********************************************************************

                Remove and expose the first element. Returns false when no
                more elements are contained
        
                Time complexity: O(n)

        ***********************************************************************/

        final bool take (ref V element)
        {
                if (count)
                    foreach (ref list; table)
                             if (list)
                                {
                                auto p = list;
                                element = p.value;
                                list = p.next;
                                decrement (p);
                                return true;
                                }
                return false;
        }

        /***********************************************************************

        ************************************************************************/

        public void add (IContainer!(V) e)
        {
                foreach (value; e)
                         add (value);
        }

        /***********************************************************************

        ************************************************************************/

        public size_t remove (IContainer!(V) e)
        {
                size_t c;
                foreach (value; e)
                         if (remove (value))
                             ++c;
                return c;
        }

        /***********************************************************************

                Clears the HashMap contents. Various attributes are
                retained, such as the internal table itself. Invoke
                reset() to drop everything.

                Time complexity: O(n)
                
        ***********************************************************************/

        final HashSet clear ()
        {
                return clear (false);
        }

        /***********************************************************************

                Reset the HashSet contents and optionally configure a new
                heap manager. This releases more memory than clear() does

                Time complexity: O(1)
                
        ***********************************************************************/

        final HashSet reset ()
        {
                clear (true);
                heap.collect (table);
                table = null;
                return this;
        }

        /***********************************************************************

                Return the number of buckets

                Time complexity: O(1)

        ***********************************************************************/

        final size_t buckets ()
        {
                return table ? table.length : 0;
        }

        /***********************************************************************

                Set the number of buckets and resize as required
                
                Time complexity: O(n)

        ***********************************************************************/

        final HashSet buckets (size_t cap)
        {
                if (cap < Container.defaultInitialBuckets)
                    cap = Container.defaultInitialBuckets;

                if (cap !is buckets)
                    resize (cap);
                return this;
        }

        /***********************************************************************

                Return the resize threshold
                
                Time complexity: O(1)

        ***********************************************************************/

        final const float threshold ()
        {
                return loadFactor;
        }

        /***********************************************************************

                Set the resize threshold, and resize as required
                
                Time complexity: O(n)
                
        ***********************************************************************/

        final void threshold (float desired)
        {
                assert (desired > 0.0);
                loadFactor = desired;
                if (table)
                    checkLoad;
        }

        /***********************************************************************

                Configure the assigned allocator with the size of each
                allocation block (number of nodes allocated at one time)
                and the number of nodes to pre-populate the cache with.
                
                Time complexity: O(n)

        ***********************************************************************/

        final HashSet cache (size_t chunk, size_t count=0)
        {
                heap.config (chunk, count);
                return this;
        }

        /***********************************************************************

                Copy and return the contained set of values in an array, 
                using the optional dst as a recipient (which is resized 
                as necessary).

                Returns a slice of dst representing the container values.
                
                Time complexity: O(n)
                
        ***********************************************************************/

        final V[] toArray (V[] dst = null)
        {
                if (dst.length < count)
                    dst.length = count;

                size_t i = 0;
                foreach (v; this)
                         dst[i++] = v;
                return dst [0 .. count];                        
        }

        /***********************************************************************

                Is this container empty?
                
                Time complexity: O(1)
                
        ***********************************************************************/

        final const bool isEmpty ()
        {
                return count is 0;
        }

        /***********************************************************************

                Sanity check
                 
        ***********************************************************************/

        final HashSet check()
        {
                assert(!(table is null && count !is 0));
                assert((table is null || table.length > 0));
                assert(loadFactor > 0.0f);

                if (table)
                   {
                   size_t c = 0;
                   for (size_t i = 0; i < table.length; ++i)
                       {
                       for (auto p = table[i]; p; p = p.next)
                           {
                           ++c;
                           assert(contains(p.value));
                           assert(Hash (p.value, table.length) is i);
                           }
                       }
                   assert(c is count);
                   }
                return this;
        }

        /***********************************************************************

                Allocate a node instance. This is used as the default allocator
                 
        ***********************************************************************/

        private Ref allocate ()
        {
                return heap.allocate;
        }
        
        /***********************************************************************

                 Check to see if we are past load factor threshold. If so,
                 resize so that we are at half of the desired threshold.
                 
        ***********************************************************************/

        private void checkLoad ()
        {
                float fc = count;
                float ft = table.length;
                if (fc / ft > loadFactor)
                    resize (2 * cast(size_t)(fc / loadFactor) + 1);
        }

        /***********************************************************************

                resize table to new capacity, rehashing all elements
                
        ***********************************************************************/

        private void resize (size_t newCap)
        {
                //Stdout.formatln ("resize {}", newCap);
                auto newtab = heap.allocate (newCap);
                mutate;

                foreach (bucket; table)
                         while (bucket)
                               {
                               auto n = bucket.next;
                               auto h = Hash (bucket.value, newCap);
                               bucket.next = newtab[h];
                               newtab[h] = bucket;
                               bucket = n;
                               }

                // release the prior table and assign new one
                heap.collect (table);
                table = newtab;
        }

        /***********************************************************************

                Remove the indicated node. We need to traverse buckets
                for this, since we're singly-linked only. Better to save
                the per-node memory than to gain a little on each remove

                Used by iterators only
                 
        ***********************************************************************/

        private bool remove (Ref node, size_t row)
        {
                auto hd = table[row];
                auto trail = hd;
                auto p = hd;

                while (p)
                      {
                      auto n = p.next;
                      if (p is node)
                         {
                         decrement (p);
                         if (p is hd)
                             table[row] = n;
                         else
                            trail.next = n;
                         return true;
                         } 
                      else
                         {
                         trail = p;
                         p = n;
                         }
                      }
                return false;
        }

        /***********************************************************************

                Clears the HashSet contents. Various attributes are
                retained, such as the internal table itself. Invoke
                reset() to drop everything.

                Time complexity: O(n)
                
        ***********************************************************************/

        private HashSet clear (bool all)
        {
                mutate;

                // collect each node if we can't collect all at once
                if (heap.collect(all) is false)
                    foreach (ref v; table)
                             while (v)
                                   {
                                   auto n = v.next;
                                   decrement (v);
                                   v = n;
                                   }

                // retain table, but remove bucket chains
                foreach (ref v; table)
                         v = null;

                count = 0;
                return this;
        }

        /***********************************************************************

                new element was added
                
        ***********************************************************************/

        private void increment()
        {
                ++mutation;
                ++count;
        }
        
        /***********************************************************************

                element was removed
                
        ***********************************************************************/

        private void decrement (Ref p)
        {
                Reap (p.value);
                heap.collect (p);
                ++mutation;
                --count;
        }
        
        /***********************************************************************

                set was changed
                
        ***********************************************************************/

        private void mutate()
        {
                ++mutation;
        }

        /***********************************************************************

                Iterator with no filtering

        ***********************************************************************/

        private struct Iterator
        {
                size_t  row;
                Ref     cell,
                        prior;
                Ref[]   table;
                HashSet owner;
                size_t  mutation;

                /***************************************************************

                        Did the container change underneath us?

                ***************************************************************/

                bool valid ()
                {
                        return owner.mutation is mutation;
                }               

                /***************************************************************

                        Accesses the next value, and returns false when
                        there are no further values to traverse

                ***************************************************************/

                bool next (ref V v)
                {
                        auto n = next;
                        return (n) ? v = *n, true : false;
                }
                
                /***************************************************************

                        Return a pointer to the next value, or null when
                        there are no further values to traverse

                ***************************************************************/

                V* next ()
                {
                        while (cell is null)
                               if (row < table.length)
                                   cell = table [row++];
                               else
                                  return null;
  
                        prior = cell;
                        cell = cell.next;
                        return &prior.value;
                }

                /***************************************************************

                        Foreach support

                ***************************************************************/

                int opApply (scope int delegate(ref V value) dg)
                {
                        int result;

                        auto c = cell;
                        loop: while (true)
                              {
                              while (c is null)
                                     if (row < table.length)
                                         c = table [row++];
                                     else
                                        break loop;
  
                              prior = c;
                              c = c.next;
                              if ((result = dg(prior.value)) != 0)
                                   break loop;
                              }

                        cell = c;
                        return result;
                }                               

                /***************************************************************

                        Remove value at the current iterator location

                ***************************************************************/

                bool remove ()
                {
                        if (prior)
                            if (owner.remove (prior, row-1))
                               {
                               // ignore this change
                               ++mutation;
                               return true;
                               }

                        prior = null;
                        return false;
                }
        }
}



/*******************************************************************************

*******************************************************************************/

debug (HashSet)
{
        import tango.io.Stdout;
        import tango.core.Thread;
        import tango.time.StopWatch;
       
        void main()
        {
                // usage examples ...
                auto set = new HashSet!(char[]);
                set.add ("foo");
                set.add ("bar");
                set.add ("wumpus");

                // implicit generic iteration
                foreach (value; set)
                         Stdout (value).newline;

                // explicit generic iteration
                foreach (value; set.iterator)
                         Stdout (value).newline;

                // generic iteration with optional remove
                auto s = set.iterator;
                foreach (value; s)
                        {} // s.remove;

                // incremental iteration, with optional remove
                char[] v;
                auto iterator = set.iterator;
                while (iterator.next(v))
                      {} //iterator.remove;
                
                // incremental iteration, with optional failfast
                auto it = set.iterator;
                while (it.valid && it.next(v))
                      {}

                // remove specific element
                set.remove ("wumpus");

                // remove first element ...
                while (set.take(v))
                       Stdout.formatln ("taking {}, {} left", v, set.size);
                
                
                // setup for benchmark, with a set of integers. We
                // use a chunk allocator, and presize the bucket[]
                auto test = new HashSet!(int, Container.hash, Container.reap, Container.Chunk);
                test.cache (1000, 1_000_000);
                test.buckets = 1_500_000;
                const count = 1_000_000;
                StopWatch w;

                // benchmark adding
                w.start;
                for (int i=count; i--;)
                     test.add(i);
                Stdout.formatln ("{} adds: {}/s", test.size, test.size/w.stop);

                // benchmark reading
                w.start;
                for (int i=count; i--;)
                     test.contains(i);
                Stdout.formatln ("{} lookups: {}/s", test.size, test.size/w.stop);

                // benchmark adding without allocation overhead
                test.clear;
                w.start;
                for (int i=count; i--;)
                     test.add(i);
                Stdout.formatln ("{} adds (after clear): {}/s", test.size, test.size/w.stop);

                // benchmark duplication
                w.start;
                auto dup = test.dup;
                Stdout.formatln ("{} element dup: {}/s", dup.size, dup.size/w.stop);

                // benchmark iteration
                w.start;
                foreach (value; test) {}
                Stdout.formatln ("{} element iteration: {}/s", test.size, test.size/w.stop);

                test.check;
        }
}