/*******************************************************************************
copyright: Copyright (c) 2008 Kris Bell. All rights reserved
license: BSD style: $(LICENSE)
version: Apr 2008: Initial release
authors: Kris
Since: 0.99.7
Based upon Doug Lea's Java collection package
*******************************************************************************/
module tango.util.container.HashSet;
private import tango.util.container.Slink;
public import tango.util.container.Container;
private import tango.util.container.model.IContainer;
/*******************************************************************************
Hash table implementation of a Set
---
Iterator iterator ()
int opApply (scope int delegate(ref V value) dg)
bool add (V element)
bool contains (V element)
bool take (ref V element)
bool remove (V element)
size_t remove (IContainer!(V) e)
bool replace (V oldElement, V newElement)
size_t size ()
bool isEmpty ()
V[] toArray (V[] dst)
HashSet dup ()
HashSet clear ()
HashSet reset ()
size_t buckets ()
HashSet buckets (size_t cap)
float threshold ()
void threshold (float desired)
---
*******************************************************************************/
class HashSet (V, alias Hash = Container.hash,
alias Reap = Container.reap,
alias Heap = Container.DefaultCollect)
: IContainer!(V)
{
// use this type for Allocator configuration
public alias Slink!(V) Type;
// pointer to a single chain node
private alias Type *Ref;
// the Heap policy template instantiated for the node type
private alias Heap!(Type) Alloc;
// Each table entry is a list - null if no table allocated
private Ref table[];
// number of elements contained
private size_t count;
// the threshold load factor (compared against count / table.length
// in checkLoad)
private float loadFactor;
// configured heap manager (allocates and collects nodes and tables)
private Alloc heap;
// mutation tag updates on each change; iterators snapshot it so
// that Iterator.valid() can detect concurrent modification
private size_t mutation;
/***********************************************************************
        Construct an empty HashSet. No bucket table is allocated
        here; add() creates one on first use.

        Params:
        f = the load-factor threshold to store
***********************************************************************/
this (float f = Container.defaultLoadFactor)
{
    this.loadFactor = f;
}
/***********************************************************************
        Destructor: hands all nodes and the bucket table back to the
        heap manager by way of reset()
***********************************************************************/
~this ()
{
    reset();
}
/***********************************************************************
        Return a generic iterator for contained elements.

        The iterator captures the current mutation tag and a slice
        of the bucket table, and starts ahead of the first element.

        Returns:
        a freshly positioned Iterator bound to this set
***********************************************************************/
final Iterator iterator ()
{
    // void-initialised to skip the default init; every field must
    // therefore be assigned explicitly below
    Iterator i = void;
    i.mutation = mutation;
    i.table = table;
    i.owner = this;
    i.cell = null;
    // prior must start as null: Iterator.remove() tests it, and a
    // garbage value here would be handed to the owner as a node
    i.prior = null;
    i.row = 0;
    return i;
}
/***********************************************************************
        Foreach support: visits every contained value by delegating
        to a fresh iterator.

        Params:
        dg = the foreach body; a non-zero return stops the traversal

        Returns:
        whatever the iterator's opApply returns
***********************************************************************/
final int opApply (scope int delegate(ref V value) dg)
{
    auto walker = iterator();
    return walker.opApply (dg);
}
/***********************************************************************
        Return the number of elements currently held in the set
***********************************************************************/
@property final const size_t size ()
{
    return this.count;
}
/***********************************************************************
        Add a new element to the set, unless an equivalent one is
        already present. The bucket table is created on first use.

        Params:
        element = the value to insert

        Returns:
        true where the element was added, false where it already
        exists

        Time complexity: O(1) average; O(n) worst.
***********************************************************************/
final bool add (V element)
{
    if (table is null)
        resize (Container.defaultInitialBuckets);

    auto slot = Hash (element, table.length);
    auto head = table[slot];
    bool occupied = head !is null;

    if (occupied && head.find (element))
        return false;

    // the new node becomes the head of the chain
    table[slot] = allocate().set (element, head);
    increment();

    // only check the load when the bin was nonempty
    if (occupied)
        checkLoad();
    return true;
}
/***********************************************************************
        Does this set contain the given element?

        Params:
        element = the value to look for

        Returns:
        true when an equivalent element is found in its bucket

        Time complexity: O(1) average; O(n) worst
***********************************************************************/
final bool contains (V element)
{
    // an empty set may have no table at all, so bail out first
    if (count is 0)
        return false;

    auto head = table[Hash (element, table.length)];
    if (head is null)
        return false;

    return head.find (element) ? true : false;
}
/***********************************************************************
        Make an independent copy of the container, with the same
        load factor and (when non-empty) the same bucket count.
        Elements themselves are not cloned.

        Returns:
        the new HashSet

        Time complexity: O(n)
***********************************************************************/
@property final HashSet dup ()
{
    auto copy = new HashSet!(V, Hash, Reap, Heap) (loadFactor);
    if (count is 0)
        return copy;

    // size the copy up front, then re-add every value
    copy.buckets (buckets());
    foreach (item; iterator())
    {
        copy.add (item);
    }
    return copy;
}
/***********************************************************************
        Remove the provided element and report how many entries
        were removed.

        Params:
        element = the value to remove
        all     = not consulted by this implementation

        Returns:
        1 if the element was found and removed, 0 otherwise

        Time complexity: O(1) average; O(n) worst
***********************************************************************/
final size_t remove (V element, bool all)
{
    if (remove (element))
        return 1;
    return 0;
}
/***********************************************************************
        Remove the provided element.

        Params:
        element = the value to remove

        Returns:
        true if found and removed, false otherwise

        Time complexity: O(1) average; O(n) worst
***********************************************************************/
final bool remove (V element)
{
    if (count is 0)
        return false;

    auto slot = Hash (element, table.length);
    Ref previous = null;

    for (auto node = table[slot]; node; )
    {
        // grab the successor first: decrement() hands the node
        // back to the heap manager
        auto following = node.next;
        if (element == node.value)
        {
            decrement (node);
            if (previous is null)
                table[slot] = following;        // node was the chain head
            else
                previous.next = following;
            return true;
        }
        previous = node;
        node = following;
    }
    return false;
}
/***********************************************************************
        Replace oldElement with newElement and report how many
        replacements took place.

        Params:
        oldElement = the value to be replaced
        newElement = the value to insert in its place
        all        = not consulted by this implementation

        Returns:
        1 if oldElement was found and replaced, 0 otherwise
***********************************************************************/
final size_t replace (V oldElement, V newElement, bool all)
{
    if (replace (oldElement, newElement))
        return 1;
    return 0;
}
/***********************************************************************
        Replace oldElement with newElement.

        Nothing happens when the set is empty or when the two
        values compare equal. Otherwise oldElement is removed and,
        if that succeeded, newElement is added (add() itself skips
        the insert where newElement is already present).

        Params:
        oldElement = the value to be replaced
        newElement = the value to insert in its place

        Returns:
        true if oldElement was found and removed, false otherwise
***********************************************************************/
final bool replace (V oldElement, V newElement)
{
    if (count && oldElement != newElement)
    {
        // remove() already reports whether the element was there,
        // so a separate contains() probe would only hash and scan
        // the same bucket a second time
        if (remove (oldElement))
        {
            add (newElement);
            return true;
        }
    }
    return false;
}
/***********************************************************************
        Remove and expose the first element, i.e. the head of the
        first non-empty bucket.

        Params:
        element = receives the removed value

        Returns:
        true when a value was taken, false when the set is empty

        Time complexity: O(n)
***********************************************************************/
final bool take (ref V element)
{
    if (count is 0)
        return false;

    for (size_t i = 0; i < table.length; ++i)
    {
        auto head = table[i];
        if (head is null)
            continue;

        // copy the value out and unlink before releasing the node
        element = head.value;
        table[i] = head.next;
        decrement (head);
        return true;
    }
    return false;
}
/***********************************************************************
        Add every value of another container to this set, by
        calling add() once per value.

        Params:
        e = the container to read values from
************************************************************************/
public void add (IContainer!(V) e)
{
    foreach (item; e)
    {
        add (item);
    }
}
/***********************************************************************
        Remove every value of another container from this set.

        Params:
        e = the container listing the values to remove

        Returns:
        the number of values that were actually removed
************************************************************************/
public size_t remove (IContainer!(V) e)
{
    size_t removed = 0;
    foreach (item; e)
    {
        removed += remove (item) ? 1 : 0;
    }
    return removed;
}
/***********************************************************************
        Clears the HashSet contents. Various attributes are
        retained, such as the internal table itself. Invoke
        reset() to drop everything.

        Returns:
        this set, for call chaining

        Time complexity: O(n)
***********************************************************************/
final HashSet clear ()
{
    // false: forwarded to heap.collect() by the private overload
    auto self = clear (false);
    return self;
}
/***********************************************************************
        Reset the HashSet: clear the contents, then give the bucket
        table itself back to the heap manager. This releases more
        memory than clear() does.

        Returns:
        this set, for call chaining
***********************************************************************/
final HashSet reset ()
{
    this.clear (true);
    this.heap.collect (this.table);
    this.table = null;
    return this;
}
/***********************************************************************
        Return the number of buckets, or zero when no table has
        been allocated

        Time complexity: O(1)
***********************************************************************/
final size_t buckets ()
{
    if (table)
        return table.length;
    return 0;
}
/***********************************************************************
        Set the number of buckets and resize as required. Requests
        below Container.defaultInitialBuckets are raised to that
        minimum.

        Params:
        cap = the desired bucket count

        Returns:
        this set, for call chaining

        Time complexity: O(n)
***********************************************************************/
final HashSet buckets (size_t cap)
{
    auto wanted = cap;
    if (wanted < Container.defaultInitialBuckets)
        wanted = Container.defaultInitialBuckets;

    // skip the rehash when the table already has that many buckets
    if (wanted != buckets())
        resize (wanted);
    return this;
}
/***********************************************************************
        Return the resize threshold (the stored load factor)

        Time complexity: O(1)
***********************************************************************/
final const float threshold ()
{
    return this.loadFactor;
}
/***********************************************************************
        Set the resize threshold, and resize as required. Where a
        table exists the load is re-checked right away.

        Params:
        desired = the new load factor; asserted to be positive

        Time complexity: O(n)
***********************************************************************/
final void threshold (float desired)
{
    assert (desired > 0.0);
    loadFactor = desired;
    if (table)
    {
        checkLoad();
    }
}
/***********************************************************************
        Configure the assigned allocator with the size of each
        allocation block (number of nodes allocated at one time)
        and the number of nodes to pre-populate the cache with.
        Both values are passed straight on to heap.config().

        Params:
        chunk = nodes per allocation block
        count = nodes to pre-populate (this parameter shadows the
                element-count field inside the method)

        Returns:
        this set, for call chaining

        Time complexity: O(n)
***********************************************************************/
final HashSet cache (size_t chunk, size_t count=0)
{
    this.heap.config (chunk, count);
    return this;
}
/***********************************************************************
        Copy the contained values into an array, using the optional
        dst as a recipient (which is grown where it is too short).

        Params:
        dst = optional destination buffer

        Returns:
        a slice of dst representing the container values

        Time complexity: O(n)
***********************************************************************/
final V[] toArray (V[] dst = null)
{
    if (count > dst.length)
        dst.length = count;

    size_t filled = 0;
    foreach (item; this)
    {
        dst[filled] = item;
        ++filled;
    }
    return dst [0 .. count];
}
/***********************************************************************
        Is this container empty?

        Returns:
        true when the element count is zero

        Time complexity: O(1)
***********************************************************************/
final const bool isEmpty ()
{
    return count == 0;
}
/***********************************************************************
        Sanity check. Asserts that: elements exist only when a
        table exists; an allocated table is non-empty; the load
        factor is positive; every node can be found via contains()
        and sits in the bucket its hash selects; and the node
        total matches the element count.

        Returns:
        this set, for call chaining
***********************************************************************/
final HashSet check()
{
    assert(table !is null || count is 0);
    assert(table is null || table.length > 0);
    assert(loadFactor > 0.0f);

    if (table)
    {
        size_t seen = 0;
        foreach (i, head; table)
        {
            for (auto node = head; node; node = node.next)
            {
                ++seen;
                assert(contains(node.value));
                assert(Hash (node.value, table.length) == i);
            }
        }
        assert(seen == count);
    }
    return this;
}
/***********************************************************************
        Obtain a node instance from the configured heap manager
***********************************************************************/
private Ref allocate ()
{
    return heap.allocate();
}
/***********************************************************************
        Check whether the element-to-bucket ratio exceeds the load
        factor. If so, grow the table to
        2 * (count / loadFactor) + 1 buckets, which puts the
        load at roughly half the threshold.
***********************************************************************/
private void checkLoad ()
{
    float held = count;
    float slots = table.length;

    // written as a negated '>' so that a NaN ratio still means
    // "do nothing"
    if (!(held / slots > loadFactor))
        return;

    resize (2 * cast(size_t)(held / loadFactor) + 1);
}
/***********************************************************************
        Resize the table to a new capacity, rehashing all elements.
        Existing nodes are relinked into the new table rather than
        reallocated, and the mutation tag is bumped.

        Params:
        newCap = the bucket count of the replacement table
***********************************************************************/
private void resize (size_t newCap)
{
    auto fresh = heap.allocate (newCap);
    mutate();

    foreach (head; table)
    {
        for (auto node = head; node; )
        {
            // remember the old successor before relinking
            auto following = node.next;
            auto slot = Hash (node.value, newCap);
            node.next = fresh[slot];
            fresh[slot] = node;
            node = following;
        }
    }

    // release the prior table and assign the new one
    heap.collect (table);
    table = fresh;
}
/***********************************************************************
        Remove the indicated node. We need to traverse the bucket
        for this, since we're singly-linked only. Better to save
        the per-node memory than to gain a little on each remove.

        Used by iterators only

        Params:
        node = the node to unlink, matched by identity
        row  = index of the bucket expected to hold the node

        Returns:
        true if the node was found in that bucket and removed
***********************************************************************/
private bool remove (Ref node, size_t row)
{
    Ref previous = null;

    for (auto current = table[row]; current; )
    {
        // fetch the successor first: decrement() hands the node
        // back to the heap manager
        auto following = current.next;
        if (current is node)
        {
            decrement (current);
            if (previous is null)
                table[row] = following;         // node was the chain head
            else
                previous.next = following;
            return true;
        }
        previous = current;
        current = following;
    }
    return false;
}
/***********************************************************************
        Clears the HashSet contents. Various attributes are
        retained, such as the internal table itself. Invoke
        reset() to drop everything.

        Params:
        all = forwarded to heap.collect(); where that call returns
              false, each node is released individually instead

        Returns:
        this set, for call chaining

        Time complexity: O(n)
***********************************************************************/
private HashSet clear (bool all)
{
    mutate();

    // collect each node if we can't collect all at once
    if (!heap.collect (all))
    {
        foreach (ref head; table)
        {
            while (head !is null)
            {
                auto following = head.next;
                decrement (head);
                head = following;
            }
        }
    }

    // retain table, but remove bucket chains
    table[] = null;
    count = 0;
    return this;
}
/***********************************************************************
        Bookkeeping for an added element: bumps both the mutation
        tag and the element count
***********************************************************************/
private void increment()
{
    mutation += 1;
    count += 1;
}
/***********************************************************************
        Bookkeeping for a removed element: passes the value to
        Reap, returns the node to the heap manager, bumps the
        mutation tag and lowers the element count.

        Params:
        p = the node being removed; callers unlink it themselves
***********************************************************************/
private void decrement (Ref p)
{
    Reap (p.value);
    heap.collect (p);
    mutation += 1;
    count -= 1;
}
/***********************************************************************
        Record a structural change by bumping the mutation tag
***********************************************************************/
private void mutate()
{
    mutation += 1;
}
/***********************************************************************
        Iterator with no filtering.

        Holds a slice of the owner's bucket table plus a cursor:
        'row' is the index of the next bucket to open, 'cell' is
        the next node to hand out and 'prior' is the node handed
        out most recently (the one remove() acts upon).
***********************************************************************/
private struct Iterator
{
    size_t  row;
    Ref     cell,
            prior;
    Ref[]   table;
    HashSet owner;
    size_t  mutation;

    /***************************************************************
            Did the container change underneath us? Returns true
            while the owner's mutation tag still matches ours
    ***************************************************************/
    bool valid ()
    {
        return owner.mutation is mutation;
    }

    /***************************************************************
            Accesses the next value, and returns false when
            there are no further values to traverse

            Params:
            v = receives a copy of the next value, where one exists
    ***************************************************************/
    bool next (ref V v)
    {
        // spelled out as statements: using the value of a comma
        // expression inside a ternary is rejected by current D
        auto n = next();
        if (n is null)
            return false;
        v = *n;
        return true;
    }

    /***************************************************************
            Return a pointer to the next value, or null when
            there are no further values to traverse
    ***************************************************************/
    V* next ()
    {
        // skip forward over empty buckets
        while (cell is null)
            if (row < table.length)
                cell = table [row++];
            else
                return null;

        prior = cell;
        cell = cell.next;
        return &prior.value;
    }

    /***************************************************************
            Foreach support. Continues from the current cursor
            position; a non-zero result from dg stops the loop
            and is returned to the caller.
    ***************************************************************/
    int opApply (scope int delegate(ref V value) dg)
    {
        int result;
        auto c = cell;

        loop: while (true)
        {
            while (c is null)
                if (row < table.length)
                    c = table [row++];
                else
                    break loop;

            // advance before invoking dg, so the body may remove
            // the node just handed out
            prior = c;
            c = c.next;
            if ((result = dg(prior.value)) != 0)
                break loop;
        }

        cell = c;
        return result;
    }

    /***************************************************************
            Remove value at the current iterator location, i.e.
            the value handed out most recently.

            Returns:
            true if that node was found in its bucket and removed;
            false where there is no such node (nothing handed out
            yet, or it was removed already)
    ***************************************************************/
    bool remove ()
    {
        auto target = prior;

        // forget the node whatever the outcome: after a successful
        // removal it has gone back to the heap manager, and a second
        // remove() must not pass that stale pointer to the owner
        prior = null;

        // row was advanced past the bucket when it was opened, so
        // the node lives in bucket row-1
        if (target && owner.remove (target, row-1))
        {
            // ignore this change
            ++mutation;
            return true;
        }
        return false;
    }
}
}
/*******************************************************************************
*******************************************************************************/
// Usage examples and a simple benchmark; only compiled when the
// HashSet debug identifier is enabled
debug (HashSet)
{
import tango.io.Stdout;
import tango.core.Thread;
import tango.time.StopWatch;
void main()
{
// usage examples ...
auto set = new HashSet!(char[]);
set.add ("foo");
set.add ("bar");
set.add ("wumpus");
// implicit generic iteration
foreach (value; set)
Stdout (value).newline;
// explicit generic iteration
foreach (value; set.iterator)
Stdout (value).newline;
// generic iteration with optional remove
auto s = set.iterator;
foreach (value; s)
{} // s.remove;
// incremental iteration, with optional remove
char[] v;
auto iterator = set.iterator;
while (iterator.next(v))
{} //iterator.remove;
// incremental iteration, with optional failfast
// (valid() turns false once the set is modified elsewhere)
auto it = set.iterator;
while (it.valid && it.next(v))
{}
// remove specific element
set.remove ("wumpus");
// remove first element ...
while (set.take(v))
Stdout.formatln ("taking {}, {} left", v, set.size);
// setup for benchmark, with a set of integers. We
// use a chunk allocator, and presize the bucket[]
auto test = new HashSet!(int, Container.hash, Container.reap, Container.Chunk);
test.cache (1000, 1_000_000);
test.buckets = 1_500_000;
const count = 1_000_000;
StopWatch w;
// benchmark adding
w.start;
for (int i=count; i--;)
test.add(i);
Stdout.formatln ("{} adds: {}/s", test.size, test.size/w.stop);
// benchmark reading
w.start;
for (int i=count; i--;)
test.contains(i);
Stdout.formatln ("{} lookups: {}/s", test.size, test.size/w.stop);
// benchmark adding without allocation overhead
// (clear retains the bucket table, so no table resize is needed)
test.clear;
w.start;
for (int i=count; i--;)
test.add(i);
Stdout.formatln ("{} adds (after clear): {}/s", test.size, test.size/w.stop);
// benchmark duplication
w.start;
auto dup = test.dup;
Stdout.formatln ("{} element dup: {}/s", dup.size, dup.size/w.stop);
// benchmark iteration
w.start;
foreach (value; test) {}
Stdout.formatln ("{} element iteration: {}/s", test.size, test.size/w.stop);
// finish with the internal consistency assertions
test.check;
}
}
|