--- parser3/src/include/pa_hash.h 2001/04/05 16:30:41 1.35 +++ parser3/src/include/pa_hash.h 2009/04/17 13:13:09 1.71 @@ -1,139 +1,514 @@ /** @file Parser: hash class decl. - Copyright (c) 2001 ArtLebedev Group (http://www.artlebedev.com) + Copyright (c) 2001-2005 ArtLebedev Group (http://www.artlebedev.com) - Author: Alexander Petrosyan (http://design.ru/paf) + Author: Alexandr Petrosian (http://paf.design.ru) +*/ - $Id: pa_hash.h,v 1.35 2001/04/05 16:30:41 paf Exp $ +/* + The prime numbers used from zend_hash.c, + the part of Zend scripting engine library, + Copyrighted (C) 1999-2000 Zend Technologies Ltd. + http://www.zend.com/license/0_92.txt + For more information about Zend please visit http://www.zend.com/ */ #ifndef PA_HASH_H #define PA_HASH_H -#include +static const char * const IDENT_HASH_H="$Date: 2009/04/17 13:13:09 $"; -#include "pa_pool.h" +#include "pa_memory.h" #include "pa_types.h" -#include "pa_string.h" +const int HASH_ALLOCATES_COUNT=29; + +/** Zend comment: Generated on an Octa-ALPHA 300MHz CPU & 2.5GB RAM monster + + paf: HPUX ld could not handle static member: unsatisfied symbols +*/ +static uint Hash_allocates[HASH_ALLOCATES_COUNT]={ + 5, 11, 19, 53, 107, 223, 463, 983, 1979, 3907, 7963, + 16229, 32531, 65407, 130987, 262237, 524521, 1048793, + 2097397, 4194103, 8388857, 16777447, 33554201, 67108961, + 134217487, 268435697, 536870683, 1073741621, 2147483399}; + +/// useful generic hash function +inline void generic_hash_code(uint& result, char c) { + result=(result<<4)+c; + if(uint g=(result&0xF0000000)) { + result=result^(g>>24); + result=result^g; + } +} +/// useful generic hash function +inline void generic_hash_code(uint& result, const char* s) { + while(char c=*s++) { + result=(result<<4)+c; + if(uint g=(result&0xF0000000)) { + result=result^(g>>24); + result=result^g; + } + } +} + +/// useful generic hash function +inline void generic_hash_code(uint& result, const char* buf, size_t size) { + const char* end=buf+size; + while(buf>24); + result=result^g; + } + } +} + +/// simple hash code of int. used by EXIF mapping +inline uint hash_code(int self) { + uint result=0; + generic_hash_code(result, (const char*)&self, sizeof(self)); + return result; +} /** - Pooled hash. + Simple hash. - Automatically rehashed when almost full. + Automatically rehashed when almost is_full. + Contains no 0 values. + get returning 0 means there were no such. + "put value 0" means "remove" */ -class Hash : public Pooled { +template class Hash: public PA_Object { public: - typedef String Key; ///< hash Key type. longing for templates - typedef void Val; ///< hash Val type. longing for templates + typedef K key_type; + typedef V value_type; - /// for_each iterator function type - typedef void (*For_each_func)(const Key& key, Val *value, void *info); + Hash() { + allocated=Hash_allocates[allocates_index=0]; + threshold=allocated*THRESHOLD_PERCENT/100; + fpairs_count=fused_refs=0; + refs=new(UseGC) Pair*[allocated]; + } -public: + Hash(const Hash& source) { + allocates_index=source.allocates_index; + allocated=source.allocated; + threshold=source.threshold; + fused_refs=source.fused_refs; + fpairs_count=source.fpairs_count; + refs=new(UseGC) Pair*[allocated]; + + // clone & rehash + Pair **old_ref=source.refs; + for(int index=0; indexlink; - Hash(Pool& apool) : Pooled(apool) { - construct(apool); + Pair **new_ref=&refs[index]; + *new_ref=new Pair(pair->code, pair->key, pair->value, *new_ref); + + pair=next; + } } - /// useful generic hash function - static uint generic_code(uint aresult, const char *start, uint allocated); + ~Hash() { + delete[] refs; + } - /// put a [value] under the [key], return existed or not - /*SYNCHRONIZED*/ bool put(const Key& key, Val *value); -/* - /// dirty hack to allow constant items storage. I long for Hash - /*SYNCHRONIZED* / bool put(const Key& key, const Val *value) { - return put(key, const_cast(value)); + /// put a [value] under the [key] @returns existed or not + bool put(K key, V value) { + if(!value) { + remove(key); + return false; + } + if(is_full()) + expand(); + + uint code=hash_code(key); + uint index=code%allocated; + Pair **ref=&refs[index]; + for(Pair *pair=*ref; pair; pair=pair->link) + if(pair->code==code && pair->key==key) { + // found a pair with the same key + pair->value=value; + return true; + } + + // proper pair not found -- create&link_in new pair + if(!*ref) // root cell were fused_refs? + fused_refs++; // not, we'll use it and record the fact + *ref=new Pair(code, key, value, *ref); + fpairs_count++; + return false; } -*/ + + /// put a [value] under the [key] @returns existed or not + template R replace_maybe_append(K key, V value, F prevent, I info) { + if(!value) { + // they can come here from somewhere (true with maybe_replace_maybe_append, keeping parallel) + remove(key); + // this has nothing to do with properties, doing no special property handling here + return 0; + } + + if(is_full()) + expand(); + + uint code=hash_code(key); + uint index=code%allocated; + Pair **ref=&refs[index]; + for(Pair *pair=*ref; pair; pair=pair->link) + if(pair->code==code && pair->key==key) { + // found a pair with the same key + pair->value=value; + return reinterpret_cast(1); + } + + // proper pair not found + // prevent-function intercepted append? + if(R result=prevent(value, info)) + return result; + + //create&link_in new pair + if(!*ref) // root cell were fused_refs? + fused_refs++; // not, we'll use it and record the fact + *ref=new Pair(code, key, value, *ref); + fpairs_count++; + return 0; + } + + /// put a [value] under the [key] @returns existed or not + template + R maybe_replace_maybe_append(K key, V value, F1 prevent_replace, F2 prevent_append, I info) + { + if(!value) { + // they can come here from Temp_value_element::dctor to restore some empty value + remove(key); + // this has nothing to do with properties, doing no special property handling here + return 0; + } + + if(is_full()) + expand(); + + uint code=hash_code(key); + uint index=code%allocated; + Pair **ref=&refs[index]; + for(Pair *pair=*ref; pair; pair=pair->link) + if(pair->code==code && pair->key==key) { + // found a pair with the same key + + // prevent-function intercepted replace? + if(R result=prevent_replace(pair->value, info)) + return result; + + pair->value=value; + return reinterpret_cast(1); + } + + // proper pair not found + // prevent-function intercepted append? + if(R result=prevent_append(value, info)) + return result; + + //create&link_in new pair + if(!*ref) // root cell were fused_refs? + fused_refs++; // not, we'll use it and record the fact + *ref=new Pair(code, key, value, *ref); + fpairs_count++; + return 0; + } + + /// put a [value] under the [key] @returns existed or not + template + R maybe_replace_never_append(K key, V value, F1 prevent_replace, I info) + { + if(!value) { + // they can come here from somewhere (true with maybe_replace_maybe_append, keeping parallel) + remove(key); + // this has nothing to do with properties, doing no special property handling here + return 0; + } + + if(is_full()) + expand(); + + uint code=hash_code(key); + uint index=code%allocated; + Pair **ref=&refs[index]; + for(Pair *pair=*ref; pair; pair=pair->link) + if(pair->code==code && pair->key==key) { + // found a pair with the same key + + // prevent-function intercepted replace? + if(R result=prevent_replace(pair->value, info)) + return result; + + pair->value=value; + return reinterpret_cast(1); + } + + return 0; + } + + /// remove the [key] @returns existed or not + bool remove(K key) { + uint code=hash_code(key); + uint index=code%allocated; + for(Pair **ref=&refs[index]; *ref; ref=&(*ref)->link) + if((*ref)->code==code && (*ref)->key==key) { + // found a pair with the same key + Pair *next=(*ref)->link; + delete *ref; + *ref=next; + --fpairs_count; + return true; + } + + return false; + } + + /// return true if key exists + bool contains(K key){ + uint code=hash_code(key); + uint index=code%allocated; + for(Pair *pair=refs[index]; pair; pair=pair->link){ + if(pair->code==code && pair->key==key) + return true; + } + + return false; + } + /// get associated [value] by the [key] - /*SYNCHRONIZED*/ Val *get(const Key& key) const; + V get(K key) const { + uint code=hash_code(key); + uint index=code%allocated; + for(Pair *pair=refs[index]; pair; pair=pair->link) + if(pair->code==code && pair->key==key) + return pair->value; + + return V(0); + } - /// put a [value] under the [key] if that [key] existed, return existed or not - /*SYNCHRONIZED*/ bool put_replace(const Key& key, Val *value); + /// get associated [value] by the [key] + [code] (faster) + V get_by_hash_code(uint code, K key) const { + uint index=code%allocated; + for(Pair *pair=refs[index]; pair; pair=pair->link) + if(pair->code==code && pair->key==key) + return pair->value; + + return V(0); + } + + /// put a [value] under the [key] if that [key] existed @returns existed or not + bool put_replaced(K key, V value) { + if(!value) { + remove(key); + return false; + } + uint code=hash_code(key); + uint index=code%allocated; + for(Pair *pair=refs[index]; pair; pair=pair->link) + if(pair->code==code && pair->key==key) { + // found a pair with the same key, replacing + pair->value=value; + return true; + } - /// put a [value] under the [key] if that [key] NOT existed, return existed or not - /*SYNCHRONIZED*/ bool put_dont_replace(const Key& key, Val *value); + // proper pair not found + return false; + } - /// put all 'src' values if NO with same key existed - /*SYNCHRONIZED*/ void merge_dont_replace(const Hash& src); + /// put a [value] under the [key] if that [key] existed @returns existed or not + template R maybe_put_replaced(K key, V value, F prevent) { + if(!value) { + // they can come here from Temp_value_element::dctor to restore some empty value + remove(key); + // this has nothing to do with properties, doing no special property handling here + return 0; + } + + uint code=hash_code(key); + uint index=code%allocated; + for(Pair *pair=refs[index]; pair; pair=pair->link) + if(pair->code==code && pair->key==key) { + // found a pair with the same key, replacing + // prevent-function intercepted put? + if(R result=prevent(pair->value)) + return result; + + pair->value=value; + return reinterpret_cast(1); + } - void put(const Key& key, int value) { put(key, reinterpret_cast(value)); } - void put(const Key& key, String *value) { put(key, static_cast(value)); } + // proper pair not found + return 0; + } - //@{ - /// handy get, longing for Hash, Hash - int get_int(const Key& key) const { return reinterpret_cast(get(key)); } - const String *get_string(const Key& key) const { return static_cast(get(key)); } - //@} + /// put a [value] under the [key] if that [key] NOT existed @returns existed or not + bool put_dont_replace(K key, V value) { + if(!value) { + remove(key); + return false; + } + if(is_full()) + expand(); + + uint code=hash_code(key); + uint index=code%allocated; + Pair **ref=&refs[index]; + for(Pair *pair=*ref; pair; pair=pair->link) + if(pair->code==code && pair->key==key) { + // found a pair with the same key, NOT replacing + return true; + } + + // proper pair not found -- create&link_in new pair + if(!*ref) // root cell were fused_refs? + fused_refs++; // not, we'll use it and record the fact + *ref=new Pair(code, key, value, *ref); + fpairs_count++; + return false; + } + + /** put all 'src' values if NO with same key existed + @todo optimize this.allocated==src.allocated case + */ + void merge_dont_replace(const Hash& src) { + for(int i=0; ilink) + put_dont_replace(pair->key, pair->value); + } /// number of elements in hash - int size() { return used; } + int count() const { return fpairs_count; } - /// iterate over all not zero elements - void for_each(For_each_func func, void *info=0); + /// iterate over all pairs + template void for_each(void callback(K, V, I), I info) const { + Pair **ref=refs; + for(int index=0; indexlink) + callback(pair->key, pair->value, info); + } - /// remove all elements - void clear(); + /// iterate over all pairs + template void for_each_ref(void callback(K, V&, I), I info) const { + Pair **ref=refs; + for(int index=0; indexlink) + callback(pair->key, pair->value, info); + } -protected: + /// iterate over all pairs until condition becomes true, return that element + template V first_that(bool callback(K, V, I), I info) const { + Pair **ref=refs; + for(int index=0; indexlink) + if(callback(pair->key, pair->value, info)) + return pair->value; + + return V(0); + } - void construct(Pool& apool); + /// remove all elements + void clear() { + memset(refs, 0, sizeof(*refs)*allocated); + fpairs_count=fused_refs=0; + } private: - // expand when these %% of allocated exausted + /// expand when these %% of allocated exausted enum { THRESHOLD_PERCENT=75 }; - // the index of [allocated] in [allocates] + /// the index of [allocated] in [Hash_allocates] int allocates_index; - // possible [allocates]. prime numbers - static uint allocates[]; - static int allocates_count; - - // number of allocated pairs + /// number of allocated pairs int allocated; - // helper: expanding when used == threshold + /// helper: expanding when fused_refs == threshold int threshold; - // used pairs - int used; + /// used pairs + int fused_refs; - // main storage - class Pair { - friend Hash; + /// stored pairs total (including those by links) + int fpairs_count; + /// pair storage + class Pair: public PA_Allocated { + public: uint code; - const Key key; - Val *value; + K key; + V value; Pair *link; - void *operator new(size_t allocated, Pool& apool); - - Pair(uint acode, const Key& akey, Val *avalue, Pair *alink) : + Pair(uint acode, K akey, V avalue, Pair *alink) : code(acode), key(akey), value(avalue), link(alink) {} } **refs; - // filled to threshold: needs expanding - bool full() { return used==threshold; } + /// filled to threshold: needs expanding + bool is_full() { return fused_refs==threshold; } + + /// allocate larger buffer & rehash + void expand() { + int old_allocated=allocated; + Pair **old_refs=refs; + + allocates_index=allocates_index+1link; + + uint new_index=pair->code%allocated; + Pair **new_ref=&refs[new_index]; + pair->link=*new_ref; + *new_ref=pair; - // allocate larger buffer & rehash - void expand(); + pair=next; + } + + delete[] old_refs; + } private: //disabled - //Hash(Hash&) {} Hash& operator = (const Hash&) { return *this; } }; +/// Auto-object used to temporarily substituting/removing hash values +template +class Temp_hash_value { + Hash& fhash; + K fname; + V saved_value; +public: + Temp_hash_value(Hash& ahash, K aname, V avalue) : + fhash(ahash), + fname(aname), + saved_value(ahash.get(aname)) { + fhash.put(aname, avalue); + } + ~Temp_hash_value() { + fhash.put(fname, saved_value); + } +}; + #endif