--- parser3/src/include/pa_hash.h 2001/01/27 12:04:53 1.2 +++ parser3/src/include/pa_hash.h 2010/08/11 16:17:27 1.83 @@ -1,88 +1,644 @@ -/* - $Id: pa_hash.h,v 1.2 2001/01/27 12:04:53 paf Exp $ -*/ +/** @file + Parser: hash class decl. -/* + Copyright (c) 2001-2009 ArtLebedev Group (http://www.artlebedev.com) + Author: Alexandr Petrosian (http://paf.design.ru) +*/ +/* + The prime numbers used from zend_hash.c, + the part of Zend scripting engine library, + Copyrighted (C) 1999-2000 Zend Technologies Ltd. + http://www.zend.com/license/0_92.txt + For more information about Zend please visit http://www.zend.com/ */ #ifndef PA_HASH_H #define PA_HASH_H -#include +static const char * const IDENT_HASH_H="$Date: 2010/08/11 16:17:27 $"; +#include "pa_memory.h" #include "pa_types.h" #include "pa_string.h" -class Pool; +const int HASH_ALLOCATES_COUNT=29; + +/** Zend comment: Generated on an Octa-ALPHA 300MHz CPU & 2.5GB RAM monster + + paf: HPUX ld could not handle static member: unsatisfied symbols +*/ +static uint Hash_allocates[HASH_ALLOCATES_COUNT]={ + 5, 11, 19, 53, 107, 223, 463, 983, 1979, 3907, 7963, + 16229, 32531, 65407, 130987, 262237, 524521, 1048793, + 2097397, 4194103, 8388857, 16777447, 33554201, 67108961, + 134217487, 268435697, 536870683, 1073741621, 2147483399}; + +/// useful generic hash function +inline void generic_hash_code(uint& result, char c) { + result=(result<<4)+c; + if(uint g=(result&0xF0000000)) { + result=result^(g>>24); + result=result^g; + } +} +/// useful generic hash function +inline void generic_hash_code(uint& result, const char* s) { + while(char c=*s++) { + result=(result<<4)+c; + if(uint g=(result&0xF0000000)) { + result=result^(g>>24); + result=result^g; + } + } +} + +/// useful generic hash function +inline void generic_hash_code(uint& result, const char* buf, size_t size) { + const char* end=buf+size; + while(buf>24); + result=result^g; + } + } +} + +/// simple hash code of int. used by EXIF mapping +inline uint hash_code(int self) { + uint result=0; + generic_hash_code(result, (const char*)&self, sizeof(self)); + return result; +} + +#endif // PA_HASH_H + +#ifndef PA_HASH_CLASS +#define PA_HASH_CLASS +/** + Simple hash. + + Automatically rehashed when almost is_full. + Contains no 0 values. + get returning 0 means there were no such. + "put value 0" means "remove" +*/ +#ifdef HASH_ORDER -typedef String Key; -typedef void Value; +#undef HASH +#undef HASH_STRING +#undef HASH_NEW_PAIR +#undef HASH_FOR_EACH + +#define HASH OrderedHash +#define HASH_STRING OrderedHashString +#define HASH_NEW_PAIR(code, key, value) *ref=new Pair(code, key, value, *ref, this->last); this->last=&((*ref)->next) + +#define HASH_FOR_EACH \ + for(Pair *pair=this->first; pair; pair=pair->next) + +#else + +#define HASH Hash +#define HASH_STRING HashString +#define HASH_NEW_PAIR(code, key, value) *ref=new Pair(code, key, value, *ref) + +#define HASH_FOR_EACH \ + Pair **ref=this->refs; \ + for(int index=0; indexallocated; index++) \ + for(Pair *pair=*ref++; pair; pair=pair->link) -class Hash { +#endif + +template class HASH: public PA_Object { public: -private: - friend Pool; + typedef K key_type; + typedef V value_type; - // expand when there would be used this %% of size - enum { - THRESHOLD_PERCENT=75 - }; + HASH() { + allocated=Hash_allocates[allocates_index=0]; + fpairs_count=fused_refs=0; + refs=new(UseGC) Pair*[allocated]; +#ifdef HASH_ORDER + first=0; + last=&first; +#endif + } - // the pool I'm allocated on - Pool *pool; + HASH(const HASH& source) { + allocates_index=source.allocates_index; + allocated=source.allocated; + fused_refs=source.fused_refs; + fpairs_count=source.fpairs_count; + refs=new(UseGC) Pair*[allocated]; + // clone & rehash +#ifdef HASH_ORDER + first=0; + last=&first; + for(Pair *pair=source.first; pair; pair=pair->next) + { + uint index=pair->code%allocated; + Pair **ref=&refs[index]; + HASH_NEW_PAIR(pair->code, pair->key, pair->value); + } +#else + for(int i=0; ilink) + { + Pair **ref=&refs[i]; + HASH_NEW_PAIR(pair->code, pair->key, pair->value); + } +#endif + } - // the index of size in sizes - int size_index; +#ifdef USE_DESTRUCTORS + ~HASH() { + Pair **ref=refs; + for(int index=0; indexlink; + delete pair; + pair=next; + } + delete[] refs; + } +#endif - // possible sizes. prime numbers - static uint sizes[]; - static int sizes_count; + /// put a [value] under the [key] @returns existed or not + bool put(K key, V value) { + if(!value) { + remove(key); + return false; + } + if(is_full()) + expand(); + + uint code=hash_code(key); + uint index=code%allocated; + Pair **ref=&refs[index]; + for(Pair *pair=*ref; pair; pair=pair->link) + if(pair->code==code && pair->key==key) { + // found a pair with the same key + pair->value=value; + return true; + } + + // proper pair not found -- create&link_in new pair + if(!*ref) // root cell were fused_refs? + fused_refs++; // not, we'll use it and record the fact + HASH_NEW_PAIR(code, key, value); + fpairs_count++; + return false; + } - // number of allocated pairs - int size; + /// remove the [key] @returns existed or not + bool remove(K key) { + uint code=hash_code(key); + uint index=code%allocated; + for(Pair **ref=&refs[index]; *ref; ref=&(*ref)->link){ + Pair *pair=*ref; + if(pair->code==code && pair->key==key) { + // found a pair with the same key + Pair *next=pair->link; +#ifdef HASH_ORDER + *(pair->prev)=pair->next; + if(pair->next) + pair->next->prev=pair->prev; + else + last=pair->prev; +#endif + delete pair; + *ref=next; + --fpairs_count; + return true; + } + } - // helper: expanding when used == threshold - int threshold; + return false; + } - // used pairs - int used; + /// return true if key exists + bool contains(K key){ + uint code=hash_code(key); + uint index=code%allocated; + for(Pair *pair=refs[index]; pair; pair=pair->link){ + if(pair->code==code && pair->key==key) + return true; + } - // main storage - class Pair { - friend Hash; + return false; + } - uint code; - Key key; - Value *value; - Pair *link; + /// get associated [value] by the [key] + V get(K key) const { + uint code=hash_code(key); + uint index=code%allocated; + for(Pair *pair=refs[index]; pair; pair=pair->link) + if(pair->code==code && pair->key==key) + return pair->value; - void *operator new(size_t size, Pool *apool); + return V(0); + } + +#ifdef HASH_ORDER + V first_value() const { + return (first) ? first->value : V(0); + } + + V last_value() const { + return (fpairs_count) ? ((Pair *)((char *)last - offsetof(Pair, next)))->value : V(0); + } +#endif + + /// put a [value] under the [key] if that [key] existed @returns existed or not + bool put_replaced(K key, V value) { + if(!value) { + remove(key); + return false; + } + uint code=hash_code(key); + uint index=code%allocated; + for(Pair *pair=refs[index]; pair; pair=pair->link) + if(pair->code==code && pair->key==key) { + // found a pair with the same key, replacing + pair->value=value; + return true; + } + + // proper pair not found + return false; + } + + /// put a [value] under the [key] if that [key] NOT existed @returns existed or not + bool put_dont_replace(K key, V value) { + if(!value) { + remove(key); + return false; + } + if(is_full()) + expand(); + + uint code=hash_code(key); + uint index=code%allocated; + Pair **ref=&refs[index]; + for(Pair *pair=*ref; pair; pair=pair->link) + if(pair->code==code && pair->key==key) { + // found a pair with the same key, NOT replacing + return true; + } + + // proper pair not found -- create&link_in new pair + if(!*ref) // root cell were fused_refs? + fused_refs++; // not, we'll use it and record the fact + HASH_NEW_PAIR(code, key, value); + fpairs_count++; + return false; + } + + /// put all 'src' values if NO with same key existed + void merge_dont_replace(const HASH& src) { +#ifdef HASH_ORDER + for(Pair *pair=src.first; pair; pair=pair->next) +#else + for(int i=0; ilink) +#endif + put_dont_replace(pair->key, pair->value); + } + + /// number of elements in hash + int count() const { return fpairs_count; } + + /// iterate over all pairs + template void for_each(void callback(K, V, I), I info) const { + HASH_FOR_EACH + callback(pair->key, pair->value, info); + } + + /// iterate over all pairs + template void for_each_ref(void callback(K, V&, I), I info) const { + HASH_FOR_EACH + callback(pair->key, pair->value, info); + } + + /// iterate over all pairs until condition becomes true, return that element + template V first_that(bool callback(K, V, I), I info) const { + HASH_FOR_EACH + if(callback(pair->key, pair->value, info)) + return pair->value; + return V(0); + } + + /// remove all elements + void clear() { + memset(refs, 0, sizeof(*refs)*allocated); + fpairs_count=fused_refs=0; +#ifdef HASH_ORDER + first=0; + last=&first; +#endif + } + +protected: + + /// the index of [allocated] in [Hash_allocates] + int allocates_index; + + /// number of allocated pairs + int allocated; - Pair(uint acode, Key& akey, Value *avalue, Pair *alink) : - code(acode), - key(akey), - value(avalue), - link(alink) {} + /// used pairs + int fused_refs; + + /// stored pairs total (including those by links) + int fpairs_count; + + /// pair storage + class Pair: public PA_Allocated { + public: + uint code; + K key; + V value; + Pair *link; +#ifdef HASH_ORDER + Pair **prev; + Pair *next; + + Pair(uint acode, K akey, V avalue, Pair *alink, Pair **aprev) : code(acode), key(akey), value(avalue), link(alink), + prev(aprev), next(0) { *aprev=this; } +#else + Pair(uint acode, K akey, V avalue, Pair *alink) : code(acode), key(akey), value(avalue), link(alink) {} +#endif } **refs; - // new&constructors made private to enforce factory manufacturing at pool - void *operator new(size_t size, Pool *apool); +#ifdef HASH_ORDER + Pair *first; + Pair **last; +#endif + + /// filled to threshold (THRESHOLD_PERCENT=75), needs expanding + bool is_full() { return fused_refs + allocated/4 >= allocated; } - Hash(Pool *apool); + /// allocate larger buffer & rehash + void expand() { + int old_allocated=allocated; + Pair **old_refs=refs; + + if (allocates_indexlink; + + uint new_index=pair->code%allocated; + Pair **new_ref=&refs[new_index]; + pair->link=*new_ref; + *new_ref=pair; - bool full() { - return used==threshold; + pair=next; + } + + delete[] old_refs; } - void expand(); +private: //disabled + + HASH& operator = (const HASH&) { return *this; } +}; + +/** + Simple String::body hash. + Allows hash code caching +*/ + +#ifdef HASH_CODE_CACHING + +template class HASH_STRING: public HASH { public: - static uint generic_code(uint aresult, char *start, uint size); - void put(Key& key, Value *value); - Value* get(Key& key); + typedef typename HASH::Pair Pair; + typedef const String::Body &K; + + typedef K key_type; + + /// put a [value] under the [key] @returns existed or not + bool put(K str, V value) { + if(!value) { + remove(str); + return false; + } + if(this->is_full()) + this->expand(); + + CORD key=str.get_cord(); + + uint code=str.get_hash_code(); + uint index=code%this->allocated; + Pair **ref=&this->refs[index]; + for(Pair *pair=*ref; pair; pair=pair->link) + if(pair->code==code && CORD_cmp(pair->key,key)==0) { + // found a pair with the same key + pair->value=value; + return true; + } + + // proper pair not found -- create&link_in new pair + if(!*ref) // root cell were fused_refs? + this->fused_refs++; // not, we'll use it and record the fact + HASH_NEW_PAIR(code, key, value); + this->fpairs_count++; + return false; + } + + /// remove the [key] @returns existed or not + bool remove(K str) { + CORD key=str.get_cord(); + uint code=str.get_hash_code(); + uint index=code%this->allocated; + for(Pair **ref=&this->refs[index]; *ref; ref=&(*ref)->link){ + Pair *pair=*ref; + if(pair->code==code && CORD_cmp(pair->key,key)==0) { + // found a pair with the same key + Pair *next=pair->link; +#ifdef HASH_ORDER + *(pair->prev)=pair->next; + if(pair->next) + pair->next->prev=pair->prev; + else + this->last=pair->prev; +#endif + delete pair; + *ref=next; + --this->fpairs_count; + return true; + } + } + + return false; + } + + /// return true if key exists + bool contains(K str){ + CORD key=str.get_cord(); + uint code=str.get_hash_code(); + uint index=code%this->allocated; + for(Pair *pair=this->refs[index]; pair; pair=pair->link){ + if(pair->code==code && CORD_cmp(pair->key,key)==0) + return true; + } + + return false; + } + + /// get associated [value] by the [key] + V get(K str) const { + CORD key=str.get_cord(); + uint code=str.get_hash_code(); + uint index=code%this->allocated; + for(Pair *pair=this->refs[index]; pair; pair=pair->link) + if(pair->code==code && CORD_cmp(pair->key,key)==0) + return pair->value; + + return V(0); + } + + /// put a [value] under the [key] if that [key] existed @returns existed or not + bool put_replaced(K str, V value) { + if(!value) { + remove(str); + return false; + } + + CORD key=str.get_cord(); + uint code=str.get_hash_code(); + uint index=code%this->allocated; + for(Pair *pair=this->refs[index]; pair; pair=pair->link) + if(pair->code==code && CORD_cmp(pair->key,key)==0) { + // found a pair with the same key, replacing + pair->value=value; + return true; + } + + // proper pair not found + return false; + } + + /// put a [value] under the [key] if that [key] NOT existed @returns existed or not + bool put_dont_replace(K str, V value) { + if(!value) { + remove(str); + return false; + } + if(this->is_full()) + this->expand(); + + CORD key=str.get_cord(); + uint code=str.get_hash_code(); + uint index=code%this->allocated; + Pair **ref=&this->refs[index]; + for(Pair *pair=*ref; pair; pair=pair->link) + if(pair->code==code && CORD_cmp(pair->key,key)==0) { + // found a pair with the same key, NOT replacing + return true; + } + + // proper pair not found -- create&link_in new pair + if(!*ref) // root cell were fused_refs? + this->fused_refs++; // not, we'll use it and record the fact + HASH_NEW_PAIR(code, key, value); + this->fpairs_count++; + return false; + } + + /// put all 'src' values if NO with same key existed + void merge_dont_replace(const HASH_STRING& src) { +#ifdef HASH_ORDER + for(Pair *pair=src.first; pair; pair=pair->next) +#else + for(int i=0; ilink) +#endif + put_dont_replace(String::Body(pair->key, pair->code), pair->value); + } + + /// iterate over all pairs + template void for_each(void callback(K, V, I), I info) const { + HASH_FOR_EACH + callback(String::Body(pair->key, pair->code), pair->value, info); + } + + /// iterate over all pairs + template void for_each_ref(void callback(K, V&, I), I info) const { + HASH_FOR_EACH + callback(String::Body(pair->key, pair->code), pair->value, info); + } + + /// iterate over all pairs until condition becomes true, return that element + template V first_that(bool callback(K, V, I), I info) const { + HASH_FOR_EACH + if(callback(String::Body(pair->key, pair->code), pair->value, info)) + return pair->value; + return V(0); + } + + /// simple hash iterator + class Iterator { + const HASH_STRING& fhash; + Pair *fcurrent; + public: + Iterator(const HASH_STRING& ahash): fhash(ahash) { + fcurrent=fhash.first; + } + + operator bool () { + return fcurrent != 0; + } + + void next() { + fcurrent=fcurrent->next; + } + + String::Body key(){ + return String::Body(fcurrent->key, fcurrent->code); + } + + V value(){ + return fcurrent->value; + } + }; }; +#else //HASH_CODE_CACHING + +template class HASH_STRING: public HASH{}; +#endif //HASH_CODE_CACHING + +#ifndef HASH_ORDER +/// Auto-object used to temporarily substituting/removing string hash values +template +class Temp_hash_value { + HashString &fhash; + K fname; + V saved_value; +public: + Temp_hash_value(HashString& ahash, K aname, V avalue) : + fhash(ahash), + fname(aname), + saved_value(ahash.get(aname)) { + fhash.put(aname, avalue); + } + ~Temp_hash_value() { + fhash.put(fname, saved_value); + } +}; #endif + +#endif //PA_HASH_CLASS