--- parser3/src/include/pa_hash.h 2005/07/26 12:43:05 1.63 +++ parser3/src/include/pa_hash.h 2017/02/07 22:00:34 1.96 @@ -1,7 +1,7 @@ /** @file Parser: hash class decl. - Copyright (c) 2001-2004 ArtLebedev Group (http://www.artlebedev.com) + Copyright (c) 2001-2017 Art. Lebedev Studio (http://www.artlebedev.com) Author: Alexandr Petrosian (http://paf.design.ru) */ @@ -17,10 +17,11 @@ #ifndef PA_HASH_H #define PA_HASH_H -static const char * const IDENT_HASH_H="$Date: 2005/07/26 12:43:05 $"; +#define IDENT_PA_HASH_H "$Id: pa_hash.h,v 1.96 2017/02/07 22:00:34 moko Exp $" #include "pa_memory.h" #include "pa_types.h" +#include "pa_string.h" const int HASH_ALLOCATES_COUNT=29; @@ -34,6 +35,48 @@ static uint Hash_allocates[HASH_ALLOCATE 2097397, 4194103, 8388857, 16777447, 33554201, 67108961, 134217487, 268435697, 536870683, 1073741621, 2147483399}; +/// useful generic hash function +inline void generic_hash_code(uint& result, char c) { + result=(result<<4)+c; + if(uint g=(result&0xF0000000)) { + result=result^(g>>24); + result=result^g; + } +} +/// useful generic hash function +inline void generic_hash_code(uint& result, const char* s) { + while(char c=*s++) { + result=(result<<4)+c; + if(uint g=(result&0xF0000000)) { + result=result^(g>>24); + result=result^g; + } + } +} + +/// useful generic hash function +inline void generic_hash_code(uint& result, const char* buf, size_t size) { + const char* end=buf+size; + while(buf>24); + result=result^g; + } + } +} + +/// simple hash code of int. used by EXIF mapping +inline uint hash_code(int self) { + uint result=0; + generic_hash_code(result, (const char*)&self, sizeof(self)); + return result; +} + +#endif // PA_HASH_H + +#ifndef PA_HASH_CLASS +#define PA_HASH_CLASS /** Simple hash. @@ -42,39 +85,87 @@ static uint Hash_allocates[HASH_ALLOCATE get returning 0 means there were no such. "put value 0" means "remove" */ -template class Hash: public PA_Object { -public: +#ifdef HASH_ORDER +#undef HASH +#undef HASH_STRING +#undef HASH_NEW_PAIR +#undef HASH_ORDER_CLEAR +#undef HASH_FOR_EACH + +#define HASH OrderedHash +#define HASH_STRING OrderedHashString +#define HASH_NEW_PAIR(code, key, value) *ref=new Pair(code, key, value, *ref, this->last); this->last=&((*ref)->next) +#define HASH_ORDER_CLEAR() first=0; last=&first + +#define HASH_FOR_EACH \ + for(Pair *pair=this->first; pair; pair=pair->next) + +#else + +#define HASH Hash +#define HASH_STRING HashString +#define HASH_NEW_PAIR(code, key, value) *ref=new Pair(code, key, value, *ref) +#define HASH_ORDER_CLEAR() + +#define HASH_FOR_EACH \ + Pair **ref=this->refs; \ + for(int index=0; indexallocated; index++) \ + for(Pair *pair=*ref++; pair; pair=pair->link) + +#endif + +template class HASH: public PA_Object { +protected: + class Pair; +public: typedef K key_type; typedef V value_type; - Hash() { + HASH() { allocated=Hash_allocates[allocates_index=0]; - threshold=allocated*THRESHOLD_PERCENT/100; fpairs_count=fused_refs=0; - refs=new(UseGC) Pair*[allocated]; + refs=new Pair*[allocated]; + HASH_ORDER_CLEAR(); } - Hash(const Hash& source) { + HASH(const HASH& source) { allocates_index=source.allocates_index; allocated=source.allocated; - threshold=source.threshold; fused_refs=source.fused_refs; fpairs_count=source.fpairs_count; - refs=new(UseGC) Pair*[allocated]; - + refs=new Pair*[allocated]; // clone & rehash - Pair **old_ref=source.refs; +#ifdef HASH_ORDER + HASH_ORDER_CLEAR(); + for(Pair *pair=source.first; pair; pair=pair->next) + { + uint index=pair->code%allocated; + Pair **ref=&refs[index]; + HASH_NEW_PAIR(pair->code, pair->key, pair->value); + } +#else + for(int i=0; ilink) + { + Pair **ref=&refs[i]; + HASH_NEW_PAIR(pair->code, pair->key, pair->value); + } +#endif + } + +#ifdef USE_DESTRUCTORS + ~HASH() { + Pair **ref=refs; for(int index=0; indexlink; - - Pair **new_ref=&refs[index]; - *new_ref=new Pair(pair->code, pair->key, pair->value, *new_ref); - + delete pair; pair=next; } + delete[] refs; } +#endif /// put a [value] under the [key] @returns existed or not bool put(K key, V value) { @@ -98,56 +189,44 @@ public: // proper pair not found -- create&link_in new pair if(!*ref) // root cell were fused_refs? fused_refs++; // not, we'll use it and record the fact - *ref=new Pair(code, key, value, *ref); + HASH_NEW_PAIR(code, key, value); fpairs_count++; return false; } - /// put a [value] under the [key] @returns existed or not - template R maybe_put(K key, V value, F prevent) { - if(!value) { - remove(key); - return 0; - } - if(is_full()) - expand(); - + /// remove the [key] @returns existed or not + bool remove(K key) { uint code=hash_code(key); uint index=code%allocated; - Pair **ref=&refs[index]; - for(Pair *pair=*ref; pair; pair=pair->link) + for(Pair **ref=&refs[index]; *ref; ref=&(*ref)->link){ + Pair *pair=*ref; if(pair->code==code && pair->key==key) { // found a pair with the same key - - // prevent-function intercepted put? - if(R result=prevent(pair->value)) - return result; - - pair->value=value; - return reinterpret_cast(1); + Pair *next=pair->link; +#ifdef HASH_ORDER + *(pair->prev)=pair->next; + if(pair->next) + pair->next->prev=pair->prev; + else + last=pair->prev; +#endif + *ref=next; + --fpairs_count; + return true; } - - // proper pair not found -- create&link_in new pair - if(!*ref) // root cell were fused_refs? - fused_refs++; // not, we'll use it and record the fact - *ref=new Pair(code, key, value, *ref); - fpairs_count++; - return 0; + } + + return false; } - /// remove the [key] @returns existed or not - bool remove(K key) { + /// return true if key exists + bool contains(K key){ uint code=hash_code(key); uint index=code%allocated; - for(Pair **ref=&refs[index]; *ref; ref=&(*ref)->link) - if((*ref)->code==code && (*ref)->key==key) { - // found a pair with the same key - Pair *next=(*ref)->link; - delete *ref; - *ref=next; - --fpairs_count; + for(Pair *pair=refs[index]; pair; pair=pair->link){ + if(pair->code==code && pair->key==key) return true; - } + } return false; } @@ -163,6 +242,49 @@ public: return V(0); } +#ifdef HASH_ORDER + String::Body first_key() const { +#ifdef HASH_CODE_CACHING + return (first) ? String::Body(first->key, first->code) : String::Body(); +#else + return (first) ? first->key : String::Body(); +#endif + } + + V first_value() const { + return (first) ? first->value : V(0); + } + + String::Body last_key() const { + if (fpairs_count) { + Pair* pair = (Pair*)((char *)last - offsetof(Pair, next)); +#ifdef HASH_CODE_CACHING + return String::Body(pair->key, pair->code); +#else + return pair->key; +#endif + } else { + return String::Body(); + } + } + + V last_value() const { + return (fpairs_count) ? ((Pair *)((char *)last - offsetof(Pair, next)))->value : V(0); + } + + void order_clear() { + HASH_ORDER_CLEAR(); + } + + void order_next(Pair* pair) { + pair->prev=last; + pair->next=0; + *last=pair; + last=&(pair->next); + } + +#endif //HASH_ORDER + /// put a [value] under the [key] if that [key] existed @returns existed or not bool put_replaced(K key, V value) { if(!value) { @@ -203,17 +325,19 @@ public: // proper pair not found -- create&link_in new pair if(!*ref) // root cell were fused_refs? fused_refs++; // not, we'll use it and record the fact - *ref=new Pair(code, key, value, *ref); + HASH_NEW_PAIR(code, key, value); fpairs_count++; return false; } - /** put all 'src' values if NO with same key existed - @todo optimize this.allocated==src.allocated case - */ - void merge_dont_replace(const Hash& src) { + /// put all 'src' values if NO with same key existed + void merge_dont_replace(const HASH& src) { +#ifdef HASH_ORDER + for(Pair *pair=src.first; pair; pair=pair->next) +#else for(int i=0; ilink) +#endif put_dont_replace(pair->key, pair->value); } @@ -222,43 +346,32 @@ public: /// iterate over all pairs template void for_each(void callback(K, V, I), I info) const { - Pair **ref=refs; - for(int index=0; indexlink) - callback(pair->key, pair->value, info); + HASH_FOR_EACH + callback(pair->key, pair->value, info); } /// iterate over all pairs template void for_each_ref(void callback(K, V&, I), I info) const { - Pair **ref=refs; - for(int index=0; indexlink) - callback(pair->key, pair->value, info); + HASH_FOR_EACH + callback(pair->key, pair->value, info); } /// iterate over all pairs until condition becomes true, return that element template V first_that(bool callback(K, V, I), I info) const { - Pair **ref=refs; - for(int index=0; indexlink) - if(callback(pair->key, pair->value, info)) - return pair->value; - + HASH_FOR_EACH + if(callback(pair->key, pair->value, info)) + return pair->value; return V(0); } /// remove all elements void clear() { memset(refs, 0, sizeof(*refs)*allocated); - fpairs_count=fused_refs=0; + fpairs_count=fused_refs=0; + HASH_ORDER_CLEAR(); } -private: - - /// expand when these %% of allocated exausted - enum { - THRESHOLD_PERCENT=75 - }; +protected: /// the index of [allocated] in [Hash_allocates] int allocates_index; @@ -266,9 +379,6 @@ private: /// number of allocated pairs int allocated; - /// helper: expanding when fused_refs == threshold - int threshold; - /// used pairs int fused_refs; @@ -282,27 +392,34 @@ private: K key; V value; Pair *link; - - Pair(uint acode, K akey, V avalue, Pair *alink) : - code(acode), - key(akey), - value(avalue), - link(alink) {} +#ifdef HASH_ORDER + Pair **prev; + Pair *next; + + Pair(uint acode, K akey, V avalue, Pair *alink, Pair **aprev) : code(acode), key(akey), value(avalue), link(alink), + prev(aprev), next(0) { *aprev=this; } +#else + Pair(uint acode, K akey, V avalue, Pair *alink) : code(acode), key(akey), value(avalue), link(alink) {} +#endif } **refs; - /// filled to threshold: needs expanding - bool is_full() { return fused_refs==threshold; } +#ifdef HASH_ORDER + Pair *first; + Pair **last; +#endif + + /// filled to threshold (THRESHOLD_PERCENT=75), needs expanding + bool is_full() { return fused_refs + allocated/4 >= allocated; } /// allocate larger buffer & rehash void expand() { int old_allocated=allocated; Pair **old_refs=refs; - allocates_index=allocates_index+1>24); - result=result^g; +/** + Simple String::body hash. + Allows hash code caching +*/ + +#ifdef HASH_CODE_CACHING + +template class HASH_STRING: public HASH { +public: + + typedef typename HASH::Pair Pair; + typedef const String::Body &K; + + typedef K key_type; + + /// put a [value] under the [key] @returns existed or not + bool put(K str, V value) { + if(!value) { + remove(str); + return false; + } + if(this->is_full()) + this->expand(); + + CORD key=str.get_cord(); + + uint code=str.get_hash_code(); + uint index=code%this->allocated; + Pair **ref=&this->refs[index]; + for(Pair *pair=*ref; pair; pair=pair->link) + if(pair->code==code && CORD_cmp(pair->key,key)==0) { + // found a pair with the same key + pair->value=value; + return true; + } + + // proper pair not found -- create&link_in new pair + if(!*ref) // root cell were fused_refs? + this->fused_refs++; // not, we'll use it and record the fact + HASH_NEW_PAIR(code, key, value); + this->fpairs_count++; + return false; } -} -/// useful generic hash function -inline void generic_hash_code(uint& result, const char* s) { - while(char c=*s++) { - result=(result<<4)+c; - if(uint g=(result&0xF0000000)) { - result=result^(g>>24); - result=result^g; + + /// remove the [key] @returns existed or not + bool remove(K str) { + CORD key=str.get_cord(); + uint code=str.get_hash_code(); + uint index=code%this->allocated; + for(Pair **ref=&this->refs[index]; *ref; ref=&(*ref)->link){ + Pair *pair=*ref; + if(pair->code==code && CORD_cmp(pair->key,key)==0) { + // found a pair with the same key + Pair *next=pair->link; +#ifdef HASH_ORDER + *(pair->prev)=pair->next; + if(pair->next) + pair->next->prev=pair->prev; + else + this->last=pair->prev; +#endif + *ref=next; + --this->fpairs_count; + return true; + } } + + return false; } -} -/// useful generic hash function -inline void generic_hash_code(uint& result, const char* buf, size_t size) { - const char* end=buf+size; - while(buf>24); - result=result^g; + /// return true if key exists + bool contains(K str){ + CORD key=str.get_cord(); + uint code=str.get_hash_code(); + uint index=code%this->allocated; + for(Pair *pair=this->refs[index]; pair; pair=pair->link){ + if(pair->code==code && CORD_cmp(pair->key,key)==0) + return true; } + + return false; } -} -/// simple hash code of int. used by EXIF mapping -inline uint hash_code(int self) { - uint result=0; - generic_hash_code(result, (const char*)&self, sizeof(self)); - return result; -} + /// get associated [value] by the [key] + V get(K str) const { + CORD key=str.get_cord(); + uint code=str.get_hash_code(); + uint index=code%this->allocated; + for(Pair *pair=this->refs[index]; pair; pair=pair->link) + if(pair->code==code && CORD_cmp(pair->key,key)==0) + return pair->value; + + return V(0); + } + + /// get associated [value] by the [key], optimized + V get(const char *key) const { + uint code=0; + if(key && *key){ + generic_hash_code(code, key); + } else { + key=0; + } + uint index=code%this->allocated; + for(Pair *pair=this->refs[index]; pair; pair=pair->link) + if(pair->code==code && CORD_cmp(pair->key,(CORD)key)==0) + return pair->value; + + return V(0); + } + + /// put a [value] under the [key] if that [key] existed @returns existed or not + bool put_replaced(K str, V value) { + if(!value) { + remove(str); + return false; + } + + CORD key=str.get_cord(); + uint code=str.get_hash_code(); + uint index=code%this->allocated; + for(Pair *pair=this->refs[index]; pair; pair=pair->link) + if(pair->code==code && CORD_cmp(pair->key,key)==0) { + // found a pair with the same key, replacing + pair->value=value; + return true; + } + + // proper pair not found + return false; + } + + /// put a [value] under the [key] if that [key] NOT existed @returns existed or not + bool put_dont_replace(K str, V value) { + if(!value) { + remove(str); + return false; + } + if(this->is_full()) + this->expand(); -/// Auto-object used to temporarily substituting/removing hash values -template + CORD key=str.get_cord(); + uint code=str.get_hash_code(); + uint index=code%this->allocated; + Pair **ref=&this->refs[index]; + for(Pair *pair=*ref; pair; pair=pair->link) + if(pair->code==code && CORD_cmp(pair->key,key)==0) { + // found a pair with the same key, NOT replacing + return true; + } + + // proper pair not found -- create&link_in new pair + if(!*ref) // root cell were fused_refs? + this->fused_refs++; // not, we'll use it and record the fact + HASH_NEW_PAIR(code, key, value); + this->fpairs_count++; + return false; + } + + /// put all 'src' values if NO with same key existed + void merge_dont_replace(const HASH_STRING& src) { +#ifdef HASH_ORDER + for(Pair *pair=src.first; pair; pair=pair->next) +#else + for(int i=0; ilink) +#endif + put_dont_replace(String::Body(pair->key, pair->code), pair->value); + } + + /// iterate over all pairs + template void for_each(void callback(K, V, I), I info) const { + HASH_FOR_EACH + callback(String::Body(pair->key, pair->code), pair->value, info); + } + + /// iterate over all pairs + template void for_each_ref(void callback(K, V&, I), I info) const { + HASH_FOR_EACH + callback(String::Body(pair->key, pair->code), pair->value, info); + } + + /// iterate over all pairs until condition becomes true, return that element + template V first_that(bool callback(K, V, I), I info) const { + HASH_FOR_EACH + if(callback(String::Body(pair->key, pair->code), pair->value, info)) + return pair->value; + return V(0); + } + +#else //HASH_CODE_CACHING + +template class HASH_STRING: public HASH{ +public: + typedef typename HASH::Pair Pair; + +#endif //HASH_CODE_CACHING + + /// simple hash iterator + class Iterator { + const HASH_STRING& fhash; + Pair *fcurrent; +#ifndef HASH_ORDER + int i; +#endif + public: +#ifdef HASH_ORDER + Iterator(const HASH_STRING& ahash): fhash(ahash) { + fcurrent=fhash.first; + } + + void next() { + fcurrent=fcurrent->next; + } +#else + Iterator(const HASH_STRING& ahash): fhash(ahash) { + fcurrent=0; + for(i=0; ilink) + return; + for(i++; ikey, fcurrent->code); +#else + return fcurrent->key; +#endif + } + + V value(){ + return fcurrent->value; + } + + Pair *pair(){ + return fcurrent; + } + }; + +}; + +#ifndef HASH_ORDER +/// Auto-object used to temporarily substituting/removing string hash values +template class Temp_hash_value { - Hash& fhash; - K fname; + H *fhash; + String::Body fname; V saved_value; public: - Temp_hash_value(Hash& ahash, K aname, V avalue) : - fhash(ahash), - fname(aname), - saved_value(ahash.get(aname)) { - fhash.put(aname, avalue); + Temp_hash_value(H *ahash, String::Body aname, V avalue) : fhash(ahash), fname(aname) { + if(fhash){ + saved_value=fhash->get(aname); + fhash->put(aname, avalue); + } } - ~Temp_hash_value() { - fhash.put(fname, saved_value); + ~Temp_hash_value() { + if(fhash) + fhash->put(fname, saved_value); } }; - #endif + +#endif //PA_HASH_CLASS