Annotation of parser3/src/include/pa_hash.h, revision 1.81
1.28 paf 1: /** @file
1.29 paf 2: Parser: hash class decl.
3:
1.74 misha 4: Copyright (c) 2001-2009 ArtLebedev Group (http://www.artlebedev.com)
1.29 paf 5:
1.54 paf 6: Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
1.1 paf 7: */
8:
1.59 paf 9: /*
10: The prime numbers used from zend_hash.c,
11: the part of Zend scripting engine library,
12: Copyrighted (C) 1999-2000 Zend Technologies Ltd.
13: http://www.zend.com/license/0_92.txt
14: For more information about Zend please visit http://www.zend.com/
15: */
16:
1.1 paf 17: #ifndef PA_HASH_H
18: #define PA_HASH_H
1.56 paf 19:
1.81 ! moko 20: static const char * const IDENT_HASH_H="$Date: 2009-12-04 04:19:58 $";
1.1 paf 21:
1.59 paf 22: #include "pa_memory.h"
1.1 paf 23: #include "pa_types.h"
1.74 misha 24: #include "pa_string.h"
1.59 paf 25:
26: const int HASH_ALLOCATES_COUNT=29;
1.1 paf 27:
1.61 paf 28: /** Zend comment: Generated on an Octa-ALPHA 300MHz CPU & 2.5GB RAM monster
29:
30: paf: HPUX ld could not handle static member: unsatisfied symbols
31: */
32: static uint Hash_allocates[HASH_ALLOCATES_COUNT]={
33: 5, 11, 19, 53, 107, 223, 463, 983, 1979, 3907, 7963,
34: 16229, 32531, 65407, 130987, 262237, 524521, 1048793,
35: 2097397, 4194103, 8388857, 16777447, 33554201, 67108961,
36: 134217487, 268435697, 536870683, 1073741621, 2147483399};
37:
1.68 misha 38: /// useful generic hash function
39: inline void generic_hash_code(uint& result, char c) {
40: result=(result<<4)+c;
41: if(uint g=(result&0xF0000000)) {
42: result=result^(g>>24);
43: result=result^g;
44: }
45: }
46: /// useful generic hash function
47: inline void generic_hash_code(uint& result, const char* s) {
48: while(char c=*s++) {
49: result=(result<<4)+c;
50: if(uint g=(result&0xF0000000)) {
51: result=result^(g>>24);
52: result=result^g;
53: }
54: }
55: }
56:
57: /// useful generic hash function
58: inline void generic_hash_code(uint& result, const char* buf, size_t size) {
59: const char* end=buf+size;
60: while(buf<end) {
61: result=(result<<4)+*buf++;
62: if(uint g=(result&0xF0000000)) {
63: result=result^(g>>24);
64: result=result^g;
65: }
66: }
67: }
68:
69: /// simple hash code of int. used by EXIF mapping
70: inline uint hash_code(int self) {
71: uint result=0;
72: generic_hash_code(result, (const char*)&self, sizeof(self));
73: return result;
74: }
75:
1.75 misha 76: #endif // PA_HASH_H
77:
78: #ifndef PA_HASH_CLASS
79: #define PA_HASH_CLASS
1.29 paf 80: /**
1.59 paf 81: Simple hash.
1.29 paf 82:
1.59 paf 83: Automatically rehashed when almost is_full.
1.51 paf 84: Contains no 0 values.
85: get returning 0 means there were no such.
86: "put value 0" means "remove"
1.29 paf 87: */
1.75 misha 88: #ifdef HASH_ORDER
89:
90: #undef HASH
91: #undef HASH_STRING
1.79 misha 92: #undef HASH_NEW_PAIR
93: #undef HASH_FOR_EACH
1.75 misha 94:
95: #define HASH OrderedHash
96: #define HASH_STRING OrderedHashString
1.79 misha 97: #define HASH_NEW_PAIR(code, key, value) *ref=new Pair(code, key, value, *ref, this->last); this->last=&((*ref)->next)
98:
99: #define HASH_FOR_EACH \
100: for(Pair *pair=this->first; pair; pair=pair->next)
1.75 misha 101:
102: #else
103:
104: #define HASH Hash
105: #define HASH_STRING HashString
1.79 misha 106: #define HASH_NEW_PAIR(code, key, value) *ref=new Pair(code, key, value, *ref)
107:
108: #define HASH_FOR_EACH \
109: Pair **ref=this->refs; \
110: for(int index=0; index<this->allocated; index++) \
111: for(Pair *pair=*ref++; pair; pair=pair->link)
1.75 misha 112:
113: #endif
114:
115: template<typename K, typename V> class HASH: public PA_Object {
1.1 paf 116: public:
117:
1.59 paf 118: typedef K key_type;
119: typedef V value_type;
1.3 paf 120:
1.75 misha 121: HASH() {
1.61 paf 122: allocated=Hash_allocates[allocates_index=0];
1.59 paf 123: threshold=allocated*THRESHOLD_PERCENT/100;
124: fpairs_count=fused_refs=0;
125: refs=new(UseGC) Pair*[allocated];
1.75 misha 126: #ifdef HASH_ORDER
127: first=0;
128: last=&first;
129: #endif
1.59 paf 130: }
1.25 paf 131:
1.75 misha 132: HASH(const HASH& source) {
1.59 paf 133: allocates_index=source.allocates_index;
134: allocated=source.allocated;
135: threshold=source.threshold;
136: fused_refs=source.fused_refs;
137: fpairs_count=source.fpairs_count;
138: refs=new(UseGC) Pair*[allocated];
1.81 ! moko 139: // clone & rehash
1.75 misha 140: #ifdef HASH_ORDER
141: first=0;
142: last=&first;
1.81 ! moko 143: for(Pair *pair=source.first; pair; pair=pair->next)
! 144: {
! 145: uint index=pair->code%allocated;
! 146: Pair **ref=&refs[index];
! 147: HASH_NEW_PAIR(pair->code, pair->key, pair->value);
! 148: }
! 149: #else
! 150: for(int i=0; i<source.allocated; i++)
! 151: for(Pair *pair=source.refs[i]; pair; pair=pair->link)
! 152: {
! 153: Pair **ref=&refs[i];
1.79 misha 154: HASH_NEW_PAIR(pair->code, pair->key, pair->value);
1.59 paf 155: }
1.81 ! moko 156: #endif
1.43 parser 157: }
158:
1.73 misha 159: #ifdef USE_DESTRUCTORS
1.75 misha 160: ~HASH() {
1.72 misha 161: Pair **ref=refs;
162: for(int index=0; index<allocated; index++)
163: for(Pair *pair=*ref++; pair;){
164: Pair *next=pair->link;
165: delete pair;
166: pair=next;
167: }
1.71 misha 168: delete[] refs;
169: }
1.73 misha 170: #endif
1.71 misha 171:
1.59 paf 172: /// put a [value] under the [key] @returns existed or not
173: bool put(K key, V value) {
174: if(!value) {
175: remove(key);
176: return false;
177: }
178: if(is_full())
179: expand();
180:
181: uint code=hash_code(key);
182: uint index=code%allocated;
183: Pair **ref=&refs[index];
184: for(Pair *pair=*ref; pair; pair=pair->link)
185: if(pair->code==code && pair->key==key) {
186: // found a pair with the same key
187: pair->value=value;
188: return true;
189: }
190:
191: // proper pair not found -- create&link_in new pair
192: if(!*ref) // root cell were fused_refs?
193: fused_refs++; // not, we'll use it and record the fact
1.79 misha 194: HASH_NEW_PAIR(code, key, value);
1.59 paf 195: fpairs_count++;
196: return false;
1.24 paf 197: }
1.10 paf 198:
1.59 paf 199: /// remove the [key] @returns existed or not
200: bool remove(K key) {
201: uint code=hash_code(key);
202: uint index=code%allocated;
1.75 misha 203: for(Pair **ref=&refs[index]; *ref; ref=&(*ref)->link){
204: Pair *pair=*ref;
205: if(pair->code==code && pair->key==key) {
1.59 paf 206: // found a pair with the same key
1.75 misha 207: Pair *next=pair->link;
208: #ifdef HASH_ORDER
209: *(pair->prev)=pair->next;
210: if(pair->next)
211: pair->next->prev=pair->prev;
212: else
213: last=pair->prev;
214: #endif
215: delete pair;
1.59 paf 216: *ref=next;
217: --fpairs_count;
218: return true;
219: }
1.75 misha 220: }
1.8 paf 221:
1.59 paf 222: return false;
223: }
1.48 paf 224:
1.70 misha 225: /// return true if key exists
1.69 misha 226: bool contains(K key){
1.67 misha 227: uint code=hash_code(key);
228: uint index=code%allocated;
1.70 misha 229: for(Pair *pair=refs[index]; pair; pair=pair->link){
230: if(pair->code==code && pair->key==key)
1.67 misha 231: return true;
232: }
233:
234: return false;
235: }
236:
1.59 paf 237: /// get associated [value] by the [key]
238: V get(K key) const {
239: uint code=hash_code(key);
240: uint index=code%allocated;
241: for(Pair *pair=refs[index]; pair; pair=pair->link)
242: if(pair->code==code && pair->key==key)
243: return pair->value;
244:
245: return V(0);
1.33 paf 246: }
1.70 misha 247:
1.51 paf 248: /// put a [value] under the [key] if that [key] existed @returns existed or not
1.63 paf 249: bool put_replaced(K key, V value) {
1.59 paf 250: if(!value) {
251: remove(key);
252: return false;
253: }
254: uint code=hash_code(key);
255: uint index=code%allocated;
256: for(Pair *pair=refs[index]; pair; pair=pair->link)
257: if(pair->code==code && pair->key==key) {
258: // found a pair with the same key, replacing
259: pair->value=value;
260: return true;
261: }
262:
263: // proper pair not found
264: return false;
1.64 paf 265: }
266:
1.51 paf 267: /// put a [value] under the [key] if that [key] NOT existed @returns existed or not
1.59 paf 268: bool put_dont_replace(K key, V value) {
269: if(!value) {
270: remove(key);
271: return false;
272: }
273: if(is_full())
274: expand();
275:
276: uint code=hash_code(key);
277: uint index=code%allocated;
278: Pair **ref=&refs[index];
279: for(Pair *pair=*ref; pair; pair=pair->link)
280: if(pair->code==code && pair->key==key) {
281: // found a pair with the same key, NOT replacing
282: return true;
283: }
284:
285: // proper pair not found -- create&link_in new pair
286: if(!*ref) // root cell were fused_refs?
287: fused_refs++; // not, we'll use it and record the fact
1.79 misha 288: HASH_NEW_PAIR(code, key, value);
1.59 paf 289: fpairs_count++;
290: return false;
291: }
1.18 paf 292:
1.79 misha 293: /// put all 'src' values if NO with same key existed
1.75 misha 294: void merge_dont_replace(const HASH& src) {
1.79 misha 295: #ifdef HASH_ORDER
296: for(Pair *pair=src.first; pair; pair=pair->next)
297: #else
1.59 paf 298: for(int i=0; i<src.allocated; i++)
299: for(Pair *pair=src.refs[i]; pair; pair=pair->link)
1.79 misha 300: #endif
1.59 paf 301: put_dont_replace(pair->key, pair->value);
1.36 paf 302: }
1.11 paf 303:
1.29 paf 304: /// number of elements in hash
1.59 paf 305: int count() const { return fpairs_count; }
1.25 paf 306:
1.59 paf 307: /// iterate over all pairs
308: template<typename I> void for_each(void callback(K, V, I), I info) const {
1.79 misha 309: HASH_FOR_EACH
1.76 misha 310: callback(pair->key, pair->value, info);
1.59 paf 311: }
1.45 paf 312:
1.59 paf 313: /// iterate over all pairs
314: template<typename I> void for_each_ref(void callback(K, V&, I), I info) const {
1.79 misha 315: HASH_FOR_EACH
1.76 misha 316: callback(pair->key, pair->value, info);
1.59 paf 317: }
1.38 paf 318:
1.59 paf 319: /// iterate over all pairs until condition becomes true, return that element
320: template<typename I> V first_that(bool callback(K, V, I), I info) const {
1.79 misha 321: HASH_FOR_EACH
1.75 misha 322: if(callback(pair->key, pair->value, info))
323: return pair->value;
1.59 paf 324: return V(0);
325: }
1.27 paf 326:
1.29 paf 327: /// remove all elements
1.59 paf 328: void clear() {
329: memset(refs, 0, sizeof(*refs)*allocated);
330: fpairs_count=fused_refs=0;
1.75 misha 331: #ifdef HASH_ORDER
332: first=0;
333: last=&first;
334: #endif
1.59 paf 335: }
1.15 paf 336:
1.74 misha 337: protected:
1.1 paf 338:
1.39 paf 339: /// expand when these %% of allocated exausted
1.1 paf 340: enum {
341: THRESHOLD_PERCENT=75
342: };
1.9 paf 343:
1.61 paf 344: /// the index of [allocated] in [Hash_allocates]
1.19 paf 345: int allocates_index;
1.1 paf 346:
1.39 paf 347: /// number of allocated pairs
1.19 paf 348: int allocated;
1.1 paf 349:
1.59 paf 350: /// helper: expanding when fused_refs == threshold
1.1 paf 351: int threshold;
352:
1.39 paf 353: /// used pairs
1.59 paf 354: int fused_refs;
1.44 parser 355:
356: /// stored pairs total (including those by links)
1.59 paf 357: int fpairs_count;
1.1 paf 358:
1.39 paf 359: /// pair storage
1.59 paf 360: class Pair: public PA_Allocated {
361: public:
1.1 paf 362: uint code;
1.59 paf 363: K key;
364: V value;
1.1 paf 365: Pair *link;
1.75 misha 366: #ifdef HASH_ORDER
367: Pair **prev;
368: Pair *next;
369:
370: Pair(uint acode, K akey, V avalue, Pair *alink, Pair **aprev) : code(acode), key(akey), value(avalue), link(alink),
371: prev(aprev), next(0) { *aprev=this; }
372: #else
373: Pair(uint acode, K akey, V avalue, Pair *alink) : code(acode), key(akey), value(avalue), link(alink) {}
374: #endif
1.2 paf 375: } **refs;
1.1 paf 376:
1.75 misha 377: #ifdef HASH_ORDER
378: Pair *first;
379: Pair **last;
380: #endif
381:
1.39 paf 382: /// filled to threshold: needs expanding
1.59 paf 383: bool is_full() { return fused_refs==threshold; }
1.5 paf 384:
1.39 paf 385: /// allocate larger buffer & rehash
1.59 paf 386: void expand() {
387: int old_allocated=allocated;
388: Pair **old_refs=refs;
389:
390: allocates_index=allocates_index+1<HASH_ALLOCATES_COUNT?allocates_index+1:HASH_ALLOCATES_COUNT-1;
391: // allocated bigger refs array
1.61 paf 392: allocated=Hash_allocates[allocates_index];
1.59 paf 393: threshold=allocated*THRESHOLD_PERCENT/100;
394: refs=new(UseGC) Pair*[allocated];
395:
396: // rehash
397: Pair **old_ref=old_refs;
398: for(int old_index=0; old_index<old_allocated; old_index++)
399: for(Pair *pair=*old_ref++; pair; ) {
400: Pair *next=pair->link;
401:
402: uint new_index=pair->code%allocated;
403: Pair **new_ref=&refs[new_index];
404: pair->link=*new_ref;
405: *new_ref=pair;
406:
407: pair=next;
408: }
409:
410: delete[] old_refs;
411: }
1.4 paf 412:
413: private: //disabled
414:
1.75 misha 415: HASH& operator = (const HASH&) { return *this; }
1.1 paf 416: };
1.59 paf 417:
1.74 misha 418: /**
1.75 misha 419: Simple String::body hash.
420: Allows hash code caching
1.74 misha 421: */
422:
423: #ifdef HASH_CODE_CACHING
424:
1.75 misha 425: template<typename V> class HASH_STRING: public HASH<const CORD,V> {
1.74 misha 426: public:
427:
1.75 misha 428: typedef typename HASH<const CORD,V>::Pair Pair;
1.74 misha 429: typedef const String::Body &K;
430:
431: typedef K key_type;
432:
433: /// put a [value] under the [key] @returns existed or not
434: bool put(K str, V value) {
435: if(!value) {
436: remove(str);
437: return false;
438: }
439: if(this->is_full())
440: this->expand();
441:
442: CORD key=str.get_cord();
443:
444: uint code=str.get_hash_code();
445: uint index=code%this->allocated;
446: Pair **ref=&this->refs[index];
447: for(Pair *pair=*ref; pair; pair=pair->link)
448: if(pair->code==code && CORD_cmp(pair->key,key)==0) {
449: // found a pair with the same key
450: pair->value=value;
451: return true;
452: }
453:
454: // proper pair not found -- create&link_in new pair
455: if(!*ref) // root cell were fused_refs?
456: this->fused_refs++; // not, we'll use it and record the fact
1.79 misha 457: HASH_NEW_PAIR(code, key, value);
1.74 misha 458: this->fpairs_count++;
459: return false;
460: }
461:
462: /// remove the [key] @returns existed or not
463: bool remove(K str) {
464: CORD key=str.get_cord();
465: uint code=str.get_hash_code();
466: uint index=code%this->allocated;
1.75 misha 467: for(Pair **ref=&this->refs[index]; *ref; ref=&(*ref)->link){
468: Pair *pair=*ref;
469: if(pair->code==code && CORD_cmp(pair->key,key)==0) {
1.74 misha 470: // found a pair with the same key
1.75 misha 471: Pair *next=pair->link;
472: #ifdef HASH_ORDER
473: *(pair->prev)=pair->next;
474: if(pair->next)
475: pair->next->prev=pair->prev;
476: else
477: this->last=pair->prev;
478: #endif
479: delete pair;
1.74 misha 480: *ref=next;
481: --this->fpairs_count;
482: return true;
483: }
1.75 misha 484: }
1.74 misha 485:
486: return false;
487: }
488:
489: /// return true if key exists
490: bool contains(K str){
491: CORD key=str.get_cord();
492: uint code=str.get_hash_code();
493: uint index=code%this->allocated;
494: for(Pair *pair=this->refs[index]; pair; pair=pair->link){
495: if(pair->code==code && CORD_cmp(pair->key,key)==0)
496: return true;
497: }
498:
499: return false;
500: }
501:
502: /// get associated [value] by the [key]
503: V get(K str) const {
504: CORD key=str.get_cord();
505: uint code=str.get_hash_code();
506: uint index=code%this->allocated;
507: for(Pair *pair=this->refs[index]; pair; pair=pair->link)
508: if(pair->code==code && CORD_cmp(pair->key,key)==0)
509: return pair->value;
510:
511: return V(0);
512: }
513:
514: /// put a [value] under the [key] if that [key] existed @returns existed or not
515: bool put_replaced(K str, V value) {
516: if(!value) {
517: remove(str);
518: return false;
519: }
520:
521: CORD key=str.get_cord();
522: uint code=str.get_hash_code();
523: uint index=code%this->allocated;
524: for(Pair *pair=this->refs[index]; pair; pair=pair->link)
525: if(pair->code==code && CORD_cmp(pair->key,key)==0) {
526: // found a pair with the same key, replacing
527: pair->value=value;
528: return true;
529: }
530:
531: // proper pair not found
532: return false;
533: }
534:
535: /// put a [value] under the [key] if that [key] NOT existed @returns existed or not
536: bool put_dont_replace(K str, V value) {
537: if(!value) {
538: remove(str);
539: return false;
540: }
541: if(this->is_full())
542: this->expand();
543:
544: CORD key=str.get_cord();
545: uint code=str.get_hash_code();
546: uint index=code%this->allocated;
547: Pair **ref=&this->refs[index];
548: for(Pair *pair=*ref; pair; pair=pair->link)
549: if(pair->code==code && CORD_cmp(pair->key,key)==0) {
550: // found a pair with the same key, NOT replacing
551: return true;
552: }
553:
554: // proper pair not found -- create&link_in new pair
555: if(!*ref) // root cell were fused_refs?
556: this->fused_refs++; // not, we'll use it and record the fact
1.79 misha 557: HASH_NEW_PAIR(code, key, value);
1.74 misha 558: this->fpairs_count++;
559: return false;
560: }
561:
1.79 misha 562: /// put all 'src' values if NO with same key existed
563: void merge_dont_replace(const HASH_STRING& src) {
1.76 misha 564: #ifdef HASH_ORDER
1.79 misha 565: for(Pair *pair=src.first; pair; pair=pair->next)
1.76 misha 566: #else
1.79 misha 567: for(int i=0; i<src.allocated; i++)
568: for(Pair *pair=src.refs[i]; pair; pair=pair->link)
1.76 misha 569: #endif
1.79 misha 570: put_dont_replace(String::Body(pair->key, pair->code), pair->value);
571: }
572:
573: /// iterate over all pairs
574: template<typename I> void for_each(void callback(K, V, I), I info) const {
575: HASH_FOR_EACH
576: callback(String::Body(pair->key, pair->code), pair->value, info);
1.74 misha 577: }
578:
579: /// iterate over all pairs
580: template<typename I> void for_each_ref(void callback(K, V&, I), I info) const {
1.79 misha 581: HASH_FOR_EACH
582: callback(String::Body(pair->key, pair->code), pair->value, info);
1.74 misha 583: }
584:
585: /// iterate over all pairs until condition becomes true, return that element
586: template<typename I> V first_that(bool callback(K, V, I), I info) const {
1.79 misha 587: HASH_FOR_EACH
1.75 misha 588: if(callback(String::Body(pair->key, pair->code), pair->value, info))
589: return pair->value;
1.74 misha 590: return V(0);
591: }
1.80 misha 592:
593: /// simple hash iterator
594: class Iterator {
595: const HASH_STRING<V>& fhash;
596: Pair *fcurrent;
597: public:
598: Iterator(const HASH_STRING<V>& ahash): fhash(ahash) {
599: fcurrent=fhash.first;
600: }
601:
602: operator bool () {
603: return fcurrent != 0;
604: }
605:
606: void next() {
607: fcurrent=fcurrent->next;
608: }
609:
610: String::Body key(){
611: return String::Body(fcurrent->key, fcurrent->code);
612: }
613:
614: V value(){
615: return fcurrent->value;
616: }
617: };
1.74 misha 618: };
1.78 misha 619: #else //HASH_CODE_CACHING
1.74 misha 620:
1.75 misha 621: template<typename V> class HASH_STRING: public HASH<const String::Body,V>{};
1.74 misha 622:
1.78 misha 623: #endif //HASH_CODE_CACHING
1.74 misha 624:
1.75 misha 625: #ifndef HASH_ORDER
1.74 misha 626: /// Auto-object used to temporarily substituting/removing string hash values
1.59 paf 627: template <typename K, typename V>
1.55 paf 628: class Temp_hash_value {
1.74 misha 629: HashString<V> &fhash;
1.59 paf 630: K fname;
631: V saved_value;
1.55 paf 632: public:
1.74 misha 633: Temp_hash_value(HashString<V>& ahash, K aname, V avalue) :
1.55 paf 634: fhash(ahash),
635: fname(aname),
636: saved_value(ahash.get(aname)) {
637: fhash.put(aname, avalue);
638: }
639: ~Temp_hash_value() {
640: fhash.put(fname, saved_value);
641: }
642: };
1.75 misha 643: #endif
1.1 paf 644:
1.75 misha 645: #endif //PA_HASH_CLASS
E-mail: