Annotation of parser3/src/include/pa_hash.h, revision 1.82
1.28 paf 1: /** @file
1.29 paf 2: Parser: hash class decl.
3:
1.74 misha 4: Copyright (c) 2001-2009 ArtLebedev Group (http://www.artlebedev.com)
1.29 paf 5:
1.54 paf 6: Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
1.1 paf 7: */
8:
1.59 paf 9: /*
10: The prime numbers used from zend_hash.c,
11: the part of Zend scripting engine library,
12: Copyrighted (C) 1999-2000 Zend Technologies Ltd.
13: http://www.zend.com/license/0_92.txt
14: For more information about Zend please visit http://www.zend.com/
15: */
16:
1.1 paf 17: #ifndef PA_HASH_H
18: #define PA_HASH_H
1.56 paf 19:
1.82 ! misha 20: static const char * const IDENT_HASH_H="$Date: 2010-07-23 22:23:24 $";
1.1 paf 21:
1.59 paf 22: #include "pa_memory.h"
1.1 paf 23: #include "pa_types.h"
1.74 misha 24: #include "pa_string.h"
1.59 paf 25:
26: const int HASH_ALLOCATES_COUNT=29;
1.1 paf 27:
1.61 paf 28: /** Zend comment: Generated on an Octa-ALPHA 300MHz CPU & 2.5GB RAM monster
29:
30: paf: HPUX ld could not handle static member: unsatisfied symbols
31: */
32: static uint Hash_allocates[HASH_ALLOCATES_COUNT]={
33: 5, 11, 19, 53, 107, 223, 463, 983, 1979, 3907, 7963,
34: 16229, 32531, 65407, 130987, 262237, 524521, 1048793,
35: 2097397, 4194103, 8388857, 16777447, 33554201, 67108961,
36: 134217487, 268435697, 536870683, 1073741621, 2147483399};
37:
1.68 misha 38: /// useful generic hash function
39: inline void generic_hash_code(uint& result, char c) {
40: result=(result<<4)+c;
41: if(uint g=(result&0xF0000000)) {
42: result=result^(g>>24);
43: result=result^g;
44: }
45: }
46: /// useful generic hash function
47: inline void generic_hash_code(uint& result, const char* s) {
48: while(char c=*s++) {
49: result=(result<<4)+c;
50: if(uint g=(result&0xF0000000)) {
51: result=result^(g>>24);
52: result=result^g;
53: }
54: }
55: }
56:
57: /// useful generic hash function
58: inline void generic_hash_code(uint& result, const char* buf, size_t size) {
59: const char* end=buf+size;
60: while(buf<end) {
61: result=(result<<4)+*buf++;
62: if(uint g=(result&0xF0000000)) {
63: result=result^(g>>24);
64: result=result^g;
65: }
66: }
67: }
68:
69: /// simple hash code of int. used by EXIF mapping
70: inline uint hash_code(int self) {
71: uint result=0;
72: generic_hash_code(result, (const char*)&self, sizeof(self));
73: return result;
74: }
75:
1.75 misha 76: #endif // PA_HASH_H
77:
78: #ifndef PA_HASH_CLASS
79: #define PA_HASH_CLASS
1.29 paf 80: /**
1.59 paf 81: Simple hash.
1.29 paf 82:
1.59 paf 83: Automatically rehashed when almost is_full.
1.51 paf 84: Contains no 0 values.
85: get returning 0 means there were no such.
86: "put value 0" means "remove"
1.29 paf 87: */
1.75 misha 88: #ifdef HASH_ORDER
89:
90: #undef HASH
91: #undef HASH_STRING
1.79 misha 92: #undef HASH_NEW_PAIR
93: #undef HASH_FOR_EACH
1.75 misha 94:
95: #define HASH OrderedHash
96: #define HASH_STRING OrderedHashString
1.79 misha 97: #define HASH_NEW_PAIR(code, key, value) *ref=new Pair(code, key, value, *ref, this->last); this->last=&((*ref)->next)
98:
99: #define HASH_FOR_EACH \
100: for(Pair *pair=this->first; pair; pair=pair->next)
1.75 misha 101:
102: #else
103:
104: #define HASH Hash
105: #define HASH_STRING HashString
1.79 misha 106: #define HASH_NEW_PAIR(code, key, value) *ref=new Pair(code, key, value, *ref)
107:
108: #define HASH_FOR_EACH \
109: Pair **ref=this->refs; \
110: for(int index=0; index<this->allocated; index++) \
111: for(Pair *pair=*ref++; pair; pair=pair->link)
1.75 misha 112:
113: #endif
114:
115: template<typename K, typename V> class HASH: public PA_Object {
1.1 paf 116: public:
117:
1.59 paf 118: typedef K key_type;
119: typedef V value_type;
1.3 paf 120:
1.75 misha 121: HASH() {
1.61 paf 122: allocated=Hash_allocates[allocates_index=0];
1.59 paf 123: threshold=allocated*THRESHOLD_PERCENT/100;
124: fpairs_count=fused_refs=0;
125: refs=new(UseGC) Pair*[allocated];
1.75 misha 126: #ifdef HASH_ORDER
127: first=0;
128: last=&first;
129: #endif
1.59 paf 130: }
1.25 paf 131:
1.75 misha 132: HASH(const HASH& source) {
1.59 paf 133: allocates_index=source.allocates_index;
134: allocated=source.allocated;
135: threshold=source.threshold;
136: fused_refs=source.fused_refs;
137: fpairs_count=source.fpairs_count;
138: refs=new(UseGC) Pair*[allocated];
1.81 moko 139: // clone & rehash
1.75 misha 140: #ifdef HASH_ORDER
141: first=0;
142: last=&first;
1.81 moko 143: for(Pair *pair=source.first; pair; pair=pair->next)
144: {
145: uint index=pair->code%allocated;
146: Pair **ref=&refs[index];
147: HASH_NEW_PAIR(pair->code, pair->key, pair->value);
148: }
149: #else
150: for(int i=0; i<source.allocated; i++)
151: for(Pair *pair=source.refs[i]; pair; pair=pair->link)
152: {
153: Pair **ref=&refs[i];
1.79 misha 154: HASH_NEW_PAIR(pair->code, pair->key, pair->value);
1.59 paf 155: }
1.81 moko 156: #endif
1.43 parser 157: }
158:
1.73 misha 159: #ifdef USE_DESTRUCTORS
1.75 misha 160: ~HASH() {
1.72 misha 161: Pair **ref=refs;
162: for(int index=0; index<allocated; index++)
163: for(Pair *pair=*ref++; pair;){
164: Pair *next=pair->link;
165: delete pair;
166: pair=next;
167: }
1.71 misha 168: delete[] refs;
169: }
1.73 misha 170: #endif
1.71 misha 171:
1.59 paf 172: /// put a [value] under the [key] @returns existed or not
173: bool put(K key, V value) {
174: if(!value) {
175: remove(key);
176: return false;
177: }
178: if(is_full())
179: expand();
180:
181: uint code=hash_code(key);
182: uint index=code%allocated;
183: Pair **ref=&refs[index];
184: for(Pair *pair=*ref; pair; pair=pair->link)
185: if(pair->code==code && pair->key==key) {
186: // found a pair with the same key
187: pair->value=value;
188: return true;
189: }
190:
191: // proper pair not found -- create&link_in new pair
192: if(!*ref) // root cell were fused_refs?
193: fused_refs++; // not, we'll use it and record the fact
1.79 misha 194: HASH_NEW_PAIR(code, key, value);
1.59 paf 195: fpairs_count++;
196: return false;
1.24 paf 197: }
1.10 paf 198:
1.59 paf 199: /// remove the [key] @returns existed or not
200: bool remove(K key) {
201: uint code=hash_code(key);
202: uint index=code%allocated;
1.75 misha 203: for(Pair **ref=&refs[index]; *ref; ref=&(*ref)->link){
204: Pair *pair=*ref;
205: if(pair->code==code && pair->key==key) {
1.59 paf 206: // found a pair with the same key
1.75 misha 207: Pair *next=pair->link;
208: #ifdef HASH_ORDER
209: *(pair->prev)=pair->next;
210: if(pair->next)
211: pair->next->prev=pair->prev;
212: else
213: last=pair->prev;
214: #endif
215: delete pair;
1.59 paf 216: *ref=next;
217: --fpairs_count;
218: return true;
219: }
1.75 misha 220: }
1.8 paf 221:
1.59 paf 222: return false;
223: }
1.48 paf 224:
1.70 misha 225: /// return true if key exists
1.69 misha 226: bool contains(K key){
1.67 misha 227: uint code=hash_code(key);
228: uint index=code%allocated;
1.70 misha 229: for(Pair *pair=refs[index]; pair; pair=pair->link){
230: if(pair->code==code && pair->key==key)
1.67 misha 231: return true;
232: }
233:
234: return false;
235: }
236:
1.59 paf 237: /// get associated [value] by the [key]
238: V get(K key) const {
239: uint code=hash_code(key);
240: uint index=code%allocated;
241: for(Pair *pair=refs[index]; pair; pair=pair->link)
242: if(pair->code==code && pair->key==key)
243: return pair->value;
244:
245: return V(0);
1.33 paf 246: }
1.70 misha 247:
1.82 ! misha 248: #ifdef HASH_ORDER
! 249: V first_value() const {
! 250: return (first) ? first->value : V(0);
! 251: }
! 252:
! 253: V last_value() const {
! 254: return (fpairs_count) ? ((Pair *)((char *)last - offsetof(Pair, next)))->value : V(0);
! 255: }
! 256: #endif
! 257:
1.51 paf 258: /// put a [value] under the [key] if that [key] existed @returns existed or not
1.63 paf 259: bool put_replaced(K key, V value) {
1.59 paf 260: if(!value) {
261: remove(key);
262: return false;
263: }
264: uint code=hash_code(key);
265: uint index=code%allocated;
266: for(Pair *pair=refs[index]; pair; pair=pair->link)
267: if(pair->code==code && pair->key==key) {
268: // found a pair with the same key, replacing
269: pair->value=value;
270: return true;
271: }
272:
273: // proper pair not found
274: return false;
1.64 paf 275: }
276:
1.51 paf 277: /// put a [value] under the [key] if that [key] NOT existed @returns existed or not
1.59 paf 278: bool put_dont_replace(K key, V value) {
279: if(!value) {
280: remove(key);
281: return false;
282: }
283: if(is_full())
284: expand();
285:
286: uint code=hash_code(key);
287: uint index=code%allocated;
288: Pair **ref=&refs[index];
289: for(Pair *pair=*ref; pair; pair=pair->link)
290: if(pair->code==code && pair->key==key) {
291: // found a pair with the same key, NOT replacing
292: return true;
293: }
294:
295: // proper pair not found -- create&link_in new pair
296: if(!*ref) // root cell were fused_refs?
297: fused_refs++; // not, we'll use it and record the fact
1.79 misha 298: HASH_NEW_PAIR(code, key, value);
1.59 paf 299: fpairs_count++;
300: return false;
301: }
1.18 paf 302:
1.79 misha 303: /// put all 'src' values if NO with same key existed
1.75 misha 304: void merge_dont_replace(const HASH& src) {
1.79 misha 305: #ifdef HASH_ORDER
306: for(Pair *pair=src.first; pair; pair=pair->next)
307: #else
1.59 paf 308: for(int i=0; i<src.allocated; i++)
309: for(Pair *pair=src.refs[i]; pair; pair=pair->link)
1.79 misha 310: #endif
1.59 paf 311: put_dont_replace(pair->key, pair->value);
1.36 paf 312: }
1.11 paf 313:
1.29 paf 314: /// number of elements in hash
1.59 paf 315: int count() const { return fpairs_count; }
1.25 paf 316:
1.59 paf 317: /// iterate over all pairs
318: template<typename I> void for_each(void callback(K, V, I), I info) const {
1.79 misha 319: HASH_FOR_EACH
1.76 misha 320: callback(pair->key, pair->value, info);
1.59 paf 321: }
1.45 paf 322:
1.59 paf 323: /// iterate over all pairs
324: template<typename I> void for_each_ref(void callback(K, V&, I), I info) const {
1.79 misha 325: HASH_FOR_EACH
1.76 misha 326: callback(pair->key, pair->value, info);
1.59 paf 327: }
1.38 paf 328:
1.59 paf 329: /// iterate over all pairs until condition becomes true, return that element
330: template<typename I> V first_that(bool callback(K, V, I), I info) const {
1.79 misha 331: HASH_FOR_EACH
1.75 misha 332: if(callback(pair->key, pair->value, info))
333: return pair->value;
1.59 paf 334: return V(0);
335: }
1.27 paf 336:
1.29 paf 337: /// remove all elements
1.59 paf 338: void clear() {
339: memset(refs, 0, sizeof(*refs)*allocated);
340: fpairs_count=fused_refs=0;
1.75 misha 341: #ifdef HASH_ORDER
342: first=0;
343: last=&first;
344: #endif
1.59 paf 345: }
1.15 paf 346:
1.74 misha 347: protected:
1.1 paf 348:
1.39 paf 349: /// expand when these %% of allocated exausted
1.1 paf 350: enum {
351: THRESHOLD_PERCENT=75
352: };
1.9 paf 353:
1.61 paf 354: /// the index of [allocated] in [Hash_allocates]
1.19 paf 355: int allocates_index;
1.1 paf 356:
1.39 paf 357: /// number of allocated pairs
1.19 paf 358: int allocated;
1.1 paf 359:
1.59 paf 360: /// helper: expanding when fused_refs == threshold
1.1 paf 361: int threshold;
362:
1.39 paf 363: /// used pairs
1.59 paf 364: int fused_refs;
1.44 parser 365:
366: /// stored pairs total (including those by links)
1.59 paf 367: int fpairs_count;
1.1 paf 368:
1.39 paf 369: /// pair storage
1.59 paf 370: class Pair: public PA_Allocated {
371: public:
1.1 paf 372: uint code;
1.59 paf 373: K key;
374: V value;
1.1 paf 375: Pair *link;
1.75 misha 376: #ifdef HASH_ORDER
377: Pair **prev;
378: Pair *next;
379:
380: Pair(uint acode, K akey, V avalue, Pair *alink, Pair **aprev) : code(acode), key(akey), value(avalue), link(alink),
381: prev(aprev), next(0) { *aprev=this; }
382: #else
383: Pair(uint acode, K akey, V avalue, Pair *alink) : code(acode), key(akey), value(avalue), link(alink) {}
384: #endif
1.2 paf 385: } **refs;
1.1 paf 386:
1.75 misha 387: #ifdef HASH_ORDER
388: Pair *first;
389: Pair **last;
390: #endif
391:
1.39 paf 392: /// filled to threshold: needs expanding
1.59 paf 393: bool is_full() { return fused_refs==threshold; }
1.5 paf 394:
1.39 paf 395: /// allocate larger buffer & rehash
1.59 paf 396: void expand() {
397: int old_allocated=allocated;
398: Pair **old_refs=refs;
399:
400: allocates_index=allocates_index+1<HASH_ALLOCATES_COUNT?allocates_index+1:HASH_ALLOCATES_COUNT-1;
401: // allocated bigger refs array
1.61 paf 402: allocated=Hash_allocates[allocates_index];
1.59 paf 403: threshold=allocated*THRESHOLD_PERCENT/100;
404: refs=new(UseGC) Pair*[allocated];
405:
406: // rehash
407: Pair **old_ref=old_refs;
408: for(int old_index=0; old_index<old_allocated; old_index++)
409: for(Pair *pair=*old_ref++; pair; ) {
410: Pair *next=pair->link;
411:
412: uint new_index=pair->code%allocated;
413: Pair **new_ref=&refs[new_index];
414: pair->link=*new_ref;
415: *new_ref=pair;
416:
417: pair=next;
418: }
419:
420: delete[] old_refs;
421: }
1.4 paf 422:
423: private: //disabled
424:
1.75 misha 425: HASH& operator = (const HASH&) { return *this; }
1.1 paf 426: };
1.59 paf 427:
1.74 misha 428: /**
1.75 misha 429: Simple String::body hash.
430: Allows hash code caching
1.74 misha 431: */
432:
433: #ifdef HASH_CODE_CACHING
434:
1.75 misha 435: template<typename V> class HASH_STRING: public HASH<const CORD,V> {
1.74 misha 436: public:
437:
1.75 misha 438: typedef typename HASH<const CORD,V>::Pair Pair;
1.74 misha 439: typedef const String::Body &K;
440:
441: typedef K key_type;
442:
443: /// put a [value] under the [key] @returns existed or not
444: bool put(K str, V value) {
445: if(!value) {
446: remove(str);
447: return false;
448: }
449: if(this->is_full())
450: this->expand();
451:
452: CORD key=str.get_cord();
453:
454: uint code=str.get_hash_code();
455: uint index=code%this->allocated;
456: Pair **ref=&this->refs[index];
457: for(Pair *pair=*ref; pair; pair=pair->link)
458: if(pair->code==code && CORD_cmp(pair->key,key)==0) {
459: // found a pair with the same key
460: pair->value=value;
461: return true;
462: }
463:
464: // proper pair not found -- create&link_in new pair
465: if(!*ref) // root cell were fused_refs?
466: this->fused_refs++; // not, we'll use it and record the fact
1.79 misha 467: HASH_NEW_PAIR(code, key, value);
1.74 misha 468: this->fpairs_count++;
469: return false;
470: }
471:
472: /// remove the [key] @returns existed or not
473: bool remove(K str) {
474: CORD key=str.get_cord();
475: uint code=str.get_hash_code();
476: uint index=code%this->allocated;
1.75 misha 477: for(Pair **ref=&this->refs[index]; *ref; ref=&(*ref)->link){
478: Pair *pair=*ref;
479: if(pair->code==code && CORD_cmp(pair->key,key)==0) {
1.74 misha 480: // found a pair with the same key
1.75 misha 481: Pair *next=pair->link;
482: #ifdef HASH_ORDER
483: *(pair->prev)=pair->next;
484: if(pair->next)
485: pair->next->prev=pair->prev;
486: else
487: this->last=pair->prev;
488: #endif
489: delete pair;
1.74 misha 490: *ref=next;
491: --this->fpairs_count;
492: return true;
493: }
1.75 misha 494: }
1.74 misha 495:
496: return false;
497: }
498:
499: /// return true if key exists
500: bool contains(K str){
501: CORD key=str.get_cord();
502: uint code=str.get_hash_code();
503: uint index=code%this->allocated;
504: for(Pair *pair=this->refs[index]; pair; pair=pair->link){
505: if(pair->code==code && CORD_cmp(pair->key,key)==0)
506: return true;
507: }
508:
509: return false;
510: }
511:
512: /// get associated [value] by the [key]
513: V get(K str) const {
514: CORD key=str.get_cord();
515: uint code=str.get_hash_code();
516: uint index=code%this->allocated;
517: for(Pair *pair=this->refs[index]; pair; pair=pair->link)
518: if(pair->code==code && CORD_cmp(pair->key,key)==0)
519: return pair->value;
520:
521: return V(0);
522: }
523:
524: /// put a [value] under the [key] if that [key] existed @returns existed or not
525: bool put_replaced(K str, V value) {
526: if(!value) {
527: remove(str);
528: return false;
529: }
530:
531: CORD key=str.get_cord();
532: uint code=str.get_hash_code();
533: uint index=code%this->allocated;
534: for(Pair *pair=this->refs[index]; pair; pair=pair->link)
535: if(pair->code==code && CORD_cmp(pair->key,key)==0) {
536: // found a pair with the same key, replacing
537: pair->value=value;
538: return true;
539: }
540:
541: // proper pair not found
542: return false;
543: }
544:
545: /// put a [value] under the [key] if that [key] NOT existed @returns existed or not
546: bool put_dont_replace(K str, V value) {
547: if(!value) {
548: remove(str);
549: return false;
550: }
551: if(this->is_full())
552: this->expand();
553:
554: CORD key=str.get_cord();
555: uint code=str.get_hash_code();
556: uint index=code%this->allocated;
557: Pair **ref=&this->refs[index];
558: for(Pair *pair=*ref; pair; pair=pair->link)
559: if(pair->code==code && CORD_cmp(pair->key,key)==0) {
560: // found a pair with the same key, NOT replacing
561: return true;
562: }
563:
564: // proper pair not found -- create&link_in new pair
565: if(!*ref) // root cell were fused_refs?
566: this->fused_refs++; // not, we'll use it and record the fact
1.79 misha 567: HASH_NEW_PAIR(code, key, value);
1.74 misha 568: this->fpairs_count++;
569: return false;
570: }
571:
1.79 misha 572: /// put all 'src' values if NO with same key existed
573: void merge_dont_replace(const HASH_STRING& src) {
1.76 misha 574: #ifdef HASH_ORDER
1.79 misha 575: for(Pair *pair=src.first; pair; pair=pair->next)
1.76 misha 576: #else
1.79 misha 577: for(int i=0; i<src.allocated; i++)
578: for(Pair *pair=src.refs[i]; pair; pair=pair->link)
1.76 misha 579: #endif
1.79 misha 580: put_dont_replace(String::Body(pair->key, pair->code), pair->value);
581: }
582:
583: /// iterate over all pairs
584: template<typename I> void for_each(void callback(K, V, I), I info) const {
585: HASH_FOR_EACH
586: callback(String::Body(pair->key, pair->code), pair->value, info);
1.74 misha 587: }
588:
589: /// iterate over all pairs
590: template<typename I> void for_each_ref(void callback(K, V&, I), I info) const {
1.79 misha 591: HASH_FOR_EACH
592: callback(String::Body(pair->key, pair->code), pair->value, info);
1.74 misha 593: }
594:
595: /// iterate over all pairs until condition becomes true, return that element
596: template<typename I> V first_that(bool callback(K, V, I), I info) const {
1.79 misha 597: HASH_FOR_EACH
1.75 misha 598: if(callback(String::Body(pair->key, pair->code), pair->value, info))
599: return pair->value;
1.74 misha 600: return V(0);
601: }
1.80 misha 602:
603: /// simple hash iterator
604: class Iterator {
605: const HASH_STRING<V>& fhash;
606: Pair *fcurrent;
607: public:
608: Iterator(const HASH_STRING<V>& ahash): fhash(ahash) {
609: fcurrent=fhash.first;
610: }
611:
612: operator bool () {
613: return fcurrent != 0;
614: }
615:
616: void next() {
617: fcurrent=fcurrent->next;
618: }
619:
620: String::Body key(){
621: return String::Body(fcurrent->key, fcurrent->code);
622: }
623:
624: V value(){
625: return fcurrent->value;
626: }
627: };
1.74 misha 628: };
1.78 misha 629: #else //HASH_CODE_CACHING
1.74 misha 630:
1.75 misha 631: template<typename V> class HASH_STRING: public HASH<const String::Body,V>{};
1.74 misha 632:
1.78 misha 633: #endif //HASH_CODE_CACHING
1.74 misha 634:
1.75 misha 635: #ifndef HASH_ORDER
1.74 misha 636: /// Auto-object used to temporarily substituting/removing string hash values
1.59 paf 637: template <typename K, typename V>
1.55 paf 638: class Temp_hash_value {
1.74 misha 639: HashString<V> &fhash;
1.59 paf 640: K fname;
641: V saved_value;
1.55 paf 642: public:
1.74 misha 643: Temp_hash_value(HashString<V>& ahash, K aname, V avalue) :
1.55 paf 644: fhash(ahash),
645: fname(aname),
646: saved_value(ahash.get(aname)) {
647: fhash.put(aname, avalue);
648: }
649: ~Temp_hash_value() {
650: fhash.put(fname, saved_value);
651: }
652: };
1.75 misha 653: #endif
1.1 paf 654:
1.75 misha 655: #endif //PA_HASH_CLASS
E-mail: