Annotation of parser3/src/include/pa_hash.h, revision 1.80
1.28 paf 1: /** @file
1.29 paf 2: Parser: hash class decl.
3:
1.74 misha 4: Copyright (c) 2001-2009 ArtLebedev Group (http://www.artlebedev.com)
1.29 paf 5:
1.54 paf 6: Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
1.1 paf 7: */
8:
1.59 paf 9: /*
10: The prime numbers used from zend_hash.c,
11: the part of Zend scripting engine library,
12: Copyrighted (C) 1999-2000 Zend Technologies Ltd.
13: http://www.zend.com/license/0_92.txt
14: For more information about Zend please visit http://www.zend.com/
15: */
16:
1.1 paf 17: #ifndef PA_HASH_H
18: #define PA_HASH_H
1.56 paf 19:
1.80 ! misha 20: static const char * const IDENT_HASH_H="$Date: 2009-08-08 13:30:21 $";
1.1 paf 21:
1.59 paf 22: #include "pa_memory.h"
1.1 paf 23: #include "pa_types.h"
1.74 misha 24: #include "pa_string.h"
1.59 paf 25:
26: const int HASH_ALLOCATES_COUNT=29;
1.1 paf 27:
1.61 paf 28: /** Zend comment: Generated on an Octa-ALPHA 300MHz CPU & 2.5GB RAM monster
29:
30: paf: HPUX ld could not handle static member: unsatisfied symbols
31: */
32: static uint Hash_allocates[HASH_ALLOCATES_COUNT]={
33: 5, 11, 19, 53, 107, 223, 463, 983, 1979, 3907, 7963,
34: 16229, 32531, 65407, 130987, 262237, 524521, 1048793,
35: 2097397, 4194103, 8388857, 16777447, 33554201, 67108961,
36: 134217487, 268435697, 536870683, 1073741621, 2147483399};
37:
1.68 misha 38: /// useful generic hash function
39: inline void generic_hash_code(uint& result, char c) {
40: result=(result<<4)+c;
41: if(uint g=(result&0xF0000000)) {
42: result=result^(g>>24);
43: result=result^g;
44: }
45: }
46: /// useful generic hash function
47: inline void generic_hash_code(uint& result, const char* s) {
48: while(char c=*s++) {
49: result=(result<<4)+c;
50: if(uint g=(result&0xF0000000)) {
51: result=result^(g>>24);
52: result=result^g;
53: }
54: }
55: }
56:
57: /// useful generic hash function
58: inline void generic_hash_code(uint& result, const char* buf, size_t size) {
59: const char* end=buf+size;
60: while(buf<end) {
61: result=(result<<4)+*buf++;
62: if(uint g=(result&0xF0000000)) {
63: result=result^(g>>24);
64: result=result^g;
65: }
66: }
67: }
68:
69: /// simple hash code of int. used by EXIF mapping
70: inline uint hash_code(int self) {
71: uint result=0;
72: generic_hash_code(result, (const char*)&self, sizeof(self));
73: return result;
74: }
75:
1.75 misha 76: #endif // PA_HASH_H
77:
78: #ifndef PA_HASH_CLASS
79: #define PA_HASH_CLASS
1.29 paf 80: /**
1.59 paf 81: Simple hash.
1.29 paf 82:
1.59 paf 83: Automatically rehashed when almost is_full.
1.51 paf 84: Contains no 0 values.
85: get returning 0 means there were no such.
86: "put value 0" means "remove"
1.29 paf 87: */
1.75 misha 88: #ifdef HASH_ORDER
89:
90: #undef HASH
91: #undef HASH_STRING
1.79 misha 92: #undef HASH_NEW_PAIR
93: #undef HASH_FOR_EACH
1.75 misha 94:
95: #define HASH OrderedHash
96: #define HASH_STRING OrderedHashString
1.79 misha 97: #define HASH_NEW_PAIR(code, key, value) *ref=new Pair(code, key, value, *ref, this->last); this->last=&((*ref)->next)
98:
99: #define HASH_FOR_EACH \
100: for(Pair *pair=this->first; pair; pair=pair->next)
1.75 misha 101:
102: #else
103:
104: #define HASH Hash
105: #define HASH_STRING HashString
1.79 misha 106: #define HASH_NEW_PAIR(code, key, value) *ref=new Pair(code, key, value, *ref)
107:
108: #define HASH_FOR_EACH \
109: Pair **ref=this->refs; \
110: for(int index=0; index<this->allocated; index++) \
111: for(Pair *pair=*ref++; pair; pair=pair->link)
1.75 misha 112:
113: #endif
114:
115: template<typename K, typename V> class HASH: public PA_Object {
1.1 paf 116: public:
117:
1.59 paf 118: typedef K key_type;
119: typedef V value_type;
1.3 paf 120:
1.75 misha 121: HASH() {
1.61 paf 122: allocated=Hash_allocates[allocates_index=0];
1.59 paf 123: threshold=allocated*THRESHOLD_PERCENT/100;
124: fpairs_count=fused_refs=0;
125: refs=new(UseGC) Pair*[allocated];
1.75 misha 126: #ifdef HASH_ORDER
127: first=0;
128: last=&first;
129: #endif
1.59 paf 130: }
1.25 paf 131:
1.75 misha 132: HASH(const HASH& source) {
1.59 paf 133: allocates_index=source.allocates_index;
134: allocated=source.allocated;
135: threshold=source.threshold;
136: fused_refs=source.fused_refs;
137: fpairs_count=source.fpairs_count;
138: refs=new(UseGC) Pair*[allocated];
1.75 misha 139: #ifdef HASH_ORDER
140: first=0;
141: last=&first;
142: #endif
1.59 paf 143: // clone & rehash
144: Pair **old_ref=source.refs;
145: for(int index=0; index<allocated; index++)
146: for(Pair *pair=*old_ref++; pair; ) {
147: Pair *next=pair->link;
1.45 paf 148:
1.75 misha 149: Pair **ref=&refs[index];
1.79 misha 150: HASH_NEW_PAIR(pair->code, pair->key, pair->value);
1.38 paf 151:
1.59 paf 152: pair=next;
153: }
1.43 parser 154: }
155:
1.73 misha 156: #ifdef USE_DESTRUCTORS
1.75 misha 157: ~HASH() {
1.72 misha 158: Pair **ref=refs;
159: for(int index=0; index<allocated; index++)
160: for(Pair *pair=*ref++; pair;){
161: Pair *next=pair->link;
162: delete pair;
163: pair=next;
164: }
1.71 misha 165: delete[] refs;
166: }
1.73 misha 167: #endif
1.71 misha 168:
1.59 paf 169: /// put a [value] under the [key] @returns existed or not
170: bool put(K key, V value) {
171: if(!value) {
172: remove(key);
173: return false;
174: }
175: if(is_full())
176: expand();
177:
178: uint code=hash_code(key);
179: uint index=code%allocated;
180: Pair **ref=&refs[index];
181: for(Pair *pair=*ref; pair; pair=pair->link)
182: if(pair->code==code && pair->key==key) {
183: // found a pair with the same key
184: pair->value=value;
185: return true;
186: }
187:
188: // proper pair not found -- create&link_in new pair
189: if(!*ref) // root cell were fused_refs?
190: fused_refs++; // not, we'll use it and record the fact
1.79 misha 191: HASH_NEW_PAIR(code, key, value);
1.59 paf 192: fpairs_count++;
193: return false;
1.24 paf 194: }
1.10 paf 195:
1.59 paf 196: /// remove the [key] @returns existed or not
197: bool remove(K key) {
198: uint code=hash_code(key);
199: uint index=code%allocated;
1.75 misha 200: for(Pair **ref=&refs[index]; *ref; ref=&(*ref)->link){
201: Pair *pair=*ref;
202: if(pair->code==code && pair->key==key) {
1.59 paf 203: // found a pair with the same key
1.75 misha 204: Pair *next=pair->link;
205: #ifdef HASH_ORDER
206: *(pair->prev)=pair->next;
207: if(pair->next)
208: pair->next->prev=pair->prev;
209: else
210: last=pair->prev;
211: #endif
212: delete pair;
1.59 paf 213: *ref=next;
214: --fpairs_count;
215: return true;
216: }
1.75 misha 217: }
1.8 paf 218:
1.59 paf 219: return false;
220: }
1.48 paf 221:
1.70 misha 222: /// return true if key exists
1.69 misha 223: bool contains(K key){
1.67 misha 224: uint code=hash_code(key);
225: uint index=code%allocated;
1.70 misha 226: for(Pair *pair=refs[index]; pair; pair=pair->link){
227: if(pair->code==code && pair->key==key)
1.67 misha 228: return true;
229: }
230:
231: return false;
232: }
233:
1.59 paf 234: /// get associated [value] by the [key]
235: V get(K key) const {
236: uint code=hash_code(key);
237: uint index=code%allocated;
238: for(Pair *pair=refs[index]; pair; pair=pair->link)
239: if(pair->code==code && pair->key==key)
240: return pair->value;
241:
242: return V(0);
1.33 paf 243: }
1.70 misha 244:
1.51 paf 245: /// put a [value] under the [key] if that [key] existed @returns existed or not
1.63 paf 246: bool put_replaced(K key, V value) {
1.59 paf 247: if(!value) {
248: remove(key);
249: return false;
250: }
251: uint code=hash_code(key);
252: uint index=code%allocated;
253: for(Pair *pair=refs[index]; pair; pair=pair->link)
254: if(pair->code==code && pair->key==key) {
255: // found a pair with the same key, replacing
256: pair->value=value;
257: return true;
258: }
259:
260: // proper pair not found
261: return false;
1.64 paf 262: }
263:
1.51 paf 264: /// put a [value] under the [key] if that [key] NOT existed @returns existed or not
1.59 paf 265: bool put_dont_replace(K key, V value) {
266: if(!value) {
267: remove(key);
268: return false;
269: }
270: if(is_full())
271: expand();
272:
273: uint code=hash_code(key);
274: uint index=code%allocated;
275: Pair **ref=&refs[index];
276: for(Pair *pair=*ref; pair; pair=pair->link)
277: if(pair->code==code && pair->key==key) {
278: // found a pair with the same key, NOT replacing
279: return true;
280: }
281:
282: // proper pair not found -- create&link_in new pair
283: if(!*ref) // root cell were fused_refs?
284: fused_refs++; // not, we'll use it and record the fact
1.79 misha 285: HASH_NEW_PAIR(code, key, value);
1.59 paf 286: fpairs_count++;
287: return false;
288: }
1.18 paf 289:
1.79 misha 290: /// put all 'src' values if NO with same key existed
1.75 misha 291: void merge_dont_replace(const HASH& src) {
1.79 misha 292: #ifdef HASH_ORDER
293: for(Pair *pair=src.first; pair; pair=pair->next)
294: #else
1.59 paf 295: for(int i=0; i<src.allocated; i++)
296: for(Pair *pair=src.refs[i]; pair; pair=pair->link)
1.79 misha 297: #endif
1.59 paf 298: put_dont_replace(pair->key, pair->value);
1.36 paf 299: }
1.11 paf 300:
1.29 paf 301: /// number of elements in hash
1.59 paf 302: int count() const { return fpairs_count; }
1.25 paf 303:
1.59 paf 304: /// iterate over all pairs
305: template<typename I> void for_each(void callback(K, V, I), I info) const {
1.79 misha 306: HASH_FOR_EACH
1.76 misha 307: callback(pair->key, pair->value, info);
1.59 paf 308: }
1.45 paf 309:
1.59 paf 310: /// iterate over all pairs
311: template<typename I> void for_each_ref(void callback(K, V&, I), I info) const {
1.79 misha 312: HASH_FOR_EACH
1.76 misha 313: callback(pair->key, pair->value, info);
1.59 paf 314: }
1.38 paf 315:
1.59 paf 316: /// iterate over all pairs until condition becomes true, return that element
317: template<typename I> V first_that(bool callback(K, V, I), I info) const {
1.79 misha 318: HASH_FOR_EACH
1.75 misha 319: if(callback(pair->key, pair->value, info))
320: return pair->value;
1.59 paf 321: return V(0);
322: }
1.27 paf 323:
1.29 paf 324: /// remove all elements
1.59 paf 325: void clear() {
326: memset(refs, 0, sizeof(*refs)*allocated);
327: fpairs_count=fused_refs=0;
1.75 misha 328: #ifdef HASH_ORDER
329: first=0;
330: last=&first;
331: #endif
1.59 paf 332: }
1.15 paf 333:
1.74 misha 334: protected:
1.1 paf 335:
1.39 paf 336: /// expand when these %% of allocated exausted
1.1 paf 337: enum {
338: THRESHOLD_PERCENT=75
339: };
1.9 paf 340:
1.61 paf 341: /// the index of [allocated] in [Hash_allocates]
1.19 paf 342: int allocates_index;
1.1 paf 343:
1.39 paf 344: /// number of allocated pairs
1.19 paf 345: int allocated;
1.1 paf 346:
1.59 paf 347: /// helper: expanding when fused_refs == threshold
1.1 paf 348: int threshold;
349:
1.39 paf 350: /// used pairs
1.59 paf 351: int fused_refs;
1.44 parser 352:
353: /// stored pairs total (including those by links)
1.59 paf 354: int fpairs_count;
1.1 paf 355:
1.39 paf 356: /// pair storage
1.59 paf 357: class Pair: public PA_Allocated {
358: public:
1.1 paf 359: uint code;
1.59 paf 360: K key;
361: V value;
1.1 paf 362: Pair *link;
1.75 misha 363: #ifdef HASH_ORDER
364: Pair **prev;
365: Pair *next;
366:
367: Pair(uint acode, K akey, V avalue, Pair *alink, Pair **aprev) : code(acode), key(akey), value(avalue), link(alink),
368: prev(aprev), next(0) { *aprev=this; }
369: #else
370: Pair(uint acode, K akey, V avalue, Pair *alink) : code(acode), key(akey), value(avalue), link(alink) {}
371: #endif
1.2 paf 372: } **refs;
1.1 paf 373:
1.75 misha 374: #ifdef HASH_ORDER
375: Pair *first;
376: Pair **last;
377: #endif
378:
1.39 paf 379: /// filled to threshold: needs expanding
1.59 paf 380: bool is_full() { return fused_refs==threshold; }
1.5 paf 381:
1.39 paf 382: /// allocate larger buffer & rehash
1.59 paf 383: void expand() {
384: int old_allocated=allocated;
385: Pair **old_refs=refs;
386:
387: allocates_index=allocates_index+1<HASH_ALLOCATES_COUNT?allocates_index+1:HASH_ALLOCATES_COUNT-1;
388: // allocated bigger refs array
1.61 paf 389: allocated=Hash_allocates[allocates_index];
1.59 paf 390: threshold=allocated*THRESHOLD_PERCENT/100;
391: refs=new(UseGC) Pair*[allocated];
392:
393: // rehash
394: Pair **old_ref=old_refs;
395: for(int old_index=0; old_index<old_allocated; old_index++)
396: for(Pair *pair=*old_ref++; pair; ) {
397: Pair *next=pair->link;
398:
399: uint new_index=pair->code%allocated;
400: Pair **new_ref=&refs[new_index];
401: pair->link=*new_ref;
402: *new_ref=pair;
403:
404: pair=next;
405: }
406:
407: delete[] old_refs;
408: }
1.4 paf 409:
410: private: //disabled
411:
1.75 misha 412: HASH& operator = (const HASH&) { return *this; }
1.1 paf 413: };
1.59 paf 414:
1.74 misha 415: /**
1.75 misha 416: Simple String::body hash.
417: Allows hash code caching
1.74 misha 418: */
419:
420: #ifdef HASH_CODE_CACHING
421:
1.75 misha 422: template<typename V> class HASH_STRING: public HASH<const CORD,V> {
1.74 misha 423: public:
424:
1.75 misha 425: typedef typename HASH<const CORD,V>::Pair Pair;
1.74 misha 426: typedef const String::Body &K;
427:
428: typedef K key_type;
429:
430: /// put a [value] under the [key] @returns existed or not
431: bool put(K str, V value) {
432: if(!value) {
433: remove(str);
434: return false;
435: }
436: if(this->is_full())
437: this->expand();
438:
439: CORD key=str.get_cord();
440:
441: uint code=str.get_hash_code();
442: uint index=code%this->allocated;
443: Pair **ref=&this->refs[index];
444: for(Pair *pair=*ref; pair; pair=pair->link)
445: if(pair->code==code && CORD_cmp(pair->key,key)==0) {
446: // found a pair with the same key
447: pair->value=value;
448: return true;
449: }
450:
451: // proper pair not found -- create&link_in new pair
452: if(!*ref) // root cell were fused_refs?
453: this->fused_refs++; // not, we'll use it and record the fact
1.79 misha 454: HASH_NEW_PAIR(code, key, value);
1.74 misha 455: this->fpairs_count++;
456: return false;
457: }
458:
459: /// remove the [key] @returns existed or not
460: bool remove(K str) {
461: CORD key=str.get_cord();
462: uint code=str.get_hash_code();
463: uint index=code%this->allocated;
1.75 misha 464: for(Pair **ref=&this->refs[index]; *ref; ref=&(*ref)->link){
465: Pair *pair=*ref;
466: if(pair->code==code && CORD_cmp(pair->key,key)==0) {
1.74 misha 467: // found a pair with the same key
1.75 misha 468: Pair *next=pair->link;
469: #ifdef HASH_ORDER
470: *(pair->prev)=pair->next;
471: if(pair->next)
472: pair->next->prev=pair->prev;
473: else
474: this->last=pair->prev;
475: #endif
476: delete pair;
1.74 misha 477: *ref=next;
478: --this->fpairs_count;
479: return true;
480: }
1.75 misha 481: }
1.74 misha 482:
483: return false;
484: }
485:
486: /// return true if key exists
487: bool contains(K str){
488: CORD key=str.get_cord();
489: uint code=str.get_hash_code();
490: uint index=code%this->allocated;
491: for(Pair *pair=this->refs[index]; pair; pair=pair->link){
492: if(pair->code==code && CORD_cmp(pair->key,key)==0)
493: return true;
494: }
495:
496: return false;
497: }
498:
499: /// get associated [value] by the [key]
500: V get(K str) const {
501: CORD key=str.get_cord();
502: uint code=str.get_hash_code();
503: uint index=code%this->allocated;
504: for(Pair *pair=this->refs[index]; pair; pair=pair->link)
505: if(pair->code==code && CORD_cmp(pair->key,key)==0)
506: return pair->value;
507:
508: return V(0);
509: }
510:
511: /// put a [value] under the [key] if that [key] existed @returns existed or not
512: bool put_replaced(K str, V value) {
513: if(!value) {
514: remove(str);
515: return false;
516: }
517:
518: CORD key=str.get_cord();
519: uint code=str.get_hash_code();
520: uint index=code%this->allocated;
521: for(Pair *pair=this->refs[index]; pair; pair=pair->link)
522: if(pair->code==code && CORD_cmp(pair->key,key)==0) {
523: // found a pair with the same key, replacing
524: pair->value=value;
525: return true;
526: }
527:
528: // proper pair not found
529: return false;
530: }
531:
532: /// put a [value] under the [key] if that [key] NOT existed @returns existed or not
533: bool put_dont_replace(K str, V value) {
534: if(!value) {
535: remove(str);
536: return false;
537: }
538: if(this->is_full())
539: this->expand();
540:
541: CORD key=str.get_cord();
542: uint code=str.get_hash_code();
543: uint index=code%this->allocated;
544: Pair **ref=&this->refs[index];
545: for(Pair *pair=*ref; pair; pair=pair->link)
546: if(pair->code==code && CORD_cmp(pair->key,key)==0) {
547: // found a pair with the same key, NOT replacing
548: return true;
549: }
550:
551: // proper pair not found -- create&link_in new pair
552: if(!*ref) // root cell were fused_refs?
553: this->fused_refs++; // not, we'll use it and record the fact
1.79 misha 554: HASH_NEW_PAIR(code, key, value);
1.74 misha 555: this->fpairs_count++;
556: return false;
557: }
558:
1.79 misha 559: /// put all 'src' values if NO with same key existed
560: void merge_dont_replace(const HASH_STRING& src) {
1.76 misha 561: #ifdef HASH_ORDER
1.79 misha 562: for(Pair *pair=src.first; pair; pair=pair->next)
1.76 misha 563: #else
1.79 misha 564: for(int i=0; i<src.allocated; i++)
565: for(Pair *pair=src.refs[i]; pair; pair=pair->link)
1.76 misha 566: #endif
1.79 misha 567: put_dont_replace(String::Body(pair->key, pair->code), pair->value);
568: }
569:
570: /// iterate over all pairs
571: template<typename I> void for_each(void callback(K, V, I), I info) const {
572: HASH_FOR_EACH
573: callback(String::Body(pair->key, pair->code), pair->value, info);
1.74 misha 574: }
575:
576: /// iterate over all pairs
577: template<typename I> void for_each_ref(void callback(K, V&, I), I info) const {
1.79 misha 578: HASH_FOR_EACH
579: callback(String::Body(pair->key, pair->code), pair->value, info);
1.74 misha 580: }
581:
582: /// iterate over all pairs until condition becomes true, return that element
583: template<typename I> V first_that(bool callback(K, V, I), I info) const {
1.79 misha 584: HASH_FOR_EACH
1.75 misha 585: if(callback(String::Body(pair->key, pair->code), pair->value, info))
586: return pair->value;
1.74 misha 587: return V(0);
588: }
1.80 ! misha 589:
! 590: /// simple hash iterator
! 591: class Iterator {
! 592: const HASH_STRING<V>& fhash;
! 593: Pair *fcurrent;
! 594: public:
! 595: Iterator(const HASH_STRING<V>& ahash): fhash(ahash) {
! 596: fcurrent=fhash.first;
! 597: }
! 598:
! 599: operator bool () {
! 600: return fcurrent != 0;
! 601: }
! 602:
! 603: void next() {
! 604: fcurrent=fcurrent->next;
! 605: }
! 606:
! 607: String::Body key(){
! 608: return String::Body(fcurrent->key, fcurrent->code);
! 609: }
! 610:
! 611: V value(){
! 612: return fcurrent->value;
! 613: }
! 614: };
1.74 misha 615: };
1.78 misha 616: #else //HASH_CODE_CACHING
1.74 misha 617:
1.75 misha 618: template<typename V> class HASH_STRING: public HASH<const String::Body,V>{};
1.74 misha 619:
1.78 misha 620: #endif //HASH_CODE_CACHING
1.74 misha 621:
1.75 misha 622: #ifndef HASH_ORDER
1.74 misha 623: /// Auto-object used to temporarily substituting/removing string hash values
1.59 paf 624: template <typename K, typename V>
1.55 paf 625: class Temp_hash_value {
1.74 misha 626: HashString<V> &fhash;
1.59 paf 627: K fname;
628: V saved_value;
1.55 paf 629: public:
1.74 misha 630: Temp_hash_value(HashString<V>& ahash, K aname, V avalue) :
1.55 paf 631: fhash(ahash),
632: fname(aname),
633: saved_value(ahash.get(aname)) {
634: fhash.put(aname, avalue);
635: }
636: ~Temp_hash_value() {
637: fhash.put(fname, saved_value);
638: }
639: };
1.75 misha 640: #endif
1.1 paf 641:
1.75 misha 642: #endif //PA_HASH_CLASS
E-mail: