Annotation of parser3/src/include/pa_hash.h, revision 1.83
1.28 paf 1: /** @file
1.29 paf 2: Parser: hash class decl.
3:
1.74 misha 4: Copyright (c) 2001-2009 ArtLebedev Group (http://www.artlebedev.com)
1.29 paf 5:
1.54 paf 6: Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
1.1 paf 7: */
8:
1.59 paf 9: /*
10: The prime numbers used from zend_hash.c,
11: the part of Zend scripting engine library,
12: Copyrighted (C) 1999-2000 Zend Technologies Ltd.
13: http://www.zend.com/license/0_92.txt
14: For more information about Zend please visit http://www.zend.com/
15: */
16:
1.1 paf 17: #ifndef PA_HASH_H
18: #define PA_HASH_H
1.56 paf 19:
1.83 ! moko 20: static const char * const IDENT_HASH_H="$Date: 2010-08-04 15:08:44 $";
1.1 paf 21:
1.59 paf 22: #include "pa_memory.h"
1.1 paf 23: #include "pa_types.h"
1.74 misha 24: #include "pa_string.h"
1.59 paf 25:
26: const int HASH_ALLOCATES_COUNT=29;
1.1 paf 27:
1.61 paf 28: /** Zend comment: Generated on an Octa-ALPHA 300MHz CPU & 2.5GB RAM monster
29:
30: paf: HPUX ld could not handle static member: unsatisfied symbols
31: */
32: static uint Hash_allocates[HASH_ALLOCATES_COUNT]={
33: 5, 11, 19, 53, 107, 223, 463, 983, 1979, 3907, 7963,
34: 16229, 32531, 65407, 130987, 262237, 524521, 1048793,
35: 2097397, 4194103, 8388857, 16777447, 33554201, 67108961,
36: 134217487, 268435697, 536870683, 1073741621, 2147483399};
37:
1.68 misha 38: /// useful generic hash function
39: inline void generic_hash_code(uint& result, char c) {
40: result=(result<<4)+c;
41: if(uint g=(result&0xF0000000)) {
42: result=result^(g>>24);
43: result=result^g;
44: }
45: }
46: /// useful generic hash function
47: inline void generic_hash_code(uint& result, const char* s) {
48: while(char c=*s++) {
49: result=(result<<4)+c;
50: if(uint g=(result&0xF0000000)) {
51: result=result^(g>>24);
52: result=result^g;
53: }
54: }
55: }
56:
57: /// useful generic hash function
58: inline void generic_hash_code(uint& result, const char* buf, size_t size) {
59: const char* end=buf+size;
60: while(buf<end) {
61: result=(result<<4)+*buf++;
62: if(uint g=(result&0xF0000000)) {
63: result=result^(g>>24);
64: result=result^g;
65: }
66: }
67: }
68:
69: /// simple hash code of int. used by EXIF mapping
70: inline uint hash_code(int self) {
71: uint result=0;
72: generic_hash_code(result, (const char*)&self, sizeof(self));
73: return result;
74: }
75:
1.75 misha 76: #endif // PA_HASH_H
77:
78: #ifndef PA_HASH_CLASS
79: #define PA_HASH_CLASS
1.29 paf 80: /**
1.59 paf 81: Simple hash.
1.29 paf 82:
1.59 paf 83: Automatically rehashed when almost is_full.
1.51 paf 84: Contains no 0 values.
85: get returning 0 means there were no such.
86: "put value 0" means "remove"
1.29 paf 87: */
1.75 misha 88: #ifdef HASH_ORDER
89:
90: #undef HASH
91: #undef HASH_STRING
1.79 misha 92: #undef HASH_NEW_PAIR
93: #undef HASH_FOR_EACH
1.75 misha 94:
95: #define HASH OrderedHash
96: #define HASH_STRING OrderedHashString
1.79 misha 97: #define HASH_NEW_PAIR(code, key, value) *ref=new Pair(code, key, value, *ref, this->last); this->last=&((*ref)->next)
98:
99: #define HASH_FOR_EACH \
100: for(Pair *pair=this->first; pair; pair=pair->next)
1.75 misha 101:
102: #else
103:
104: #define HASH Hash
105: #define HASH_STRING HashString
1.79 misha 106: #define HASH_NEW_PAIR(code, key, value) *ref=new Pair(code, key, value, *ref)
107:
108: #define HASH_FOR_EACH \
109: Pair **ref=this->refs; \
110: for(int index=0; index<this->allocated; index++) \
111: for(Pair *pair=*ref++; pair; pair=pair->link)
1.75 misha 112:
113: #endif
114:
115: template<typename K, typename V> class HASH: public PA_Object {
1.1 paf 116: public:
117:
1.59 paf 118: typedef K key_type;
119: typedef V value_type;
1.3 paf 120:
1.75 misha 121: HASH() {
1.61 paf 122: allocated=Hash_allocates[allocates_index=0];
1.59 paf 123: fpairs_count=fused_refs=0;
124: refs=new(UseGC) Pair*[allocated];
1.75 misha 125: #ifdef HASH_ORDER
126: first=0;
127: last=&first;
128: #endif
1.59 paf 129: }
1.25 paf 130:
1.75 misha 131: HASH(const HASH& source) {
1.59 paf 132: allocates_index=source.allocates_index;
133: allocated=source.allocated;
134: fused_refs=source.fused_refs;
135: fpairs_count=source.fpairs_count;
136: refs=new(UseGC) Pair*[allocated];
1.81 moko 137: // clone & rehash
1.75 misha 138: #ifdef HASH_ORDER
139: first=0;
140: last=&first;
1.81 moko 141: for(Pair *pair=source.first; pair; pair=pair->next)
142: {
143: uint index=pair->code%allocated;
144: Pair **ref=&refs[index];
145: HASH_NEW_PAIR(pair->code, pair->key, pair->value);
146: }
147: #else
148: for(int i=0; i<source.allocated; i++)
149: for(Pair *pair=source.refs[i]; pair; pair=pair->link)
150: {
151: Pair **ref=&refs[i];
1.79 misha 152: HASH_NEW_PAIR(pair->code, pair->key, pair->value);
1.59 paf 153: }
1.81 moko 154: #endif
1.43 parser 155: }
156:
1.73 misha 157: #ifdef USE_DESTRUCTORS
1.75 misha 158: ~HASH() {
1.72 misha 159: Pair **ref=refs;
160: for(int index=0; index<allocated; index++)
161: for(Pair *pair=*ref++; pair;){
162: Pair *next=pair->link;
163: delete pair;
164: pair=next;
165: }
1.71 misha 166: delete[] refs;
167: }
1.73 misha 168: #endif
1.71 misha 169:
1.59 paf 170: /// put a [value] under the [key] @returns existed or not
171: bool put(K key, V value) {
172: if(!value) {
173: remove(key);
174: return false;
175: }
176: if(is_full())
177: expand();
178:
179: uint code=hash_code(key);
180: uint index=code%allocated;
181: Pair **ref=&refs[index];
182: for(Pair *pair=*ref; pair; pair=pair->link)
183: if(pair->code==code && pair->key==key) {
184: // found a pair with the same key
185: pair->value=value;
186: return true;
187: }
188:
189: // proper pair not found -- create&link_in new pair
190: if(!*ref) // root cell were fused_refs?
191: fused_refs++; // not, we'll use it and record the fact
1.79 misha 192: HASH_NEW_PAIR(code, key, value);
1.59 paf 193: fpairs_count++;
194: return false;
1.24 paf 195: }
1.10 paf 196:
1.59 paf 197: /// remove the [key] @returns existed or not
198: bool remove(K key) {
199: uint code=hash_code(key);
200: uint index=code%allocated;
1.75 misha 201: for(Pair **ref=&refs[index]; *ref; ref=&(*ref)->link){
202: Pair *pair=*ref;
203: if(pair->code==code && pair->key==key) {
1.59 paf 204: // found a pair with the same key
1.75 misha 205: Pair *next=pair->link;
206: #ifdef HASH_ORDER
207: *(pair->prev)=pair->next;
208: if(pair->next)
209: pair->next->prev=pair->prev;
210: else
211: last=pair->prev;
212: #endif
213: delete pair;
1.59 paf 214: *ref=next;
215: --fpairs_count;
216: return true;
217: }
1.75 misha 218: }
1.8 paf 219:
1.59 paf 220: return false;
221: }
1.48 paf 222:
1.70 misha 223: /// return true if key exists
1.69 misha 224: bool contains(K key){
1.67 misha 225: uint code=hash_code(key);
226: uint index=code%allocated;
1.70 misha 227: for(Pair *pair=refs[index]; pair; pair=pair->link){
228: if(pair->code==code && pair->key==key)
1.67 misha 229: return true;
230: }
231:
232: return false;
233: }
234:
1.59 paf 235: /// get associated [value] by the [key]
236: V get(K key) const {
237: uint code=hash_code(key);
238: uint index=code%allocated;
239: for(Pair *pair=refs[index]; pair; pair=pair->link)
240: if(pair->code==code && pair->key==key)
241: return pair->value;
242:
243: return V(0);
1.33 paf 244: }
1.70 misha 245:
1.82 misha 246: #ifdef HASH_ORDER
247: V first_value() const {
248: return (first) ? first->value : V(0);
249: }
250:
251: V last_value() const {
252: return (fpairs_count) ? ((Pair *)((char *)last - offsetof(Pair, next)))->value : V(0);
253: }
254: #endif
255:
1.51 paf 256: /// put a [value] under the [key] if that [key] existed @returns existed or not
1.63 paf 257: bool put_replaced(K key, V value) {
1.59 paf 258: if(!value) {
259: remove(key);
260: return false;
261: }
262: uint code=hash_code(key);
263: uint index=code%allocated;
264: for(Pair *pair=refs[index]; pair; pair=pair->link)
265: if(pair->code==code && pair->key==key) {
266: // found a pair with the same key, replacing
267: pair->value=value;
268: return true;
269: }
270:
271: // proper pair not found
272: return false;
1.64 paf 273: }
274:
1.51 paf 275: /// put a [value] under the [key] if that [key] NOT existed @returns existed or not
1.59 paf 276: bool put_dont_replace(K key, V value) {
277: if(!value) {
278: remove(key);
279: return false;
280: }
281: if(is_full())
282: expand();
283:
284: uint code=hash_code(key);
285: uint index=code%allocated;
286: Pair **ref=&refs[index];
287: for(Pair *pair=*ref; pair; pair=pair->link)
288: if(pair->code==code && pair->key==key) {
289: // found a pair with the same key, NOT replacing
290: return true;
291: }
292:
293: // proper pair not found -- create&link_in new pair
294: if(!*ref) // root cell were fused_refs?
295: fused_refs++; // not, we'll use it and record the fact
1.79 misha 296: HASH_NEW_PAIR(code, key, value);
1.59 paf 297: fpairs_count++;
298: return false;
299: }
1.18 paf 300:
1.79 misha 301: /// put all 'src' values if NO with same key existed
1.75 misha 302: void merge_dont_replace(const HASH& src) {
1.79 misha 303: #ifdef HASH_ORDER
304: for(Pair *pair=src.first; pair; pair=pair->next)
305: #else
1.59 paf 306: for(int i=0; i<src.allocated; i++)
307: for(Pair *pair=src.refs[i]; pair; pair=pair->link)
1.79 misha 308: #endif
1.59 paf 309: put_dont_replace(pair->key, pair->value);
1.36 paf 310: }
1.11 paf 311:
1.29 paf 312: /// number of elements in hash
1.59 paf 313: int count() const { return fpairs_count; }
1.25 paf 314:
1.59 paf 315: /// iterate over all pairs
316: template<typename I> void for_each(void callback(K, V, I), I info) const {
1.79 misha 317: HASH_FOR_EACH
1.76 misha 318: callback(pair->key, pair->value, info);
1.59 paf 319: }
1.45 paf 320:
1.59 paf 321: /// iterate over all pairs
322: template<typename I> void for_each_ref(void callback(K, V&, I), I info) const {
1.79 misha 323: HASH_FOR_EACH
1.76 misha 324: callback(pair->key, pair->value, info);
1.59 paf 325: }
1.38 paf 326:
1.59 paf 327: /// iterate over all pairs until condition becomes true, return that element
328: template<typename I> V first_that(bool callback(K, V, I), I info) const {
1.79 misha 329: HASH_FOR_EACH
1.75 misha 330: if(callback(pair->key, pair->value, info))
331: return pair->value;
1.59 paf 332: return V(0);
333: }
1.27 paf 334:
1.29 paf 335: /// remove all elements
1.59 paf 336: void clear() {
337: memset(refs, 0, sizeof(*refs)*allocated);
338: fpairs_count=fused_refs=0;
1.75 misha 339: #ifdef HASH_ORDER
340: first=0;
341: last=&first;
342: #endif
1.59 paf 343: }
1.15 paf 344:
1.74 misha 345: protected:
1.1 paf 346:
1.61 paf 347: /// the index of [allocated] in [Hash_allocates]
1.19 paf 348: int allocates_index;
1.1 paf 349:
1.39 paf 350: /// number of allocated pairs
1.19 paf 351: int allocated;
1.1 paf 352:
1.39 paf 353: /// used pairs
1.59 paf 354: int fused_refs;
1.44 parser 355:
356: /// stored pairs total (including those by links)
1.59 paf 357: int fpairs_count;
1.1 paf 358:
1.39 paf 359: /// pair storage
1.59 paf 360: class Pair: public PA_Allocated {
361: public:
1.1 paf 362: uint code;
1.59 paf 363: K key;
364: V value;
1.1 paf 365: Pair *link;
1.75 misha 366: #ifdef HASH_ORDER
367: Pair **prev;
368: Pair *next;
369:
370: Pair(uint acode, K akey, V avalue, Pair *alink, Pair **aprev) : code(acode), key(akey), value(avalue), link(alink),
371: prev(aprev), next(0) { *aprev=this; }
372: #else
373: Pair(uint acode, K akey, V avalue, Pair *alink) : code(acode), key(akey), value(avalue), link(alink) {}
374: #endif
1.2 paf 375: } **refs;
1.1 paf 376:
1.75 misha 377: #ifdef HASH_ORDER
378: Pair *first;
379: Pair **last;
380: #endif
381:
1.83 ! moko 382: /// filled to threshold (THRESHOLD_PERCENT=75), needs expanding
! 383: bool is_full() { return fused_refs + allocated/4 >= allocated; }
1.5 paf 384:
1.39 paf 385: /// allocate larger buffer & rehash
1.59 paf 386: void expand() {
387: int old_allocated=allocated;
388: Pair **old_refs=refs;
389:
1.83 ! moko 390: if (allocates_index<HASH_ALLOCATES_COUNT-1) allocates_index++;
1.59 paf 391: // allocated bigger refs array
1.61 paf 392: allocated=Hash_allocates[allocates_index];
1.59 paf 393: refs=new(UseGC) Pair*[allocated];
394:
395: // rehash
396: Pair **old_ref=old_refs;
397: for(int old_index=0; old_index<old_allocated; old_index++)
398: for(Pair *pair=*old_ref++; pair; ) {
399: Pair *next=pair->link;
400:
401: uint new_index=pair->code%allocated;
402: Pair **new_ref=&refs[new_index];
403: pair->link=*new_ref;
404: *new_ref=pair;
405:
406: pair=next;
407: }
408:
409: delete[] old_refs;
410: }
1.4 paf 411:
412: private: //disabled
413:
1.75 misha 414: HASH& operator = (const HASH&) { return *this; }
1.1 paf 415: };
1.59 paf 416:
1.74 misha 417: /**
1.75 misha 418: Simple String::body hash.
419: Allows hash code caching
1.74 misha 420: */
421:
422: #ifdef HASH_CODE_CACHING
423:
1.75 misha 424: template<typename V> class HASH_STRING: public HASH<const CORD,V> {
1.74 misha 425: public:
426:
1.75 misha 427: typedef typename HASH<const CORD,V>::Pair Pair;
1.74 misha 428: typedef const String::Body &K;
429:
430: typedef K key_type;
431:
432: /// put a [value] under the [key] @returns existed or not
433: bool put(K str, V value) {
434: if(!value) {
435: remove(str);
436: return false;
437: }
438: if(this->is_full())
439: this->expand();
440:
441: CORD key=str.get_cord();
442:
443: uint code=str.get_hash_code();
444: uint index=code%this->allocated;
445: Pair **ref=&this->refs[index];
446: for(Pair *pair=*ref; pair; pair=pair->link)
447: if(pair->code==code && CORD_cmp(pair->key,key)==0) {
448: // found a pair with the same key
449: pair->value=value;
450: return true;
451: }
452:
453: // proper pair not found -- create&link_in new pair
454: if(!*ref) // root cell were fused_refs?
455: this->fused_refs++; // not, we'll use it and record the fact
1.79 misha 456: HASH_NEW_PAIR(code, key, value);
1.74 misha 457: this->fpairs_count++;
458: return false;
459: }
460:
461: /// remove the [key] @returns existed or not
462: bool remove(K str) {
463: CORD key=str.get_cord();
464: uint code=str.get_hash_code();
465: uint index=code%this->allocated;
1.75 misha 466: for(Pair **ref=&this->refs[index]; *ref; ref=&(*ref)->link){
467: Pair *pair=*ref;
468: if(pair->code==code && CORD_cmp(pair->key,key)==0) {
1.74 misha 469: // found a pair with the same key
1.75 misha 470: Pair *next=pair->link;
471: #ifdef HASH_ORDER
472: *(pair->prev)=pair->next;
473: if(pair->next)
474: pair->next->prev=pair->prev;
475: else
476: this->last=pair->prev;
477: #endif
478: delete pair;
1.74 misha 479: *ref=next;
480: --this->fpairs_count;
481: return true;
482: }
1.75 misha 483: }
1.74 misha 484:
485: return false;
486: }
487:
488: /// return true if key exists
489: bool contains(K str){
490: CORD key=str.get_cord();
491: uint code=str.get_hash_code();
492: uint index=code%this->allocated;
493: for(Pair *pair=this->refs[index]; pair; pair=pair->link){
494: if(pair->code==code && CORD_cmp(pair->key,key)==0)
495: return true;
496: }
497:
498: return false;
499: }
500:
501: /// get associated [value] by the [key]
502: V get(K str) const {
503: CORD key=str.get_cord();
504: uint code=str.get_hash_code();
505: uint index=code%this->allocated;
506: for(Pair *pair=this->refs[index]; pair; pair=pair->link)
507: if(pair->code==code && CORD_cmp(pair->key,key)==0)
508: return pair->value;
509:
510: return V(0);
511: }
512:
513: /// put a [value] under the [key] if that [key] existed @returns existed or not
514: bool put_replaced(K str, V value) {
515: if(!value) {
516: remove(str);
517: return false;
518: }
519:
520: CORD key=str.get_cord();
521: uint code=str.get_hash_code();
522: uint index=code%this->allocated;
523: for(Pair *pair=this->refs[index]; pair; pair=pair->link)
524: if(pair->code==code && CORD_cmp(pair->key,key)==0) {
525: // found a pair with the same key, replacing
526: pair->value=value;
527: return true;
528: }
529:
530: // proper pair not found
531: return false;
532: }
533:
534: /// put a [value] under the [key] if that [key] NOT existed @returns existed or not
535: bool put_dont_replace(K str, V value) {
536: if(!value) {
537: remove(str);
538: return false;
539: }
540: if(this->is_full())
541: this->expand();
542:
543: CORD key=str.get_cord();
544: uint code=str.get_hash_code();
545: uint index=code%this->allocated;
546: Pair **ref=&this->refs[index];
547: for(Pair *pair=*ref; pair; pair=pair->link)
548: if(pair->code==code && CORD_cmp(pair->key,key)==0) {
549: // found a pair with the same key, NOT replacing
550: return true;
551: }
552:
553: // proper pair not found -- create&link_in new pair
554: if(!*ref) // root cell were fused_refs?
555: this->fused_refs++; // not, we'll use it and record the fact
1.79 misha 556: HASH_NEW_PAIR(code, key, value);
1.74 misha 557: this->fpairs_count++;
558: return false;
559: }
560:
1.79 misha 561: /// put all 'src' values if NO with same key existed
562: void merge_dont_replace(const HASH_STRING& src) {
1.76 misha 563: #ifdef HASH_ORDER
1.79 misha 564: for(Pair *pair=src.first; pair; pair=pair->next)
1.76 misha 565: #else
1.79 misha 566: for(int i=0; i<src.allocated; i++)
567: for(Pair *pair=src.refs[i]; pair; pair=pair->link)
1.76 misha 568: #endif
1.79 misha 569: put_dont_replace(String::Body(pair->key, pair->code), pair->value);
570: }
571:
572: /// iterate over all pairs
573: template<typename I> void for_each(void callback(K, V, I), I info) const {
574: HASH_FOR_EACH
575: callback(String::Body(pair->key, pair->code), pair->value, info);
1.74 misha 576: }
577:
578: /// iterate over all pairs
579: template<typename I> void for_each_ref(void callback(K, V&, I), I info) const {
1.79 misha 580: HASH_FOR_EACH
581: callback(String::Body(pair->key, pair->code), pair->value, info);
1.74 misha 582: }
583:
584: /// iterate over all pairs until condition becomes true, return that element
585: template<typename I> V first_that(bool callback(K, V, I), I info) const {
1.79 misha 586: HASH_FOR_EACH
1.75 misha 587: if(callback(String::Body(pair->key, pair->code), pair->value, info))
588: return pair->value;
1.74 misha 589: return V(0);
590: }
1.80 misha 591:
592: /// simple hash iterator
593: class Iterator {
594: const HASH_STRING<V>& fhash;
595: Pair *fcurrent;
596: public:
597: Iterator(const HASH_STRING<V>& ahash): fhash(ahash) {
598: fcurrent=fhash.first;
599: }
600:
601: operator bool () {
602: return fcurrent != 0;
603: }
604:
605: void next() {
606: fcurrent=fcurrent->next;
607: }
608:
609: String::Body key(){
610: return String::Body(fcurrent->key, fcurrent->code);
611: }
612:
613: V value(){
614: return fcurrent->value;
615: }
616: };
1.74 misha 617: };
1.78 misha 618: #else //HASH_CODE_CACHING
1.74 misha 619:
1.75 misha 620: template<typename V> class HASH_STRING: public HASH<const String::Body,V>{};
1.74 misha 621:
1.78 misha 622: #endif //HASH_CODE_CACHING
1.74 misha 623:
1.75 misha 624: #ifndef HASH_ORDER
1.74 misha 625: /// Auto-object used to temporarily substituting/removing string hash values
1.59 paf 626: template <typename K, typename V>
1.55 paf 627: class Temp_hash_value {
1.74 misha 628: HashString<V> &fhash;
1.59 paf 629: K fname;
630: V saved_value;
1.55 paf 631: public:
1.74 misha 632: Temp_hash_value(HashString<V>& ahash, K aname, V avalue) :
1.55 paf 633: fhash(ahash),
634: fname(aname),
635: saved_value(ahash.get(aname)) {
636: fhash.put(aname, avalue);
637: }
638: ~Temp_hash_value() {
639: fhash.put(fname, saved_value);
640: }
641: };
1.75 misha 642: #endif
1.1 paf 643:
1.75 misha 644: #endif //PA_HASH_CLASS
E-mail: