Annotation of parser3/src/include/pa_hash.h, revision 1.87
1.28 paf 1: /** @file
1.29 paf 2: Parser: hash class decl.
3:
1.84 moko 4: Copyright (c) 2001-2012 Art. Lebedev Studio (http://www.artlebedev.com)
1.29 paf 5:
1.54 paf 6: Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
1.1 paf 7: */
8:
1.59 paf 9: /*
10: The prime numbers used from zend_hash.c,
11: the part of Zend scripting engine library,
12: Copyrighted (C) 1999-2000 Zend Technologies Ltd.
13: http://www.zend.com/license/0_92.txt
14: For more information about Zend please visit http://www.zend.com/
15: */
16:
1.1 paf 17: #ifndef PA_HASH_H
18: #define PA_HASH_H
1.56 paf 19:
1.87 ! moko 20: #define IDENT_PA_HASH_H "$Id: pa_hash.h,v 1.86 2015/03/12 08:18:18 misha Exp $"
1.1 paf 21:
1.59 paf 22: #include "pa_memory.h"
1.1 paf 23: #include "pa_types.h"
1.74 misha 24: #include "pa_string.h"
1.59 paf 25:
/// number of entries in Hash_allocates
const int HASH_ALLOCATES_COUNT=29;

/** Bucket-array sizes, in ascending order, that a hash steps through as it grows.

	Zend comment: Generated on an Octa-ALPHA 300MHz CPU & 2.5GB RAM monster

	paf: HPUX ld could not handle static member: unsatisfied symbols

	Nothing in this header writes to the table, so it is const:
	each translation unit still gets its own copy, but none can modify it by accident.
*/
static const uint Hash_allocates[HASH_ALLOCATES_COUNT]={
	5, 11, 19, 53, 107, 223, 463, 983, 1979, 3907, 7963,
	16229, 32531, 65407, 130987, 262237, 524521, 1048793,
	2097397, 4194103, 8388857, 16777447, 33554201, 67108961,
	134217487, 268435697, 536870683, 1073741621, 2147483399};
37:
/// useful generic hash function: folds the single character [c] into the running hash [result].
/// The accumulator is shifted left by four bits and [c] is added; if that leaves any of the
/// top four bits set, they are XOR-ed into bits 4..7 and then cleared from the top.
inline void generic_hash_code(uint& result, char c) {
	result=(result<<4)+c;
	const uint top=result&0xF0000000;
	if(top)
		result^=(top>>24)^top;
}
/// useful generic hash function: folds every character of the zero-terminated string [s]
/// into the running hash [result], in order. An empty string leaves [result] untouched.
inline void generic_hash_code(uint& result, const char* s) {
	for(; *s; ++s) {
		result=(result<<4)+*s;
		const uint top=result&0xF0000000;
		if(top)
			result^=(top>>24)^top; // fold the overflowing top nibble back in, then drop it
	}
}
56:
/// useful generic hash function: folds exactly [size] bytes starting at [buf] into the
/// running hash [result]. Zero bytes are hashed like any other; size 0 leaves [result] untouched.
inline void generic_hash_code(uint& result, const char* buf, size_t size) {
	for(size_t i=0; i<size; i++) {
		result=(result<<4)+buf[i];
		const uint top=result&0xF0000000;
		if(top)
			result^=(top>>24)^top; // fold the overflowing top nibble back in, then drop it
	}
}
68:
69: /// simple hash code of int. used by EXIF mapping
70: inline uint hash_code(int self) {
71: uint result=0;
72: generic_hash_code(result, (const char*)&self, sizeof(self));
73: return result;
74: }
75:
1.75 misha 76: #endif // PA_HASH_H
77:
78: #ifndef PA_HASH_CLASS
79: #define PA_HASH_CLASS
/**
	Simple hash.

	Automatically rehashed when almost full.
	Contains no 0 values:
	get returning 0 means there was no such key,
	"put value 0" means "remove".
*/
/*
	Flavour selection.

	With HASH_ORDER defined the classes below are compiled as OrderedHash/OrderedHashString
	and every pair is additionally threaded on an insertion-order list (first/last, prev/next);
	otherwise they are compiled as Hash/HashString.

	HASH_NEW_PAIR expects a `Pair **ref` in scope pointing at the bucket head to link into.
	HASH_FOR_EACH opens a loop over all pairs with the loop variable `pair`;
	the statement that follows it becomes the loop body.
*/
#ifdef HASH_ORDER

#undef HASH
#undef HASH_STRING
#undef HASH_NEW_PAIR
#undef HASH_FOR_EACH

#define HASH OrderedHash
#define HASH_STRING OrderedHashString

// Two statements: wrapped in do/while(0) so the macro behaves as ONE statement
// even when used as the body of an unbraced if/for.
#define HASH_NEW_PAIR(code, key, value) \
	do { \
		*ref=new Pair(code, key, value, *ref, this->last); \
		this->last=&((*ref)->next); \
	} while(0)

#define HASH_FOR_EACH \
	for(Pair *pair=this->first; pair; pair=pair->next)

#else

#define HASH Hash
#define HASH_STRING HashString
#define HASH_NEW_PAIR(code, key, value) *ref=new Pair(code, key, value, *ref)

#define HASH_FOR_EACH \
	Pair **ref=this->refs; \
	for(int index=0; index<this->allocated; index++) \
		for(Pair *pair=*ref++; pair; pair=pair->link)

#endif
114:
115: template<typename K, typename V> class HASH: public PA_Object {
1.1 paf 116: public:
117:
1.59 paf 118: typedef K key_type;
119: typedef V value_type;
1.3 paf 120:
1.75 misha 121: HASH() {
1.61 paf 122: allocated=Hash_allocates[allocates_index=0];
1.59 paf 123: fpairs_count=fused_refs=0;
1.87 ! moko 124: refs=new Pair*[allocated];
1.75 misha 125: #ifdef HASH_ORDER
126: first=0;
127: last=&first;
128: #endif
1.59 paf 129: }
1.25 paf 130:
1.75 misha 131: HASH(const HASH& source) {
1.59 paf 132: allocates_index=source.allocates_index;
133: allocated=source.allocated;
134: fused_refs=source.fused_refs;
135: fpairs_count=source.fpairs_count;
1.87 ! moko 136: refs=new Pair*[allocated];
1.81 moko 137: // clone & rehash
1.75 misha 138: #ifdef HASH_ORDER
139: first=0;
140: last=&first;
1.81 moko 141: for(Pair *pair=source.first; pair; pair=pair->next)
142: {
143: uint index=pair->code%allocated;
144: Pair **ref=&refs[index];
145: HASH_NEW_PAIR(pair->code, pair->key, pair->value);
146: }
147: #else
148: for(int i=0; i<source.allocated; i++)
149: for(Pair *pair=source.refs[i]; pair; pair=pair->link)
150: {
151: Pair **ref=&refs[i];
1.79 misha 152: HASH_NEW_PAIR(pair->code, pair->key, pair->value);
1.59 paf 153: }
1.81 moko 154: #endif
1.43 parser 155: }
156:
1.73 misha 157: #ifdef USE_DESTRUCTORS
1.75 misha 158: ~HASH() {
1.72 misha 159: Pair **ref=refs;
160: for(int index=0; index<allocated; index++)
161: for(Pair *pair=*ref++; pair;){
162: Pair *next=pair->link;
163: delete pair;
164: pair=next;
165: }
1.71 misha 166: delete[] refs;
167: }
1.73 misha 168: #endif
1.71 misha 169:
1.59 paf 170: /// put a [value] under the [key] @returns existed or not
171: bool put(K key, V value) {
172: if(!value) {
173: remove(key);
174: return false;
175: }
176: if(is_full())
177: expand();
178:
179: uint code=hash_code(key);
180: uint index=code%allocated;
181: Pair **ref=&refs[index];
182: for(Pair *pair=*ref; pair; pair=pair->link)
183: if(pair->code==code && pair->key==key) {
184: // found a pair with the same key
185: pair->value=value;
186: return true;
187: }
188:
189: // proper pair not found -- create&link_in new pair
190: if(!*ref) // root cell were fused_refs?
191: fused_refs++; // not, we'll use it and record the fact
1.79 misha 192: HASH_NEW_PAIR(code, key, value);
1.59 paf 193: fpairs_count++;
194: return false;
1.24 paf 195: }
1.10 paf 196:
1.59 paf 197: /// remove the [key] @returns existed or not
198: bool remove(K key) {
199: uint code=hash_code(key);
200: uint index=code%allocated;
1.75 misha 201: for(Pair **ref=&refs[index]; *ref; ref=&(*ref)->link){
202: Pair *pair=*ref;
203: if(pair->code==code && pair->key==key) {
1.59 paf 204: // found a pair with the same key
1.75 misha 205: Pair *next=pair->link;
206: #ifdef HASH_ORDER
207: *(pair->prev)=pair->next;
208: if(pair->next)
209: pair->next->prev=pair->prev;
210: else
211: last=pair->prev;
212: #endif
213: delete pair;
1.59 paf 214: *ref=next;
215: --fpairs_count;
216: return true;
217: }
1.75 misha 218: }
1.8 paf 219:
1.59 paf 220: return false;
221: }
1.48 paf 222:
1.70 misha 223: /// return true if key exists
1.69 misha 224: bool contains(K key){
1.67 misha 225: uint code=hash_code(key);
226: uint index=code%allocated;
1.70 misha 227: for(Pair *pair=refs[index]; pair; pair=pair->link){
228: if(pair->code==code && pair->key==key)
1.67 misha 229: return true;
230: }
231:
232: return false;
233: }
234:
1.59 paf 235: /// get associated [value] by the [key]
236: V get(K key) const {
237: uint code=hash_code(key);
238: uint index=code%allocated;
239: for(Pair *pair=refs[index]; pair; pair=pair->link)
240: if(pair->code==code && pair->key==key)
241: return pair->value;
242:
243: return V(0);
1.33 paf 244: }
1.70 misha 245:
1.82 misha 246: #ifdef HASH_ORDER
1.86 misha 247: String::Body first_key() const {
248: return (first) ? String::Body(first->key, first->code) : String::Body();
249: }
250:
1.82 misha 251: V first_value() const {
252: return (first) ? first->value : V(0);
253: }
254:
1.86 misha 255: String::Body last_key() const {
256: if (fpairs_count) {
257: Pair* pair = (Pair*)((char *)last - offsetof(Pair, next));
258: return String::Body(pair->key, pair->code);
259: } else {
260: return String::Body();
261: }
262: }
263:
1.82 misha 264: V last_value() const {
265: return (fpairs_count) ? ((Pair *)((char *)last - offsetof(Pair, next)))->value : V(0);
266: }
267: #endif
268:
1.51 paf 269: /// put a [value] under the [key] if that [key] existed @returns existed or not
1.63 paf 270: bool put_replaced(K key, V value) {
1.59 paf 271: if(!value) {
272: remove(key);
273: return false;
274: }
275: uint code=hash_code(key);
276: uint index=code%allocated;
277: for(Pair *pair=refs[index]; pair; pair=pair->link)
278: if(pair->code==code && pair->key==key) {
279: // found a pair with the same key, replacing
280: pair->value=value;
281: return true;
282: }
283:
284: // proper pair not found
285: return false;
1.64 paf 286: }
287:
1.51 paf 288: /// put a [value] under the [key] if that [key] NOT existed @returns existed or not
1.59 paf 289: bool put_dont_replace(K key, V value) {
290: if(!value) {
291: remove(key);
292: return false;
293: }
294: if(is_full())
295: expand();
296:
297: uint code=hash_code(key);
298: uint index=code%allocated;
299: Pair **ref=&refs[index];
300: for(Pair *pair=*ref; pair; pair=pair->link)
301: if(pair->code==code && pair->key==key) {
302: // found a pair with the same key, NOT replacing
303: return true;
304: }
305:
306: // proper pair not found -- create&link_in new pair
307: if(!*ref) // root cell were fused_refs?
308: fused_refs++; // not, we'll use it and record the fact
1.79 misha 309: HASH_NEW_PAIR(code, key, value);
1.59 paf 310: fpairs_count++;
311: return false;
312: }
1.18 paf 313:
1.79 misha 314: /// put all 'src' values if NO with same key existed
1.75 misha 315: void merge_dont_replace(const HASH& src) {
1.79 misha 316: #ifdef HASH_ORDER
317: for(Pair *pair=src.first; pair; pair=pair->next)
318: #else
1.59 paf 319: for(int i=0; i<src.allocated; i++)
320: for(Pair *pair=src.refs[i]; pair; pair=pair->link)
1.79 misha 321: #endif
1.59 paf 322: put_dont_replace(pair->key, pair->value);
1.36 paf 323: }
1.11 paf 324:
1.29 paf 325: /// number of elements in hash
1.59 paf 326: int count() const { return fpairs_count; }
1.25 paf 327:
1.59 paf 328: /// iterate over all pairs
329: template<typename I> void for_each(void callback(K, V, I), I info) const {
1.79 misha 330: HASH_FOR_EACH
1.76 misha 331: callback(pair->key, pair->value, info);
1.59 paf 332: }
1.45 paf 333:
1.59 paf 334: /// iterate over all pairs
335: template<typename I> void for_each_ref(void callback(K, V&, I), I info) const {
1.79 misha 336: HASH_FOR_EACH
1.76 misha 337: callback(pair->key, pair->value, info);
1.59 paf 338: }
1.38 paf 339:
1.59 paf 340: /// iterate over all pairs until condition becomes true, return that element
341: template<typename I> V first_that(bool callback(K, V, I), I info) const {
1.79 misha 342: HASH_FOR_EACH
1.75 misha 343: if(callback(pair->key, pair->value, info))
344: return pair->value;
1.59 paf 345: return V(0);
346: }
1.27 paf 347:
1.29 paf 348: /// remove all elements
1.59 paf 349: void clear() {
350: memset(refs, 0, sizeof(*refs)*allocated);
351: fpairs_count=fused_refs=0;
1.75 misha 352: #ifdef HASH_ORDER
353: first=0;
354: last=&first;
355: #endif
1.59 paf 356: }
1.15 paf 357:
1.74 misha 358: protected:
1.1 paf 359:
1.61 paf 360: /// the index of [allocated] in [Hash_allocates]
1.19 paf 361: int allocates_index;
1.1 paf 362:
1.39 paf 363: /// number of allocated pairs
1.19 paf 364: int allocated;
1.1 paf 365:
1.39 paf 366: /// used pairs
1.59 paf 367: int fused_refs;
1.44 parser 368:
369: /// stored pairs total (including those by links)
1.59 paf 370: int fpairs_count;
1.1 paf 371:
1.39 paf 372: /// pair storage
1.59 paf 373: class Pair: public PA_Allocated {
374: public:
1.1 paf 375: uint code;
1.59 paf 376: K key;
377: V value;
1.1 paf 378: Pair *link;
1.75 misha 379: #ifdef HASH_ORDER
380: Pair **prev;
381: Pair *next;
382:
383: Pair(uint acode, K akey, V avalue, Pair *alink, Pair **aprev) : code(acode), key(akey), value(avalue), link(alink),
384: prev(aprev), next(0) { *aprev=this; }
385: #else
386: Pair(uint acode, K akey, V avalue, Pair *alink) : code(acode), key(akey), value(avalue), link(alink) {}
387: #endif
1.2 paf 388: } **refs;
1.1 paf 389:
1.75 misha 390: #ifdef HASH_ORDER
391: Pair *first;
392: Pair **last;
393: #endif
394:
1.83 moko 395: /// filled to threshold (THRESHOLD_PERCENT=75), needs expanding
396: bool is_full() { return fused_refs + allocated/4 >= allocated; }
1.5 paf 397:
1.39 paf 398: /// allocate larger buffer & rehash
1.59 paf 399: void expand() {
400: int old_allocated=allocated;
401: Pair **old_refs=refs;
402:
1.83 moko 403: if (allocates_index<HASH_ALLOCATES_COUNT-1) allocates_index++;
1.59 paf 404: // allocated bigger refs array
1.61 paf 405: allocated=Hash_allocates[allocates_index];
1.87 ! moko 406: refs=new Pair*[allocated];
1.59 paf 407:
408: // rehash
409: Pair **old_ref=old_refs;
410: for(int old_index=0; old_index<old_allocated; old_index++)
411: for(Pair *pair=*old_ref++; pair; ) {
412: Pair *next=pair->link;
413:
414: uint new_index=pair->code%allocated;
415: Pair **new_ref=&refs[new_index];
416: pair->link=*new_ref;
417: *new_ref=pair;
418:
419: pair=next;
420: }
421:
422: delete[] old_refs;
423: }
1.4 paf 424:
425: private: //disabled
426:
1.75 misha 427: HASH& operator = (const HASH&) { return *this; }
1.1 paf 428: };
1.59 paf 429:
1.74 misha 430: /**
1.75 misha 431: Simple String::body hash.
432: Allows hash code caching
1.74 misha 433: */
434:
435: #ifdef HASH_CODE_CACHING
436:
1.75 misha 437: template<typename V> class HASH_STRING: public HASH<const CORD,V> {
1.74 misha 438: public:
439:
1.75 misha 440: typedef typename HASH<const CORD,V>::Pair Pair;
1.74 misha 441: typedef const String::Body &K;
442:
443: typedef K key_type;
444:
445: /// put a [value] under the [key] @returns existed or not
446: bool put(K str, V value) {
447: if(!value) {
448: remove(str);
449: return false;
450: }
451: if(this->is_full())
452: this->expand();
453:
454: CORD key=str.get_cord();
455:
456: uint code=str.get_hash_code();
457: uint index=code%this->allocated;
458: Pair **ref=&this->refs[index];
459: for(Pair *pair=*ref; pair; pair=pair->link)
460: if(pair->code==code && CORD_cmp(pair->key,key)==0) {
461: // found a pair with the same key
462: pair->value=value;
463: return true;
464: }
465:
466: // proper pair not found -- create&link_in new pair
467: if(!*ref) // root cell were fused_refs?
468: this->fused_refs++; // not, we'll use it and record the fact
1.79 misha 469: HASH_NEW_PAIR(code, key, value);
1.74 misha 470: this->fpairs_count++;
471: return false;
472: }
473:
474: /// remove the [key] @returns existed or not
475: bool remove(K str) {
476: CORD key=str.get_cord();
477: uint code=str.get_hash_code();
478: uint index=code%this->allocated;
1.75 misha 479: for(Pair **ref=&this->refs[index]; *ref; ref=&(*ref)->link){
480: Pair *pair=*ref;
481: if(pair->code==code && CORD_cmp(pair->key,key)==0) {
1.74 misha 482: // found a pair with the same key
1.75 misha 483: Pair *next=pair->link;
484: #ifdef HASH_ORDER
485: *(pair->prev)=pair->next;
486: if(pair->next)
487: pair->next->prev=pair->prev;
488: else
489: this->last=pair->prev;
490: #endif
491: delete pair;
1.74 misha 492: *ref=next;
493: --this->fpairs_count;
494: return true;
495: }
1.75 misha 496: }
1.74 misha 497:
498: return false;
499: }
500:
501: /// return true if key exists
502: bool contains(K str){
503: CORD key=str.get_cord();
504: uint code=str.get_hash_code();
505: uint index=code%this->allocated;
506: for(Pair *pair=this->refs[index]; pair; pair=pair->link){
507: if(pair->code==code && CORD_cmp(pair->key,key)==0)
508: return true;
509: }
510:
511: return false;
512: }
513:
514: /// get associated [value] by the [key]
515: V get(K str) const {
516: CORD key=str.get_cord();
517: uint code=str.get_hash_code();
518: uint index=code%this->allocated;
519: for(Pair *pair=this->refs[index]; pair; pair=pair->link)
520: if(pair->code==code && CORD_cmp(pair->key,key)==0)
521: return pair->value;
522:
523: return V(0);
524: }
525:
526: /// put a [value] under the [key] if that [key] existed @returns existed or not
527: bool put_replaced(K str, V value) {
528: if(!value) {
529: remove(str);
530: return false;
531: }
532:
533: CORD key=str.get_cord();
534: uint code=str.get_hash_code();
535: uint index=code%this->allocated;
536: for(Pair *pair=this->refs[index]; pair; pair=pair->link)
537: if(pair->code==code && CORD_cmp(pair->key,key)==0) {
538: // found a pair with the same key, replacing
539: pair->value=value;
540: return true;
541: }
542:
543: // proper pair not found
544: return false;
545: }
546:
547: /// put a [value] under the [key] if that [key] NOT existed @returns existed or not
548: bool put_dont_replace(K str, V value) {
549: if(!value) {
550: remove(str);
551: return false;
552: }
553: if(this->is_full())
554: this->expand();
555:
556: CORD key=str.get_cord();
557: uint code=str.get_hash_code();
558: uint index=code%this->allocated;
559: Pair **ref=&this->refs[index];
560: for(Pair *pair=*ref; pair; pair=pair->link)
561: if(pair->code==code && CORD_cmp(pair->key,key)==0) {
562: // found a pair with the same key, NOT replacing
563: return true;
564: }
565:
566: // proper pair not found -- create&link_in new pair
567: if(!*ref) // root cell were fused_refs?
568: this->fused_refs++; // not, we'll use it and record the fact
1.79 misha 569: HASH_NEW_PAIR(code, key, value);
1.74 misha 570: this->fpairs_count++;
571: return false;
572: }
573:
1.79 misha 574: /// put all 'src' values if NO with same key existed
575: void merge_dont_replace(const HASH_STRING& src) {
1.76 misha 576: #ifdef HASH_ORDER
1.79 misha 577: for(Pair *pair=src.first; pair; pair=pair->next)
1.76 misha 578: #else
1.79 misha 579: for(int i=0; i<src.allocated; i++)
580: for(Pair *pair=src.refs[i]; pair; pair=pair->link)
1.76 misha 581: #endif
1.79 misha 582: put_dont_replace(String::Body(pair->key, pair->code), pair->value);
583: }
584:
585: /// iterate over all pairs
586: template<typename I> void for_each(void callback(K, V, I), I info) const {
587: HASH_FOR_EACH
588: callback(String::Body(pair->key, pair->code), pair->value, info);
1.74 misha 589: }
590:
591: /// iterate over all pairs
592: template<typename I> void for_each_ref(void callback(K, V&, I), I info) const {
1.79 misha 593: HASH_FOR_EACH
594: callback(String::Body(pair->key, pair->code), pair->value, info);
1.74 misha 595: }
596:
597: /// iterate over all pairs until condition becomes true, return that element
598: template<typename I> V first_that(bool callback(K, V, I), I info) const {
1.79 misha 599: HASH_FOR_EACH
1.75 misha 600: if(callback(String::Body(pair->key, pair->code), pair->value, info))
601: return pair->value;
1.74 misha 602: return V(0);
603: }
1.80 misha 604:
605: /// simple hash iterator
606: class Iterator {
607: const HASH_STRING<V>& fhash;
608: Pair *fcurrent;
609: public:
610: Iterator(const HASH_STRING<V>& ahash): fhash(ahash) {
611: fcurrent=fhash.first;
612: }
613:
614: operator bool () {
615: return fcurrent != 0;
616: }
617:
618: void next() {
619: fcurrent=fcurrent->next;
620: }
621:
622: String::Body key(){
623: return String::Body(fcurrent->key, fcurrent->code);
624: }
625:
626: V value(){
627: return fcurrent->value;
628: }
629: };
1.74 misha 630: };
1.78 misha 631: #else //HASH_CODE_CACHING
1.74 misha 632:
1.75 misha 633: template<typename V> class HASH_STRING: public HASH<const String::Body,V>{};
1.74 misha 634:
1.78 misha 635: #endif //HASH_CODE_CACHING
1.74 misha 636:
1.75 misha 637: #ifndef HASH_ORDER
1.74 misha 638: /// Auto-object used to temporarily substituting/removing string hash values
1.85 moko 639: template <typename H, typename V>
1.55 paf 640: class Temp_hash_value {
1.85 moko 641: H *fhash;
642: String::Body fname;
1.59 paf 643: V saved_value;
1.55 paf 644: public:
1.85 moko 645: Temp_hash_value(H *ahash, String::Body aname, V avalue) : fhash(ahash), fname(aname) {
646: if(fhash){
647: saved_value=fhash->get(aname);
648: fhash->put(aname, avalue);
649: }
1.55 paf 650: }
1.85 moko 651: ~Temp_hash_value() {
652: if(fhash)
653: fhash->put(fname, saved_value);
1.55 paf 654: }
655: };
1.75 misha 656: #endif
1.1 paf 657:
1.75 misha 658: #endif //PA_HASH_CLASS
E-mail: