/* Copyright 1995 J"orn Rennecke */

/*
 * Word-at-a-time string hashing.
 *
 *   aphash() - hash a word-aligned string of known length
 *   idhash() - hash an identifier while scanning for its end
 *   uhash()  - aphash()-compatible hash for unaligned strings
 *
 * All three fold a 32 bit accumulator down to 16 bits through the
 * Pearson permutation table T[].
 */

#include "port.h"
#include "common.h"
#include "lex.h"

#if defined(sparc) && !defined(WORDS_BIGENDIAN)
#define WORDS_BIGENDIAN
#endif

#define ACTUAL_HASHMAX (((HASHMAX + 6) & -8) + 4)

#ifdef i386
#define HAVE_ROTATE
#endif

/*
 * Rotate a left / right by n bits and store the result back into a.
 *
 * Both shift counts are reduced modulo the width of a, so a count of 0
 * (or of the full width) is a well defined no-op. The callers below do
 * pass 0: ROL(length,a) when the tail is a full word, and
 * ROL/ROR(shift,a) in uhash() when the start address is aligned. The
 * plain (a << n) op (a >> (width - n)) form shifts by the full width in
 * that case, which is undefined; combined with XOR on a CPU that masks
 * the shift count it evaluates to a ^ a == 0.
 * a must be an unsigned lvalue at least as wide as int; a and n are
 * evaluated more than once.
 */
#define ROT_WIDTH(a) (8 * sizeof(a))
#define ROT_MASK(a)  (ROT_WIDTH(a) - 1)
#define ROL(n,a) ((a) = (((a) << ((n) & ROT_MASK(a))) | \
                         ((a) >> ((ROT_WIDTH(a) - (n)) & ROT_MASK(a)))))
#define ROR(n,a) ((a) = (((a) >> ((n) & ROT_MASK(a))) | \
                         ((a) << ((ROT_WIDTH(a) - (n)) & ROT_MASK(a)))))

/* T[] is based on Peter K. Pearson's article in CACM 33-6, pp. 677. */
static unsigned char T[] = {
      1,  87,  49,  12, 176, 178, 102, 166, 121, 193,   6,  84, 249, 230,  44, 163,
     14, 197, 213, 181, 161,  85, 218,  80,  64, 239,  24, 226, 236, 142,  38, 200,
    110, 177, 104, 103, 141, 253, 255,  50,  77, 101,  81,  18,  45,  96,  31, 222,
     25, 107, 190,  70,  86, 237, 240,  34,  72, 242,  20, 214, 244, 227, 149, 235,
     97, 234,  57,  22,  60, 250,  82, 175, 208,   5, 127, 199, 111,  62, 135, 248,
    174, 169, 211,  58,  66, 154, 106, 195, 245, 171,  17, 187, 182, 179,   0, 243,
    132,  56, 148,  75, 128, 133, 158, 100, 130, 126,  91,  13, 153, 246, 216, 219,
    119,  68, 223,  78,  83,  88, 201,  99, 122,  11,  92,  32, 136, 114,  52,  10,
    138,  30,  48, 183, 156,  35,  61,  26, 143,  74, 251,  94, 129, 162,  63, 152,
    170,   7, 115, 167, 241, 206,   3, 150,  55,  59, 151, 220,  90,  53,  23, 131,
    125, 173,  15, 238,  79,  95,  89,  16, 105, 137, 225, 224, 217, 160,  37, 123,
    118,  73,   2, 157,  46, 116,   9, 145, 134, 228, 207, 212, 202, 215,  69, 229,
     27, 188,  67, 124, 168, 252,  42,   4,  29, 108,  21, 247,  19, 205,  39, 203,
    233,  40, 186, 147, 198, 192, 155,  33, 164, 191,  98, 204, 165, 180, 117,  76,
    140,  36, 210, 172,  41,  54, 159,   8, 185, 232, 113, 196, 231,  47, 146, 120,
     51,  65,  28, 144, 254, 221,  93, 189, 194, 139, 112,  43,  71, 109, 184, 209,
};

#ifdef WORDS_BIGENDIAN
#if defined(HAVE_ROTATE) && 0 /* not implemented */
#define FAVOUR_ROL
#else
#define FAVOUR_SHIFT
#endif
#else /* WORDS_BIGENDIAN */
#define FAVOUR_ROR
#endif /* WORDS_BIGENDIAN */

/*
 * Hash the length bytes at str, reading the string one 32 bit word at a
 * time.
 *
 * str is dereferenced as int32, so it has to be suitably aligned, and the
 * word holding the last byte is read AND written in full: the bytes of
 * that word behind the string are cleared ("padded") as a side effect.
 * For long strings only a trailing part is hashed; the amount skipped at
 * the front is derived from ACTUAL_HASHMAX.
 *
 * Returns the accumulator folded through T[] (16 significant bits).
 *
 * NOTE(review): for length == 0 the tail shift count becomes 32, which is
 * not a defined shift for b - presumably callers never pass an empty
 * string; confirm.
 */
int32 aphash(char *str, p_int length)
{
    uint32 a, b;
    uint8 c;

    length -= sizeof(a);
    a = 0;
    if ((int32)(b = length) > 0) {
        if ((int32)(b -= ACTUAL_HASHMAX - 2*sizeof(a) + 1) >= 0) {
            /* long string: skip the leading words */
            length += (p_int)str;   /* length now is an end address */
            b &= -sizeof(b);
            str += b;
        } else {
            length += (p_int)str;   /* length now is an end address */
            if (b & sizeof(a)) {
                /* consume a single word so the loop can go in pairs */
                a = *(int32*)(void*)str;
                ROL(1,a);
                str += sizeof(a);
                if (str >= (char *)length)
                    goto loop_end;
            }
        }
        do {
            a ^= *(int32*)(void*)str;
            ROL(1,a);
            a ^= ((int32*)(void*)str)[1];
            ROL(1,a);
            str += sizeof(a) << 1;
        } while(str < (char *)length);
loop_end:
        length -= (p_int)str;
    }
    /* number of bits in the last word that lie behind the string */
    length = -length;
    length <<= 3;
    b = *(int32*)(void*)str;
#ifdef WORDS_BIGENDIAN
    b >>= length;
    a ^= b;
    b <<= length;
#else
    b <<= length;
#ifdef FAVOUR_ROR
    ROL(length,a);
#endif
    a ^= b;
#ifdef FAVOUR_ROR
    ROL(8,a);
#endif
    b >>= length;
#endif
    *(int32*)(void*)str = b; /* pad string in memory to allow easy compare */
    /*
     * The following code combines a into a 16 bit hash value. It depends
     * on sizeof(a) == 4 . On most processors, most or all shifts should be
     * replaceable by byte and word swap/access facilities, although
     * gcc 2.5.8 -m486 is too stupid to notice that.
     */
    c = a;
    c = T[c];
    c ^= a >> 8;
    a >>= 16;
    a ^= T[c] << 8;
    c = a >> 8;
    a ^= T[c];
    return a;
}

#ifdef FAVOUR_ROR
/*
 * Hash the identifier starting at p while looking for its end.
 *
 * The first character is taken unconditionally; every following character
 * is included as long as isalunum() accepts it. The loop is unrolled four
 * times, the addN labels compensate for the position inside the unrolled
 * body.
 *
 * Returns ret.p pointing at the first character rejected by isalunum()
 * and ret.hash holding the accumulator folded through T[].
 */
struct idhash_ret idhash(char *p)
{
    unsigned char c, d;
    uint32 a;
    struct idhash_ret ret;

    a = *(unsigned char *)p;
    c = *++p;
    goto first_test;
    do {
        p += 4;
        ROR(7,a);
        c = p[0];
        a ^= d;
first_test:
        if (!isalunum(c))
            goto add0;
        ROR(8,a);
        d = p[1];
        a ^= c;
        if (!isalunum(d))
            goto add1;
        ROR(8,a);
        c = p[2];
        a ^= d;
        if (!isalunum(c))
            goto add2;
        ROR(8,a);
        d = p[3];
        a ^= c;
    } while (isalunum(d));
    p += 1;
add2:
    p += 1;
add1:
    p += 1;
add0:
    /* fold the 32 bit accumulator, same scheme as in aphash() */
    c = a;
    c = T[c];
    c ^= a >> 8;
    a >>= 16;
    a ^= T[c] << 8;
    c = a >> 8;
    a ^= T[c];
    ret.p = p;
    ret.hash = a;
    return ret;
}
#endif /* FAVOUR_ROR */

#ifdef FAVOUR_SHIFT
/*
 * Version optimized to cut down rotate instructions, because sparc/mips needs
 * to use two shifts & an (X)OR to do them. aphash() hash value compatibility
 * only for big endian bytesex.
 *
 * Characters are collected in b with plain shifts; only once per four
 * characters b is merged into the rotated accumulator a.
 * Returns ret.p pointing at the first character rejected by isalunum()
 * and ret.hash holding the accumulator folded through T[].
 */
struct idhash_ret idhash(char *p)
{
    unsigned char c, d;
    uint32 a, b;
    struct idhash_ret ret;

    b = *(unsigned char *)p;
    a = 0;
    c = *++p;
    goto first_test;
    do {
        p += 4;
        b <<= 8;
        c = p[0];
        ROL(1,a);
        a ^= b;
        b ^= d;
first_test:
        if (!isalunum(c))
            goto add0;
        b <<= 8;
        d = p[1];
        b ^= c;
        if (!isalunum(d))
            goto add1;
        b <<= 8;
        c = p[2];
        b ^= d;
        if (!isalunum(c))
            goto add2;
        b <<= 8;
        d = p[3];
        b ^= c;
    } while (isalunum(d));
    p += 1;
add2:
    p += 1;
add1:
    p += 1;
add0:
    ROL(1,a);
    a ^= b;
    /* fold the 32 bit accumulator, same scheme as in aphash() */
    c = a;
    c = T[c];
    c ^= a >> 8;
    a >>= 16;
    a ^= T[c] << 8;
    c = a >> 8;
    a ^= T[c];
    ret.p = p;
    ret.hash = a;
    return ret;
}
#endif

/*
 * aphash() compatible hash routine for unaligned strings, for use on
 * machines that prohibit unaligned access. Much faster than idhash(),
 * but can only be used when the size is known beforehand.
 * Needed in the (pathological) case that an identifier is longer than HASHMAX.
 * Also used for object hashing.
 */

#ifndef VARIABLE_ROTATE
/*
 * Assemble the 32 bit word that starts n bits into the aligned word p[0].
 * beware of shifts >= 32: the second shift is split in two so that n == 0
 * stays defined. The elements are converted to uint32 first; shifting the
 * signed int32 values would drag copies of the sign bit into the result.
 */
#ifdef WORDS_BIGENDIAN
#define READ_UNAL(n, p) (((uint32)(p)[0] << (n)) | \
        (((uint32)(p)[1] >> (8*sizeof(*(p))-8-(n)))>>8))
#else
#define READ_UNAL(n, p) (((uint32)(p)[0] >> (n)) | \
        (((uint32)(p)[1] << (8*sizeof(*(p))-8-(n)))<<8))
#endif
#endif

#ifdef sparc
#define ANDN
#endif

/*
 * Hash the length bytes at start, which need not be word aligned.
 *
 * The string is only accessed through aligned int32 reads: the address is
 * rounded down, and mask selects which bits of two neighbouring aligned
 * words are merged in each step. The merged words are rotated by shift
 * bits relative to the unaligned data; the accumulator is rotated back
 * once after the loop. Unlike aphash() nothing is written to the string.
 *
 * Returns the accumulator folded through T[] (16 significant bits).
 */
int32 uhash(char *start, p_int length)
{
    char *str;
    uint32 a, b, mask;
    uint8 c;
    int shift;

    /* misalignment of start, in bits */
    shift = ((p_int)start & (sizeof(a)-1)) << 3;
    /* build the mask from an unsigned all-ones value: shifting the int -1
     * is an arithmetic shift to the right and undefined to the left */
#ifdef WORDS_BIGENDIAN
    mask = (uint32)-1 >> shift;
#else
    mask = (uint32)-1 << shift;
#endif
    str = (char *)((p_int)start & -sizeof(a));
    length -= sizeof(a);
    a = 0;
    if ((int32)(b = length) > 0) {
        if ((int32)(b -= ACTUAL_HASHMAX - 2*sizeof(a) + 1) >= 0) {
            /* long string: skip the leading words */
            length += (p_int)str;   /* length now is an end address */
            b &= -sizeof(a);
            str += b;
        } else {
            length += (p_int)str;   /* length now is an end address */
            if (b & sizeof(a)) {
                /* consume a single word so the loop can go in pairs */
                a = *(int32*)(void*)str;
                str += sizeof(a);
#ifdef ANDN
                a &= mask;
                a ^= *(int32*)(void*)str & ~mask;
#else
                a ^= *(int32*)(void*)str;
                a &= mask;
                a ^= *(int32*)(void*)str;
#endif
                ROL(1,a);
                if (str >= (char *)length)
                    goto loop_end;
            }
        }
        do {
#ifdef ANDN
            a ^= (*(int32*)(void*)str & mask) ^
                 (((int32*)(void*)str)[1] & ~mask);
            ROL(1,a);
            a ^= ((int32*)(void*)str)[1] & mask;
            str += sizeof(a) << 1;
            a ^= ((int32*)(void*)str)[0] & ~mask;
            ROL(1,a);
#else
            /* ((x ^ y) & mask) ^ y  ==  (x & mask) | (y & ~mask) */
            b = *(int32*)(void*)str;
            b ^= ((int32*)(void*)str)[1];
            b &= mask;
            b ^= ((int32*)(void*)str)[1];
            a ^= b;
            ROL(1,a);
            b = ((int32*)(void*)str)[1];
            str += sizeof(a) << 1;
            b ^= ((int32*)(void*)str)[0];
            b &= mask;
            b ^= ((int32*)(void*)str)[0];
            a ^= b;
            ROL(1,a);
#endif
        } while(str < (char *)length);
loop_end:
        length -= (p_int)str;
        /* undo the rotation carried by the merged words; shift may be 0 */
#ifdef WORDS_BIGENDIAN
        ROL(shift,a);
#else
        ROR(shift,a);
#endif
    }
    /* number of bits in the last word that lie behind the string */
    length = -length;
    length <<= 3;
#ifdef VARIABLE_ROTATE
    b = *(int32*)(void*)str;
    b ^= ((int32*)(void*)str)[1];
    b &= mask;
    b ^= ((int32*)(void*)str)[1];
#ifdef WORDS_BIGENDIAN
    ROL(shift,b);
#else
    ROR(shift,b);
#endif
#else /* !VARIABLE_ROTATE */
    b = READ_UNAL(shift, (int32*)(void*)str);
#endif /* VARIABLE_ROTATE */
#ifdef WORDS_BIGENDIAN
    b >>= length;
    a ^= b;
#else
    b <<= length;
#ifdef FAVOUR_ROR
    ROL(length,a);
#endif
    a ^= b;
#ifdef FAVOUR_ROR
    ROL(8,a);
#endif
#endif
    /* fold the 32 bit accumulator, same scheme as in aphash() */
    c = a;
    c = T[c];
    c ^= a >> 8;
    a >>= 16;
    a ^= T[c] << 8;
    c = a >> 8;
    a ^= T[c];
    return a;
}

#if 0
/*
 * char based hash routines. alignment hints showed no success,
 * overall performance is lacking.
 */

/* alignment hint */
union quadchar {
    struct { unsigned char c0, c1, c2, c3; } c;
    int32 i;
};

int hashblock(char *s, mp_int len, int max)
{
    int hi, lo;
    register union quadchar *p = (union quadchar *)s;
    register int i = max;

    if (len < i) {
        i = len;
    }
    hi = lo = 0;
    do {
        lo = T[lo ^ p->c.c0];
        hi = T[hi ^ p->c.c1];
        lo = T[lo ^ p->c.c2];
        lo = T[lo ^ p->c.c3];
        p++;
    } while ((i -= 4) > 0);
    return (hi << 8) + lo;
}

int hashstr(char *s, mp_int len, int max)
{
    int hi, lo;
    register unsigned char *p = (void *)s;
    register int i = max;

    if (len < i) {
        i = len;
    }
    hi = lo = 0;
    while ((i -= 4) >= 0) {
        lo = T[lo ^ *p];
        hi = T[hi ^ p[1]];
        lo = T[lo ^ p[2]];
        lo = T[lo ^ p[3]];
        p+=4;
    }
    switch (i) {
      case -1:
        lo = T[lo ^ *p++];
        /* fallthrough */
      case -2:
        hi = T[hi ^ *p++];
        /* fallthrough */
      case -3:
        lo = T[lo ^ *p];
    }
    return (hi << 8) + lo;
}
#endif