/* Copyright 1995 J"orn Rennecke */
#include "port.h"
#include "common.h"
#include "lex.h"
/* Make sure WORDS_BIGENDIAN is defined when compiling for sparc. */
#if defined(sparc) && !defined(WORDS_BIGENDIAN)
#define WORDS_BIGENDIAN
#endif
/*
 * HASHMAX (defined elsewhere) plus 6, rounded down to a multiple of 8,
 * plus 4.  The result is therefore always 4 more than a multiple of 8.
 * aphash() and uhash() use it to decide how much of a long input is
 * skipped before hashing starts.
 */
#define ACTUAL_HASHMAX (((HASHMAX + 6) & -8) + 4)
#ifdef i386
#define HAVE_ROTATE
#endif
/*
 * ROL(n,a) / ROR(n,a): rotate the unsigned lvalue `a' left / right by n bits
 * and store the result back into `a'.  Both arguments are evaluated more
 * than once, so they must be free of side effects.
 *
 * The shift counts are reduced modulo the width of `a', so every n -
 * including 0 and the full width, which the hash routines below can
 * pass - is well defined.  Shifting by the full width of the type, as a
 * naive "(a << n) | (a >> (width - n))" does for n == 0, is undefined
 * behaviour in C.
 */
#define ROTATE_MASK(a) (8*sizeof(a) - 1)
#ifdef HAVE_ROTATE
/* OR form with masked counts: the usual pattern for a rotate instruction */
#define ROL(n,a) ((a) = (((a) << ((n) & ROTATE_MASK(a))) | \
			 ((a) >> ((0 - (n)) & ROTATE_MASK(a)))))
#define ROR(n,a) ((a) = (((a) >> ((n) & ROTATE_MASK(a))) | \
			 ((a) << ((0 - (n)) & ROTATE_MASK(a)))))
#else
/*
 * enable optimizations employing XOR commutativeness.
 * The complementary shift is split into "by 1" and "by width-1-n" so that
 * the two halves never overlap, even for n == 0; with overlapping halves
 * the XOR would cancel `a' to zero.
 */
#define ROL(n,a) ((a) = (((a) << ((n) & ROTATE_MASK(a))) ^ \
			 (((a) >> 1) >> (ROTATE_MASK(a) - ((n) & ROTATE_MASK(a))))))
#define ROR(n,a) ((a) = (((a) >> ((n) & ROTATE_MASK(a))) ^ \
			 (((a) << 1) << (ROTATE_MASK(a) - ((n) & ROTATE_MASK(a))))))
#endif
/*
 * T[] is based on Peter K. Pearson's article in CACM 33-6, pp. 677.
 * It is a 256 entry byte substitution table that the hash routines in this
 * file use to fold their 32 bit accumulator into a 16 bit result.
 * The table is only ever read, hence const.
 */
static const unsigned char T[256] = {
1, 87, 49, 12, 176, 178, 102, 166, 121, 193, 6, 84, 249, 230, 44, 163,
14, 197, 213, 181, 161, 85, 218, 80, 64, 239, 24, 226, 236, 142, 38, 200,
110, 177, 104, 103, 141, 253, 255, 50, 77, 101, 81, 18, 45, 96, 31, 222,
25, 107, 190, 70, 86, 237, 240, 34, 72, 242, 20, 214, 244, 227, 149, 235,
97, 234, 57, 22, 60, 250, 82, 175, 208, 5, 127, 199, 111, 62, 135, 248,
174, 169, 211, 58, 66, 154, 106, 195, 245, 171, 17, 187, 182, 179, 0, 243,
132, 56, 148, 75, 128, 133, 158, 100, 130, 126, 91, 13, 153, 246, 216, 219,
119, 68, 223, 78, 83, 88, 201, 99, 122, 11, 92, 32, 136, 114, 52, 10,
138, 30, 48, 183, 156, 35, 61, 26, 143, 74, 251, 94, 129, 162, 63, 152,
170, 7, 115, 167, 241, 206, 3, 150, 55, 59, 151, 220, 90, 53, 23, 131,
125, 173, 15, 238, 79, 95, 89, 16, 105, 137, 225, 224, 217, 160, 37, 123,
118, 73, 2, 157, 46, 116, 9, 145, 134, 228, 207, 212, 202, 215, 69, 229,
27, 188, 67, 124, 168, 252, 42, 4, 29, 108, 21, 247, 19, 205, 39, 203,
233, 40, 186, 147, 198, 192, 155, 33, 164, 191, 98, 204, 165, 180, 117, 76,
140, 36, 210, 172, 41, 54, 159, 8, 185, 232, 113, 196, 231, 47, 146, 120,
51, 65, 28, 144, 254, 221, 93, 189, 194, 139, 112, 43, 71, 109, 184, 209,
};
/*
 * Pick the variant of the byte-wise identifier hash to build:
 *   FAVOUR_SHIFT - big-endian targets; selects the shift based idhash().
 *   FAVOUR_ROR   - all other targets; selects the rotate based idhash() and
 *                  the extra rotations in the tail handling of aphash() and
 *                  uhash().
 *   FAVOUR_ROL   - never defined: its condition is disabled with "&& 0"
 *                  and no code in this file tests for it.
 */
#ifdef WORDS_BIGENDIAN
#if defined(HAVE_ROTATE) && 0 /* not implemented */
#define FAVOUR_ROL
#else
#define FAVOUR_SHIFT
#endif
#else /* WORDS_BIGENDIAN */
#define FAVOUR_ROR
#endif /* WORDS_BIGENDIAN */
/*
 * aphash - hash `length' bytes starting at `str', one 32 bit word at a time.
 *
 * str    : start of the data.  It is read - and, for the last word, written -
 *          through int32 pointers, so it presumably has to be int32 aligned
 *          and backed by whole words up to and including the word that
 *          holds the end of the string (TODO confirm against the callers).
 * length : number of bytes to hash.
 *
 * Inputs that exceed a limit derived from ACTUAL_HASHMAX have their leading
 * part skipped; only the tail is hashed.
 *
 * Returns a value that fits into 16 bits (see the folding code at the end).
 *
 * Side effect: the word holding the end of the string is stored back with
 * the bytes that lie beyond `length' cleared.
 */
int32 aphash(char *str, p_int length) {
uint32 a, b;
uint8 c;
/* Set one word aside: the word holding the end of the string is handled
 * separately after the loop. */
length -= sizeof(a);
a = 0;
/* Anything left over means the string spans more than one word. */
if ((int32)(b = length) > 0) {
/* b becomes the amount by which the input exceeds the limit derived
 * from ACTUAL_HASHMAX. */
if ((int32)(b -= ACTUAL_HASHMAX - 2*sizeof(a) + 1) >= 0) {
/* Too long: skip the excess, rounded down to whole words, at the front.
 * From here on `length' holds an address: sizeof(a) bytes before the
 * end of the string. */
length += (p_int)str;
b &= -sizeof(b);
str += b;
} else {
/* `length' turns into the same end address as in the branch above. */
length += (p_int)str;
/* The loop below consumes two words per pass; this bit of b decides
 * whether a single word is consumed up front. */
if (b & sizeof(a)) {
a = *(int32*)(void*)str;
ROL(1,a);
str += sizeof(a);
if (str >= (char *)length)
goto loop_end;
}
}
/* Main loop: mix in two words per pass, rotating left by one bit after
 * each of them. */
do {
a ^= *(int32*)(void*)str;
ROL(1,a);
a ^= ((int32*)(void*)str)[1];
ROL(1,a);
str += sizeof(a) << 1;
} while(str < (char *)length);
loop_end:
/* Back to a byte count; str has reached or passed the end address, so
 * the result is zero or negative. */
length -= (p_int)str;
}
/* length := number of bits of the last word that lie beyond the string. */
length = -length;
length <<= 3;
b = *(int32*)(void*)str;
#ifdef WORDS_BIGENDIAN
/* The surplus bytes are the low order ones: shift them out, mix the rest
 * in, then shift back so that b carries zeroes in their place. */
b >>= length;
a ^= b;
b <<= length;
#else
/* The surplus bytes are the high order ones: shift them out. */
b <<= length;
#ifdef FAVOUR_ROR
/* NOTE(review): these two rotations appear to keep the accumulator in
 * step with the rotate-by-8-per-character scheme of idhash() - confirm. */
ROL(length,a);
#endif
a ^= b;
#ifdef FAVOUR_ROR
ROL(8,a);
#endif
/* Shift back so that b carries zeroes in place of the surplus bytes. */
b >>= length;
#endif
*(int32*)(void*)str = b; /* pad string in memory to allow easy compare */
/*
 * The following code combines a into a 16 bit hash value. It depends
 * on sizeof(a) == 4 . On most processors, most or all shifts should be
 * replaceable by byte and word swap/access facilities, although
 * gcc 2.5.8 -m486 is too stupid to notice that.
 */
c = a;
c = T[c];
c ^= a >> 8;
a >>= 16;
a ^= T[c] << 8;
c = a >> 8;
a ^= T[c];
return a;
}
#ifdef FAVOUR_ROR
/*
 * idhash (rotate variant, built when FAVOUR_ROR is defined): hash the
 * identifier that starts at p, one character at a time.
 *
 * The first character is taken unconditionally; each following character
 * is included for as long as isalunum() accepts it.
 *
 * Returns a struct idhash_ret whose
 *   .p    points at the first character after the initial one that
 *         isalunum() rejected, and whose
 *   .hash is the accumulator folded through T[] into 16 bits.
 */
struct idhash_ret idhash(char *p) {
unsigned char c, d;
uint32 a;
struct idhash_ret ret;
/* Seed the accumulator with the first character, fetch the second. */
a = *(unsigned char *)p;
c = *++p;
/* Enter the loop in the middle: on the first pass there is no pending
 * character `d' to mix in yet. */
goto first_test;
do {
p += 4;
/* Every fourth character is preceded by a rotation by 7 rather than 8,
 * i.e. one bit less to the right; this corresponds to the ROL(1,a) that
 * aphash() applies after each word. */
ROR(7,a);
c = p[0];
a ^= d;
first_test:
/* The loop is unrolled four times; c and d alternate as the character
 * under test while the previous one is mixed into a. */
if (!isalunum(c))
goto add0;
ROR(8,a);
d = p[1];
a ^= c;
if (!isalunum(d))
goto add1;
ROR(8,a);
c = p[2];
a ^= d;
if (!isalunum(c))
goto add2;
ROR(8,a);
d = p[3];
a ^= c;
} while (isalunum(d));
/* The labels fall through into each other, so p advances by 3, 2, 1 or 0
 * and ends up on the character that was rejected. */
p += 1;
add2:
p += 1;
add1:
p += 1;
add0:
/* Fold the 32 bit accumulator into 16 bits, as in aphash(). */
c = a;
c = T[c];
c ^= a >> 8;
a >>= 16;
a ^= T[c] << 8;
c = a >> 8;
a ^= T[c];
ret.p = p;
ret.hash = a;
return ret;
}
#endif /* FAVOUR_ROR */
#ifdef FAVOUR_SHIFT
/*
 * Version optimized to cut down rotate instructions, because sparc/mips needs
 * to use two shifts & an (X)OR to do them. aphash() hash value compatibility
 * only for big endian bytesex.
 *
 * idhash (shift variant, built when FAVOUR_SHIFT is defined): hash the
 * identifier that starts at p, one character at a time.
 *
 * The first character is taken unconditionally; each following character
 * is included for as long as isalunum() accepts it.  Characters are
 * collected most significant byte first in b; each completed word is
 * mixed into a the way big endian aphash() does it (rotate a left by one
 * bit, then XOR the word in).
 *
 * Returns a struct idhash_ret whose
 *   .p    points at the first character after the initial one that
 *         isalunum() rejected, and whose
 *   .hash is the accumulator folded through T[] into 16 bits.
 */
struct idhash_ret idhash(char *p) {
    unsigned char c, d;
    uint32 a, b;
    struct idhash_ret ret;

    /* Start the first word with the first character, fetch the second. */
    b = *(unsigned char *)p;
    a = 0;
    c = *++p;
    /* Enter the loop in the middle: no completed word to mix in yet. */
    goto first_test;
    do {
        p += 4;
        c = p[0];
        /*
         * b holds a complete four character word here: mix it in and start
         * the next word with the pending character d.  b has to be
         * restarted rather than shifted on; otherwise the characters of
         * the previous word would stay in b and be mixed in again, and the
         * result would no longer match aphash() for identifiers longer
         * than one word.
         */
        ROL(1,a);
        a ^= b;
        b = d;
first_test:
        /* Unrolled four times; c and d alternate as the character under
         * test while the previous one is appended to b. */
        if (!isalunum(c))
            goto add0;
        b <<= 8;
        d = p[1];
        b ^= c;
        if (!isalunum(d))
            goto add1;
        b <<= 8;
        c = p[2];
        b ^= d;
        if (!isalunum(c))
            goto add2;
        b <<= 8;
        d = p[3];
        b ^= c;
    } while (isalunum(d));
    /* The labels fall through into each other, so p advances by 3, 2, 1 or
     * 0 and ends up on the character that was rejected. */
    p += 1;
add2:
    p += 1;
add1:
    p += 1;
add0:
    /* Mix in the last, possibly partial, word. */
    ROL(1,a);
    a ^= b;
    /* Fold the 32 bit accumulator into 16 bits, as in aphash(). */
    c = a;
    c = T[c];
    c ^= a >> 8;
    a >>= 16;
    a ^= T[c] << 8;
    c = a >> 8;
    a ^= T[c];
    ret.p = p;
    ret.hash = a;
    return ret;
}
#endif
/*
* aphash() compatible hash routine for unaligned strings, for use on
* machines that prohibit unaligned access. Much faster than idhash(),
* but can only be used when the size is known beforehand.
* Needed in the (pathological) case that an identifier is longer than HASHMAX.
* Also used for object hashing.
*/
#ifndef VARIABLE_ROTATE
/* beware of shifts >= 32 */
#ifdef WORDS_BIGENDIAN
#define READ_UNAL(n, p) (((p)[0] << (n)) | \
(((p)[1] >> (8*sizeof(*(p))-8-(n)))>>8))
#else
#define READ_UNAL(n, p) (((p)[0] >> (n)) | \
(((p)[1] << (8*sizeof(*(p))-8-(n)))<<8))
#endif
#endif
#ifdef sparc
#define ANDN
#endif
/*
 * uhash - hash `length' bytes starting at `start', which need not be word
 * aligned, without ever performing an unaligned memory access.
 *
 * start  : first byte of the data.
 * length : number of bytes to hash.
 *
 * The data is read through aligned int32 accesses starting at the word that
 * contains `start'.  Each step combines the wanted parts of two neighbouring
 * aligned words under `mask'; this yields the string's words rotated by the
 * misalignment, which a single rotation after the loop takes out again.
 * Inputs that exceed a limit derived from ACTUAL_HASHMAX have their leading
 * part skipped, as in aphash().
 *
 * Returns a value that fits into 16 bits (see the folding code at the end).
 * Unlike aphash(), nothing is written back to the string.
 *
 * NOTE(review): the word following the one currently processed is always
 * read as well, including when `start' is aligned - presumably the callers
 * guarantee that this word is readable; confirm.
 */
int32 uhash(char *start, p_int length) {
    char *str;
    uint32 a, b, mask;
    uint8 c;
    int shift;

    /* Misalignment of start within its word, in bits. */
    shift = ((p_int)start & (sizeof(a)-1)) << 3;
    /*
     * mask selects those bits of an aligned word that hold the leading
     * part of a string word.  The all-ones pattern must be unsigned: a
     * right shift of the int -1 typically sign-extends and would leave the
     * mask all-ones, letting the bytes in front of the string into the
     * hash, and a left shift of a negative value is undefined.
     */
#ifdef WORDS_BIGENDIAN
    mask = (uint32)-1 >> shift;
#else
    mask = (uint32)-1 << shift;
#endif
    /* Aligned word that contains the first byte of the string. */
    str = (char *)((p_int)start & -sizeof(a));
    /* Set one word aside: the word holding the end of the string is
     * handled separately after the loop. */
    length -= sizeof(a);
    a = 0;
    /* Anything left over means the string spans more than one word. */
    if ((int32)(b = length) > 0) {
        /* b becomes the amount by which the input exceeds the limit
         * derived from ACTUAL_HASHMAX. */
        if ((int32)(b -= ACTUAL_HASHMAX - 2*sizeof(a) + 1) >= 0) {
            /* Too long: skip the excess, rounded down to whole words.
             * From here on `length' holds an end address. */
            length += (p_int)str;
            b &= -sizeof(a);
            str += b;
        } else {
            length += (p_int)str;
            /* The loop below consumes two words per pass; this bit of b
             * decides whether a single word is consumed up front. */
            if (b & sizeof(a)) {
                a = *(int32*)(void*)str;
                str += sizeof(a);
                /* a = (previous word & mask) | (this word & ~mask) */
#ifdef ANDN
                a &= mask;
                a ^= *(int32*)(void*)str & ~mask;
#else
                a ^= *(int32*)(void*)str;
                a &= mask;
                a ^= *(int32*)(void*)str;
#endif
                ROL(1,a);
                if (str >= (char *)length)
                    goto loop_end;
            }
        }
        /* Main loop: two string words per pass, each assembled from two
         * neighbouring aligned words and followed by ROL(1,a). */
        do {
#ifdef ANDN
            a ^= (*(int32*)(void*)str & mask) ^
                (((int32*)(void*)str)[1] & ~mask);
            ROL(1,a);
            a ^= ((int32*)(void*)str)[1] & mask;
            str += sizeof(a) << 1;
            a ^= ((int32*)(void*)str)[0] & ~mask;
            ROL(1,a);
#else
            /* ((x ^ y) & mask) ^ y  ==  (x & mask) | (y & ~mask) */
            b = *(int32*)(void*)str;
            b ^= ((int32*)(void*)str)[1];
            b &= mask;
            b ^= ((int32*)(void*)str)[1];
            a ^= b;
            ROL(1,a);
            b = ((int32*)(void*)str)[1];
            str += sizeof(a) << 1;
            b ^= ((int32*)(void*)str)[0];
            b &= mask;
            b ^= ((int32*)(void*)str)[0];
            a ^= b;
            ROL(1,a);
#endif
        } while(str < (char *)length);
loop_end:
        /* Back to a byte count (zero or negative). */
        length -= (p_int)str;
        /* Take the rotation caused by the misalignment out of a. */
#ifdef WORDS_BIGENDIAN
        ROL(shift,a);
#else
        ROR(shift,a);
#endif
    }
    /* length := number of bits of the last word that lie beyond the string. */
    length = -length;
    length <<= 3;
    /* b := last string word, assembled from two aligned words. */
#ifdef VARIABLE_ROTATE
    b = *(int32*)(void*)str;
    b ^= ((int32*)(void*)str)[1];
    b &= mask;
    b ^= ((int32*)(void*)str)[1];
#ifdef WORDS_BIGENDIAN
    ROL(shift,b);
#else
    ROR(shift,b);
#endif
#else /* !VARIABLE_ROTATE */
    /* Read through an unsigned pointer: with a signed element type the
     * right shifts inside READ_UNAL would sign-extend and corrupt the
     * assembled word whenever the top byte is >= 0x80. */
    b = READ_UNAL(shift, (uint32*)(void*)str);
#endif /* VARIABLE_ROTATE */
    /* Drop the bytes beyond the end of the string and mix in the rest;
     * same tail handling as aphash(), minus the write back. */
#ifdef WORDS_BIGENDIAN
    b >>= length;
    a ^= b;
#else
    b <<= length;
#ifdef FAVOUR_ROR
    ROL(length,a);
#endif
    a ^= b;
#ifdef FAVOUR_ROR
    ROL(8,a);
#endif
#endif
    /* Fold the 32 bit accumulator into 16 bits, as in aphash(). */
    c = a;
    c = T[c];
    c ^= a >> 8;
    a >>= 16;
    a ^= T[c] << 8;
    c = a >> 8;
    a ^= T[c];
    return a;
}
#if 0
/*
 * char based hash routines. Alignment hints showed no success,
 * overall performance is lacking.
 */
/*
 * alignment hint: overlays four unsigned chars (c.c0 .. c.c3) with one
 * int32.  hashblock() casts its input to this union and reads the four
 * characters through it.
 */
union quadchar { struct { unsigned char c0, c1, c2, c3; } c; int32 i; };
/*
 * hashblock - T[] based hash over the data at s, consumed in groups of
 * four characters through union quadchar.
 *
 * At most min(len, max) characters are requested, but the data is always
 * consumed in whole groups of four and at least one group is read.
 * Within each group the second character goes into the high byte of the
 * result, the other three into the low byte.
 *
 * Returns (high << 8) + low.
 */
int hashblock(char *s, mp_int len, int max) {
    union quadchar *quad = (union quadchar *)s;
    int remaining = max;
    int high = 0;
    int low = 0;

    if (len < remaining) {
        remaining = len;
    }
    for (;;) {
        low = T[low ^ quad->c.c0];
        high = T[high ^ quad->c.c1];
        low = T[low ^ quad->c.c2];
        low = T[low ^ quad->c.c3];
        quad++;
        remaining -= 4;
        if (remaining <= 0) {
            break;
        }
    }
    return (high << 8) + low;
}
/*
 * hashstr - T[] based hash over the first min(len, max) characters at s.
 *
 * Complete groups of four characters are handled in the loop: the second
 * character of a group goes into the high byte of the result, the other
 * three into the low byte.  A remainder of one to three characters is
 * mixed in afterwards.
 *
 * Returns (high << 8) + low.
 */
int hashstr(char *s, mp_int len, int max) {
    unsigned char *cursor = (void *)s;
    int budget = max;
    int high = 0;
    int low = 0;

    if (len < budget) {
        budget = len;
    }
    for (budget -= 4; budget >= 0; budget -= 4) {
        low = T[low ^ cursor[0]];
        high = T[high ^ cursor[1]];
        low = T[low ^ cursor[2]];
        low = T[low ^ cursor[3]];
        cursor += 4;
    }
    /* budget is now negative; -1, -2 and -3 mean that three, two or one
     * character(s) are left over. */
    if (budget == -1) {
        low = T[low ^ cursor[0]];
        high = T[high ^ cursor[1]];
        low = T[low ^ cursor[2]];
    } else if (budget == -2) {
        high = T[high ^ cursor[0]];
        low = T[low ^ cursor[1]];
    } else if (budget == -3) {
        low = T[low ^ cursor[0]];
    }
    return (high << 8) + low;
}
#endif