Last active
August 29, 2015 14:24
-
-
Save jnm2/4a240dbf2f8e385b0059 to your computer and use it in GitHub Desktop.
Calculates the String.GetHashCode compatible hash code for a substring without allocating the substring
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary> | |
/// Calculates the hash code for a substring without allocating the substring. | |
/// </summary> | |
// Identical to String.GetHashCode except for custom start and stop | |
// I have unit tests on this | |
[SecuritySafeCritical] | |
[ReliabilityContract(Consistency.WillNotCorruptState, Cer.MayFail)] | |
public static int GetHashCode(this string str, int startIndex, int length) | |
{ | |
if (str == null) throw new ArgumentNullException("str"); | |
if (startIndex < 0 || startIndex > str.Length - length) throw new ArgumentOutOfRangeException("startIndex", startIndex, "Start index must be greater than or equal to zero and less than or equal to the length of the string minus the substring length."); | |
if (length < 0) throw new ArgumentOutOfRangeException("length", length, "Length must be greater than or equal to zero."); | |
unsafe | |
{ | |
fixed (char* src = str) | |
{ | |
Contract.Assert(src[str.Length] == '\0', "src[this.Length] == '\\0'"); | |
Contract.Assert(((int)src) % 4 == 0, "Managed string should start at 4 bytes boundary"); | |
// Must use Environment.Is64BitProcess instead of conditional compilation for AnyCPU builds | |
int hash1 = Environment.Is64BitProcess ? 5381 : (5381 << 16) + 5381; | |
int hash2 = hash1; | |
if (!Environment.Is64BitProcess) | |
{ | |
// 32 bit machines. | |
int* pint = (int*)(src + startIndex); // ORIGINAL: int* pint = (int *)src; | |
int len = length; // ORIGINAL: int len = src.Length; | |
while (len > 3) // ORIGINAL: while (len > 2) // this implementation hashes the zero terminator if the length mod 4 is 3. Unfortunately, the substring is probably not zero-terminated. | |
{ | |
hash1 = ((hash1 << 5) + hash1 + (hash1 >> 27)) ^ pint[0]; | |
hash2 = ((hash2 << 5) + hash2 + (hash2 >> 27)) ^ pint[1]; | |
pint += 2; | |
len -= 4; | |
} | |
// BEGIN NEW | |
switch (len) | |
{ | |
case 3: | |
// Avoid hashing the next character, since it's not guaranteed to be a zero terminator anymore | |
hash1 = ((hash1 << 5) + hash1 + (hash1 >> 27)) ^ pint[0]; | |
hash2 = ((hash2 << 5) + hash2 + (hash2 >> 27)) ^ (pint[1] & 0xFF); | |
break; | |
case 2: | |
hash1 = ((hash1 << 5) + hash1 + (hash1 >> 27)) ^ pint[0]; // ORIGINAL when (len > 0) | |
break; | |
case 1: | |
// Avoid hashing the next character, since it's not guaranteed to be a zero terminator anymore | |
hash1 = ((hash1 << 5) + hash1 + (hash1 >> 27)) ^ (pint[0] & 0xFF); | |
break; | |
} | |
//END NEW | |
} | |
else | |
{ | |
// INLINED: int c; | |
char* s = (src + startIndex); // ORIGINAL: char *s = src; | |
int len = length; // NEW | |
while (len != 0) // ORIGINAL: while ((c = s[0]) != 0) { | |
{ | |
// INLINED: c = s[0]; | |
hash1 = ((hash1 << 5) + hash1) ^ s[0]; // ORIGINAL: hash1 = ((hash1 << 5) + hash1) ^ c; | |
// INLINED: c = s[1]; | |
if (len == 1) // ORIGINAL: if (c == 0) | |
break; | |
len -= 2; // NEW | |
hash2 = ((hash2 << 5) + hash2) ^ s[1]; | |
s += 2; | |
} | |
} | |
return hash1 + (hash2 * 1566083941); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment